mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	Merge pull request #3662 from wwylele/shader-hash-cache
shader: avoid recomputing hash for the same program
This commit is contained in:
		
						commit
						048b0fc0d3
					
				
					 5 changed files with 62 additions and 24 deletions
				
			
		|  | @ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|             LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); | ||||
|         } else { | ||||
|             g_state.gs.program_code[offset] = value; | ||||
|             g_state.gs.MarkProgramCodeDirty(); | ||||
|             offset++; | ||||
|         } | ||||
|         break; | ||||
|  | @ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|             LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); | ||||
|         } else { | ||||
|             g_state.gs.swizzle_data[offset] = value; | ||||
|             g_state.gs.MarkSwizzleDataDirty(); | ||||
|             offset++; | ||||
|         } | ||||
|         break; | ||||
|  | @ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|             LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); | ||||
|         } else { | ||||
|             g_state.vs.program_code[offset] = value; | ||||
|             g_state.vs.MarkProgramCodeDirty(); | ||||
|             if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { | ||||
|                 g_state.gs.program_code[offset] = value; | ||||
|                 g_state.gs.MarkProgramCodeDirty(); | ||||
|             } | ||||
|             offset++; | ||||
|         } | ||||
|  | @ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|             LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); | ||||
|         } else { | ||||
|             g_state.vs.swizzle_data[offset] = value; | ||||
|             g_state.vs.MarkSwizzleDataDirty(); | ||||
|             if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { | ||||
|                 g_state.gs.swizzle_data[offset] = value; | ||||
|                 g_state.gs.MarkSwizzleDataDirty(); | ||||
|             } | ||||
|             offset++; | ||||
|         } | ||||
|  |  | |||
|  | @ -12,6 +12,7 @@ | |||
| #include "common/assert.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/hash.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "video_core/pica_types.h" | ||||
| #include "video_core/regs_rasterizer.h" | ||||
|  | @ -173,27 +174,29 @@ struct GSUnitState : public UnitState { | |||
|     GSEmitter emitter; | ||||
| }; | ||||
| 
 | ||||
| struct ShaderSetup { | ||||
|     struct { | ||||
| struct Uniforms { | ||||
|     // The float uniforms are accessed by the shader JIT using SSE instructions, and are
 | ||||
|     // therefore required to be 16-byte aligned.
 | ||||
|     alignas(16) Math::Vec4<float24> f[96]; | ||||
| 
 | ||||
|     std::array<bool, 16> b; | ||||
|     std::array<Math::Vec4<u8>, 4> i; | ||||
|     } uniforms; | ||||
| 
 | ||||
|     static size_t GetFloatUniformOffset(unsigned index) { | ||||
|         return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); | ||||
|         return offsetof(Uniforms, f) + index * sizeof(Math::Vec4<float24>); | ||||
|     } | ||||
| 
 | ||||
|     static size_t GetBoolUniformOffset(unsigned index) { | ||||
|         return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); | ||||
|         return offsetof(Uniforms, b) + index * sizeof(bool); | ||||
|     } | ||||
| 
 | ||||
|     static size_t GetIntUniformOffset(unsigned index) { | ||||
|         return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); | ||||
|         return offsetof(Uniforms, i) + index * sizeof(Math::Vec4<u8>); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct ShaderSetup { | ||||
|     Uniforms uniforms; | ||||
| 
 | ||||
|     std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code; | ||||
|     std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data; | ||||
|  | @ -204,6 +207,36 @@ struct ShaderSetup { | |||
|         /// Used by the JIT, points to a compiled shader object.
 | ||||
|         const void* cached_shader = nullptr; | ||||
|     } engine_data; | ||||
| 
 | ||||
|     void MarkProgramCodeDirty() { | ||||
|         program_code_hash_dirty = true; | ||||
|     } | ||||
| 
 | ||||
|     void MarkSwizzleDataDirty() { | ||||
|         swizzle_data_hash_dirty = true; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetProgramCodeHash() { | ||||
|         if (program_code_hash_dirty) { | ||||
|             program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); | ||||
|             program_code_hash_dirty = false; | ||||
|         } | ||||
|         return program_code_hash; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetSwizzleDataHash() { | ||||
|         if (swizzle_data_hash_dirty) { | ||||
|             swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); | ||||
|             swizzle_data_hash_dirty = false; | ||||
|         } | ||||
|         return swizzle_data_hash; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     bool program_code_hash_dirty = true; | ||||
|     bool swizzle_data_hash_dirty = true; | ||||
|     u64 program_code_hash = 0xDEADC0DE; | ||||
|     u64 swizzle_data_hash = 0xDEADC0DE; | ||||
| }; | ||||
| 
 | ||||
| class ShaderEngine { | ||||
|  |  | |||
|  | @ -2,7 +2,6 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/hash.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "video_core/shader/shader.h" | ||||
| #include "video_core/shader/shader_jit_x64.h" | ||||
|  | @ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { | |||
|     ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); | ||||
|     setup.engine_data.entry_point = entry_point; | ||||
| 
 | ||||
|     u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); | ||||
|     u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); | ||||
|     u64 code_hash = setup.GetProgramCodeHash(); | ||||
|     u64 swizzle_hash = setup.GetSwizzleDataHash(); | ||||
| 
 | ||||
|     u64 cache_key = code_hash ^ swizzle_hash; | ||||
|     auto iter = cache.find(cache_key); | ||||
|  |  | |||
|  | @ -104,7 +104,7 @@ const JitFunction instr_table[64] = { | |||
| // purposes, as documented below:
 | ||||
| 
 | ||||
| /// Pointer to the uniform memory
 | ||||
| static const Reg64 SETUP = r9; | ||||
| static const Reg64 UNIFORMS = r9; | ||||
| /// The two 32-bit VS address offset registers set by the MOVA instruction
 | ||||
| static const Reg64 ADDROFFS_REG_0 = r10; | ||||
| static const Reg64 ADDROFFS_REG_1 = r11; | ||||
|  | @ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15; | |||
| // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | ||||
| static const BitSet32 persistent_regs = BuildRegSet({ | ||||
|     // Pointers to register blocks
 | ||||
|     SETUP, | ||||
|     UNIFORMS, | ||||
|     STATE, | ||||
|     // Cached registers
 | ||||
|     ADDROFFS_REG_0, | ||||
|  | @ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
|     size_t src_offset; | ||||
| 
 | ||||
|     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | ||||
|         src_ptr = SETUP; | ||||
|         src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); | ||||
|         src_ptr = UNIFORMS; | ||||
|         src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex()); | ||||
|     } else { | ||||
|         src_ptr = STATE; | ||||
|         src_offset = UnitState::InputOffset(src_reg); | ||||
|  | @ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| } | ||||
| 
 | ||||
| void JitShader::Compile_UniformCondition(Instruction instr) { | ||||
|     size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); | ||||
|     cmp(byte[SETUP + offset], 0); | ||||
|     size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); | ||||
|     cmp(byte[UNIFORMS + offset], 0); | ||||
| } | ||||
| 
 | ||||
| BitSet32 JitShader::PersistentCallerSavedRegs() { | ||||
|  | @ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
|     // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
 | ||||
|     // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
 | ||||
|     // 4 bits) to be used as an offset into the 16-byte vector registers later
 | ||||
|     size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); | ||||
|     mov(LOOPCOUNT, dword[SETUP + offset]); | ||||
|     size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id); | ||||
|     mov(LOOPCOUNT, dword[UNIFORMS + offset]); | ||||
|     mov(LOOPCOUNT_REG, LOOPCOUNT); | ||||
|     shr(LOOPCOUNT_REG, 4); | ||||
|     and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
 | ||||
|  | @ -882,7 +882,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_ | |||
|     ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); | ||||
|     mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); | ||||
| 
 | ||||
|     mov(SETUP, ABI_PARAM1); | ||||
|     mov(UNIFORMS, ABI_PARAM1); | ||||
|     mov(STATE, ABI_PARAM2); | ||||
| 
 | ||||
|     // Zero address/loop  registers
 | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ public: | |||
|     JitShader(); | ||||
| 
 | ||||
|     void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { | ||||
|         program(&setup, &state, instruction_labels[offset].getAddress()); | ||||
|         program(&setup.uniforms, &state, instruction_labels[offset].getAddress()); | ||||
|     } | ||||
| 
 | ||||
|     void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue