mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Merge pull request #3662 from wwylele/shader-hash-cache
shader: avoid recomputing hash for the same program
This commit is contained in:
		
						commit
						048b0fc0d3
					
				
					 5 changed files with 62 additions and 24 deletions
				
			
		|  | @ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|             LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); |             LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); | ||||||
|         } else { |         } else { | ||||||
|             g_state.gs.program_code[offset] = value; |             g_state.gs.program_code[offset] = value; | ||||||
|  |             g_state.gs.MarkProgramCodeDirty(); | ||||||
|             offset++; |             offset++; | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|  | @ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|             LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); |             LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); | ||||||
|         } else { |         } else { | ||||||
|             g_state.gs.swizzle_data[offset] = value; |             g_state.gs.swizzle_data[offset] = value; | ||||||
|  |             g_state.gs.MarkSwizzleDataDirty(); | ||||||
|             offset++; |             offset++; | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|  | @ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|             LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); |             LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); | ||||||
|         } else { |         } else { | ||||||
|             g_state.vs.program_code[offset] = value; |             g_state.vs.program_code[offset] = value; | ||||||
|  |             g_state.vs.MarkProgramCodeDirty(); | ||||||
|             if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { |             if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { | ||||||
|                 g_state.gs.program_code[offset] = value; |                 g_state.gs.program_code[offset] = value; | ||||||
|  |                 g_state.gs.MarkProgramCodeDirty(); | ||||||
|             } |             } | ||||||
|             offset++; |             offset++; | ||||||
|         } |         } | ||||||
|  | @ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|             LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); |             LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); | ||||||
|         } else { |         } else { | ||||||
|             g_state.vs.swizzle_data[offset] = value; |             g_state.vs.swizzle_data[offset] = value; | ||||||
|  |             g_state.vs.MarkSwizzleDataDirty(); | ||||||
|             if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { |             if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { | ||||||
|                 g_state.gs.swizzle_data[offset] = value; |                 g_state.gs.swizzle_data[offset] = value; | ||||||
|  |                 g_state.gs.MarkSwizzleDataDirty(); | ||||||
|             } |             } | ||||||
|             offset++; |             offset++; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  | @ -12,6 +12,7 @@ | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "common/hash.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "video_core/pica_types.h" | #include "video_core/pica_types.h" | ||||||
| #include "video_core/regs_rasterizer.h" | #include "video_core/regs_rasterizer.h" | ||||||
|  | @ -173,27 +174,29 @@ struct GSUnitState : public UnitState { | ||||||
|     GSEmitter emitter; |     GSEmitter emitter; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct ShaderSetup { | struct Uniforms { | ||||||
|     struct { |  | ||||||
|     // The float uniforms are accessed by the shader JIT using SSE instructions, and are
 |     // The float uniforms are accessed by the shader JIT using SSE instructions, and are
 | ||||||
|     // therefore required to be 16-byte aligned.
 |     // therefore required to be 16-byte aligned.
 | ||||||
|     alignas(16) Math::Vec4<float24> f[96]; |     alignas(16) Math::Vec4<float24> f[96]; | ||||||
| 
 | 
 | ||||||
|     std::array<bool, 16> b; |     std::array<bool, 16> b; | ||||||
|     std::array<Math::Vec4<u8>, 4> i; |     std::array<Math::Vec4<u8>, 4> i; | ||||||
|     } uniforms; |  | ||||||
| 
 | 
 | ||||||
|     static size_t GetFloatUniformOffset(unsigned index) { |     static size_t GetFloatUniformOffset(unsigned index) { | ||||||
|         return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); |         return offsetof(Uniforms, f) + index * sizeof(Math::Vec4<float24>); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static size_t GetBoolUniformOffset(unsigned index) { |     static size_t GetBoolUniformOffset(unsigned index) { | ||||||
|         return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); |         return offsetof(Uniforms, b) + index * sizeof(bool); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static size_t GetIntUniformOffset(unsigned index) { |     static size_t GetIntUniformOffset(unsigned index) { | ||||||
|         return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); |         return offsetof(Uniforms, i) + index * sizeof(Math::Vec4<u8>); | ||||||
|     } |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct ShaderSetup { | ||||||
|  |     Uniforms uniforms; | ||||||
| 
 | 
 | ||||||
|     std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code; |     std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code; | ||||||
|     std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data; |     std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data; | ||||||
|  | @ -204,6 +207,36 @@ struct ShaderSetup { | ||||||
|         /// Used by the JIT, points to a compiled shader object.
 |         /// Used by the JIT, points to a compiled shader object.
 | ||||||
|         const void* cached_shader = nullptr; |         const void* cached_shader = nullptr; | ||||||
|     } engine_data; |     } engine_data; | ||||||
|  | 
 | ||||||
|  |     void MarkProgramCodeDirty() { | ||||||
|  |         program_code_hash_dirty = true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void MarkSwizzleDataDirty() { | ||||||
|  |         swizzle_data_hash_dirty = true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u64 GetProgramCodeHash() { | ||||||
|  |         if (program_code_hash_dirty) { | ||||||
|  |             program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); | ||||||
|  |             program_code_hash_dirty = false; | ||||||
|  |         } | ||||||
|  |         return program_code_hash; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u64 GetSwizzleDataHash() { | ||||||
|  |         if (swizzle_data_hash_dirty) { | ||||||
|  |             swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); | ||||||
|  |             swizzle_data_hash_dirty = false; | ||||||
|  |         } | ||||||
|  |         return swizzle_data_hash; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     bool program_code_hash_dirty = true; | ||||||
|  |     bool swizzle_data_hash_dirty = true; | ||||||
|  |     u64 program_code_hash = 0xDEADC0DE; | ||||||
|  |     u64 swizzle_data_hash = 0xDEADC0DE; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class ShaderEngine { | class ShaderEngine { | ||||||
|  |  | ||||||
|  | @ -2,7 +2,6 @@ | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include "common/hash.h" |  | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "video_core/shader/shader.h" | #include "video_core/shader/shader.h" | ||||||
| #include "video_core/shader/shader_jit_x64.h" | #include "video_core/shader/shader_jit_x64.h" | ||||||
|  | @ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { | ||||||
|     ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); |     ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); | ||||||
|     setup.engine_data.entry_point = entry_point; |     setup.engine_data.entry_point = entry_point; | ||||||
| 
 | 
 | ||||||
|     u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); |     u64 code_hash = setup.GetProgramCodeHash(); | ||||||
|     u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); |     u64 swizzle_hash = setup.GetSwizzleDataHash(); | ||||||
| 
 | 
 | ||||||
|     u64 cache_key = code_hash ^ swizzle_hash; |     u64 cache_key = code_hash ^ swizzle_hash; | ||||||
|     auto iter = cache.find(cache_key); |     auto iter = cache.find(cache_key); | ||||||
|  |  | ||||||
|  | @ -104,7 +104,7 @@ const JitFunction instr_table[64] = { | ||||||
| // purposes, as documented below:
 | // purposes, as documented below:
 | ||||||
| 
 | 
 | ||||||
| /// Pointer to the uniform memory
 | /// Pointer to the uniform memory
 | ||||||
| static const Reg64 SETUP = r9; | static const Reg64 UNIFORMS = r9; | ||||||
| /// The two 32-bit VS address offset registers set by the MOVA instruction
 | /// The two 32-bit VS address offset registers set by the MOVA instruction
 | ||||||
| static const Reg64 ADDROFFS_REG_0 = r10; | static const Reg64 ADDROFFS_REG_0 = r10; | ||||||
| static const Reg64 ADDROFFS_REG_1 = r11; | static const Reg64 ADDROFFS_REG_1 = r11; | ||||||
|  | @ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15; | ||||||
| // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | ||||||
| static const BitSet32 persistent_regs = BuildRegSet({ | static const BitSet32 persistent_regs = BuildRegSet({ | ||||||
|     // Pointers to register blocks
 |     // Pointers to register blocks
 | ||||||
|     SETUP, |     UNIFORMS, | ||||||
|     STATE, |     STATE, | ||||||
|     // Cached registers
 |     // Cached registers
 | ||||||
|     ADDROFFS_REG_0, |     ADDROFFS_REG_0, | ||||||
|  | @ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | ||||||
|     size_t src_offset; |     size_t src_offset; | ||||||
| 
 | 
 | ||||||
|     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { |     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | ||||||
|         src_ptr = SETUP; |         src_ptr = UNIFORMS; | ||||||
|         src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); |         src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex()); | ||||||
|     } else { |     } else { | ||||||
|         src_ptr = STATE; |         src_ptr = STATE; | ||||||
|         src_offset = UnitState::InputOffset(src_reg); |         src_offset = UnitState::InputOffset(src_reg); | ||||||
|  | @ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void JitShader::Compile_UniformCondition(Instruction instr) { | void JitShader::Compile_UniformCondition(Instruction instr) { | ||||||
|     size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); |     size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); | ||||||
|     cmp(byte[SETUP + offset], 0); |     cmp(byte[UNIFORMS + offset], 0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| BitSet32 JitShader::PersistentCallerSavedRegs() { | BitSet32 JitShader::PersistentCallerSavedRegs() { | ||||||
|  | @ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) { | ||||||
|     // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
 |     // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
 | ||||||
|     // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
 |     // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
 | ||||||
|     // 4 bits) to be used as an offset into the 16-byte vector registers later
 |     // 4 bits) to be used as an offset into the 16-byte vector registers later
 | ||||||
|     size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); |     size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id); | ||||||
|     mov(LOOPCOUNT, dword[SETUP + offset]); |     mov(LOOPCOUNT, dword[UNIFORMS + offset]); | ||||||
|     mov(LOOPCOUNT_REG, LOOPCOUNT); |     mov(LOOPCOUNT_REG, LOOPCOUNT); | ||||||
|     shr(LOOPCOUNT_REG, 4); |     shr(LOOPCOUNT_REG, 4); | ||||||
|     and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
 |     and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
 | ||||||
|  | @ -882,7 +882,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_ | ||||||
|     ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); |     ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); | ||||||
|     mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); |     mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); | ||||||
| 
 | 
 | ||||||
|     mov(SETUP, ABI_PARAM1); |     mov(UNIFORMS, ABI_PARAM1); | ||||||
|     mov(STATE, ABI_PARAM2); |     mov(STATE, ABI_PARAM2); | ||||||
| 
 | 
 | ||||||
|     // Zero address/loop  registers
 |     // Zero address/loop  registers
 | ||||||
|  |  | ||||||
|  | @ -34,7 +34,7 @@ public: | ||||||
|     JitShader(); |     JitShader(); | ||||||
| 
 | 
 | ||||||
|     void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { |     void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { | ||||||
|         program(&setup, &state, instruction_labels[offset].getAddress()); |         program(&setup.uniforms, &state, instruction_labels[offset].getAddress()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, |     void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue