mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	Refactor access to state in shader-jit
This commit is contained in:
		
							parent
							
								
									0d8bd3ba36
								
							
						
					
					
						commit
						4e01e9ffc5
					
				
					 4 changed files with 42 additions and 24 deletions
				
			
		|  | @ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||||||
| 
 | 
 | ||||||
| OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | ||||||
|     auto& config = g_state.regs.vs; |     auto& config = g_state.regs.vs; | ||||||
|  |     auto& setup = g_state.vs; | ||||||
| 
 | 
 | ||||||
|     MICROPROFILE_SCOPE(GPU_Shader); |     MICROPROFILE_SCOPE(GPU_Shader); | ||||||
| 
 | 
 | ||||||
|  | @ -81,7 +82,7 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | ||||||
| 
 | 
 | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
|     if (VideoCore::g_shader_jit_enabled) |     if (VideoCore::g_shader_jit_enabled) | ||||||
|         jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); |         jit_shader->Run(setup, state, config.main_offset); | ||||||
|     else |     else | ||||||
|         RunInterpreter(state); |         RunInterpreter(state); | ||||||
| #else | #else | ||||||
|  |  | ||||||
|  | @ -283,10 +283,10 @@ struct UnitState { | ||||||
|     static size_t InputOffset(const SourceRegister& reg) { |     static size_t InputOffset(const SourceRegister& reg) { | ||||||
|         switch (reg.GetRegisterType()) { |         switch (reg.GetRegisterType()) { | ||||||
|         case RegisterType::Input: |         case RegisterType::Input: | ||||||
|             return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
| 
 | 
 | ||||||
|         case RegisterType::Temporary: |         case RegisterType::Temporary: | ||||||
|             return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
| 
 | 
 | ||||||
|         default: |         default: | ||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
|  | @ -297,10 +297,10 @@ struct UnitState { | ||||||
|     static size_t OutputOffset(const DestRegister& reg) { |     static size_t OutputOffset(const DestRegister& reg) { | ||||||
|         switch (reg.GetRegisterType()) { |         switch (reg.GetRegisterType()) { | ||||||
|         case RegisterType::Output: |         case RegisterType::Output: | ||||||
|             return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
| 
 | 
 | ||||||
|         case RegisterType::Temporary: |         case RegisterType::Temporary: | ||||||
|             return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
| 
 | 
 | ||||||
|         default: |         default: | ||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
|  | @ -323,6 +323,23 @@ struct ShaderSetup { | ||||||
|         std::array<Math::Vec4<u8>, 4> i; |         std::array<Math::Vec4<u8>, 4> i; | ||||||
|     } uniforms; |     } uniforms; | ||||||
| 
 | 
 | ||||||
|  |     static size_t UniformOffset(RegisterType type, unsigned index) { | ||||||
|  |         switch (type) { | ||||||
|  |         case RegisterType::FloatUniform: | ||||||
|  |             return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); | ||||||
|  | 
 | ||||||
|  |         case RegisterType::BoolUniform: | ||||||
|  |             return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); | ||||||
|  | 
 | ||||||
|  |         case RegisterType::IntUniform: | ||||||
|  |             return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); | ||||||
|  | 
 | ||||||
|  |         default: | ||||||
|  |             UNREACHABLE(); | ||||||
|  |             return 0; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     std::array<u32, 1024> program_code; |     std::array<u32, 1024> program_code; | ||||||
|     std::array<u32, 1024> swizzle_data; |     std::array<u32, 1024> swizzle_data; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -102,7 +102,7 @@ const JitFunction instr_table[64] = { | ||||||
| // purposes, as documented below:
 | // purposes, as documented below:
 | ||||||
| 
 | 
 | ||||||
| /// Pointer to the uniform memory
 | /// Pointer to the uniform memory
 | ||||||
| static const X64Reg UNIFORMS = R9; | static const X64Reg SETUP = R9; | ||||||
| /// The two 32-bit VS address offset registers set by the MOVA instruction
 | /// The two 32-bit VS address offset registers set by the MOVA instruction
 | ||||||
| static const X64Reg ADDROFFS_REG_0 = R10; | static const X64Reg ADDROFFS_REG_0 = R10; | ||||||
| static const X64Reg ADDROFFS_REG_1 = R11; | static const X64Reg ADDROFFS_REG_1 = R11; | ||||||
|  | @ -117,7 +117,7 @@ static const X64Reg COND0 = R13; | ||||||
| /// Result of the previous CMP instruction for the Y-component comparison
 | /// Result of the previous CMP instruction for the Y-component comparison
 | ||||||
| static const X64Reg COND1 = R14; | static const X64Reg COND1 = R14; | ||||||
| /// Pointer to the UnitState instance for the current VS unit
 | /// Pointer to the UnitState instance for the current VS unit
 | ||||||
| static const X64Reg REGISTERS = R15; | static const X64Reg STATE = R15; | ||||||
| /// SIMD scratch register
 | /// SIMD scratch register
 | ||||||
| static const X64Reg SCRATCH = XMM0; | static const X64Reg SCRATCH = XMM0; | ||||||
| /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | ||||||
|  | @ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15; | ||||||
| // State registers that must not be modified by external functions calls
 | // State registers that must not be modified by external functions calls
 | ||||||
| // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | ||||||
| static const BitSet32 persistent_regs = { | static const BitSet32 persistent_regs = { | ||||||
|     UNIFORMS, REGISTERS, // Pointers to register blocks
 |     SETUP, STATE, // Pointers to register blocks
 | ||||||
|     ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
 |     ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
 | ||||||
|     ONE+16, NEGBIT+16, // Constants
 |     ONE+16, NEGBIT+16, // Constants
 | ||||||
| }; | }; | ||||||
|  | @ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | ||||||
|     size_t src_offset; |     size_t src_offset; | ||||||
| 
 | 
 | ||||||
|     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { |     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | ||||||
|         src_ptr = UNIFORMS; |         src_ptr = SETUP; | ||||||
|         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |         src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); | ||||||
|     } else { |     } else { | ||||||
|         src_ptr = REGISTERS; |         src_ptr = STATE; | ||||||
|         src_offset = UnitState<false>::InputOffset(src_reg); |         src_offset = UnitState<false>::InputOffset(src_reg); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|     // If all components are enabled, write the result to the destination register
 |     // If all components are enabled, write the result to the destination register
 | ||||||
|     if (swiz.dest_mask == NO_DEST_REG_MASK) { |     if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||||||
|         // Store dest back to memory
 |         // Store dest back to memory
 | ||||||
|         MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); |         MOVAPS(MDisp(STATE, dest_offset_disp), src); | ||||||
| 
 | 
 | ||||||
|     } else { |     } else { | ||||||
|         // Not all components are enabled, so mask the result when storing to the destination register...
 |         // Not all components are enabled, so mask the result when storing to the destination register...
 | ||||||
|         MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); |         MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); | ||||||
| 
 | 
 | ||||||
|         if (Common::GetCPUCaps().sse4_1) { |         if (Common::GetCPUCaps().sse4_1) { | ||||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||||
|  | @ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Store dest back to memory
 |         // Store dest back to memory
 | ||||||
|         MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); |         MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void JitShader::Compile_UniformCondition(Instruction instr) { | void JitShader::Compile_UniformCondition(Instruction instr) { | ||||||
|     int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); |     int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | ||||||
|     CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); |     CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| BitSet32 JitShader::PersistentCallerSavedRegs() { | BitSet32 JitShader::PersistentCallerSavedRegs() { | ||||||
|  | @ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) { | ||||||
| 
 | 
 | ||||||
|     looping = true; |     looping = true; | ||||||
| 
 | 
 | ||||||
|     int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); |     int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||||||
|     MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); |     MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); | ||||||
|     MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); |     MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | ||||||
|     SHR(32, R(LOOPCOUNT_REG), Imm8(8)); |     SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | ||||||
|     AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
 |     AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
 | ||||||
|  | @ -826,8 +826,8 @@ void JitShader::Compile() { | ||||||
|     // The stack pointer is 8 modulo 16 at the entry of a procedure
 |     // The stack pointer is 8 modulo 16 at the entry of a procedure
 | ||||||
|     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | ||||||
| 
 | 
 | ||||||
|     MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); |     MOV(PTRBITS, R(SETUP), R(ABI_PARAM1)); | ||||||
|     MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); |     MOV(PTRBITS, R(STATE), R(ABI_PARAM2)); | ||||||
| 
 | 
 | ||||||
|     // Zero address/loop  registers
 |     // Zero address/loop  registers
 | ||||||
|     XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); |     XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); | ||||||
|  | @ -845,7 +845,7 @@ void JitShader::Compile() { | ||||||
|     MOVAPS(NEGBIT, MatR(RAX)); |     MOVAPS(NEGBIT, MatR(RAX)); | ||||||
| 
 | 
 | ||||||
|     // Jump to start of the shader program
 |     // Jump to start of the shader program
 | ||||||
|     JMPptr(R(ABI_PARAM2)); |     JMPptr(R(ABI_PARAM3)); | ||||||
| 
 | 
 | ||||||
|     // Compile entire program
 |     // Compile entire program
 | ||||||
|     Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); |     Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | ||||||
|  |  | ||||||
|  | @ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock { | ||||||
| public: | public: | ||||||
|     JitShader(); |     JitShader(); | ||||||
| 
 | 
 | ||||||
|     void Run(void* registers, unsigned offset) const { |     void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { | ||||||
|         program(registers, code_ptr[offset]); |         program(&setup, &state, code_ptr[offset]); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void Compile(); |     void Compile(); | ||||||
|  | @ -117,7 +117,7 @@ private: | ||||||
|     /// Branches that need to be fixed up once the entire shader program is compiled
 |     /// Branches that need to be fixed up once the entire shader program is compiled
 | ||||||
|     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; |     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | ||||||
| 
 | 
 | ||||||
|     using CompiledShader = void(void* registers, const u8* start_addr); |     using CompiledShader = void(const void* setup, void* state, const u8* start_addr); | ||||||
|     CompiledShader* program = nullptr; |     CompiledShader* program = nullptr; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue