mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	
						commit
						f40fabd688
					
				
					 6 changed files with 50 additions and 32 deletions
				
			
		|  | @ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | |||
| 
 | ||||
| OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | ||||
|     auto& config = g_state.regs.vs; | ||||
|     auto& setup = g_state.vs; | ||||
| 
 | ||||
|     MICROPROFILE_SCOPE(GPU_Shader); | ||||
| 
 | ||||
|  | @ -81,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 
 | ||||
| #ifdef ARCHITECTURE_x86_64 | ||||
|     if (VideoCore::g_shader_jit_enabled) | ||||
|         jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); | ||||
|         jit_shader->Run(setup, state, config.main_offset); | ||||
|     else | ||||
|         RunInterpreter(state); | ||||
|         RunInterpreter(setup, state, config.main_offset); | ||||
| #else | ||||
|     RunInterpreter(state); | ||||
|     RunInterpreter(setup, state, config.main_offset); | ||||
| #endif // ARCHITECTURE_x86_64
 | ||||
| 
 | ||||
|     // Setup output data
 | ||||
|  | @ -156,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ | |||
|     state.conditional_code[0] = false; | ||||
|     state.conditional_code[1] = false; | ||||
| 
 | ||||
|     RunInterpreter(state); | ||||
|     RunInterpreter(setup, state, config.main_offset); | ||||
|     return state.debug; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -283,10 +283,10 @@ struct UnitState { | |||
|     static size_t InputOffset(const SourceRegister& reg) { | ||||
|         switch (reg.GetRegisterType()) { | ||||
|         case RegisterType::Input: | ||||
|             return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
|             return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
| 
 | ||||
|         case RegisterType::Temporary: | ||||
|             return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
|             return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
| 
 | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|  | @ -297,10 +297,10 @@ struct UnitState { | |||
|     static size_t OutputOffset(const DestRegister& reg) { | ||||
|         switch (reg.GetRegisterType()) { | ||||
|         case RegisterType::Output: | ||||
|             return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
|             return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
| 
 | ||||
|         case RegisterType::Temporary: | ||||
|             return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
|             return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||
| 
 | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|  | @ -323,6 +323,23 @@ struct ShaderSetup { | |||
|         std::array<Math::Vec4<u8>, 4> i; | ||||
|     } uniforms; | ||||
| 
 | ||||
|     static size_t UniformOffset(RegisterType type, unsigned index) { | ||||
|         switch (type) { | ||||
|         case RegisterType::FloatUniform: | ||||
|             return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); | ||||
| 
 | ||||
|         case RegisterType::BoolUniform: | ||||
|             return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); | ||||
| 
 | ||||
|         case RegisterType::IntUniform: | ||||
|             return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); | ||||
| 
 | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|             return 0; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     std::array<u32, 1024> program_code; | ||||
|     std::array<u32, 1024> swizzle_data; | ||||
| 
 | ||||
|  |  | |||
|  | @ -41,11 +41,11 @@ struct CallStackElement { | |||
| }; | ||||
| 
 | ||||
| template<bool Debug> | ||||
| void RunInterpreter(UnitState<Debug>& state) { | ||||
| void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { | ||||
|     // TODO: Is there a maximal size for this?
 | ||||
|     boost::container::static_vector<CallStackElement, 16> call_stack; | ||||
| 
 | ||||
|     u32 program_counter = g_state.regs.vs.main_offset; | ||||
|     u32 program_counter = offset; | ||||
| 
 | ||||
|     const auto& uniforms = g_state.vs.uniforms; | ||||
|     const auto& swizzle_data = g_state.vs.swizzle_data; | ||||
|  | @ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| } | ||||
| 
 | ||||
| // Explicit instantiation
 | ||||
| template void RunInterpreter(UnitState<false>& state); | ||||
| template void RunInterpreter(UnitState<true>& state); | ||||
| template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset); | ||||
| template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset); | ||||
| 
 | ||||
| } // namespace
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,7 +11,7 @@ namespace Shader { | |||
| template <bool Debug> struct UnitState; | ||||
| 
 | ||||
| template<bool Debug> | ||||
| void RunInterpreter(UnitState<Debug>& state); | ||||
| void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); | ||||
| 
 | ||||
| } // namespace
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -102,7 +102,7 @@ const JitFunction instr_table[64] = { | |||
| // purposes, as documented below:
 | ||||
| 
 | ||||
| /// Pointer to the uniform memory
 | ||||
| static const X64Reg UNIFORMS = R9; | ||||
| static const X64Reg SETUP = R9; | ||||
| /// The two 32-bit VS address offset registers set by the MOVA instruction
 | ||||
| static const X64Reg ADDROFFS_REG_0 = R10; | ||||
| static const X64Reg ADDROFFS_REG_1 = R11; | ||||
|  | @ -117,7 +117,7 @@ static const X64Reg COND0 = R13; | |||
| /// Result of the previous CMP instruction for the Y-component comparison
 | ||||
| static const X64Reg COND1 = R14; | ||||
| /// Pointer to the UnitState instance for the current VS unit
 | ||||
| static const X64Reg REGISTERS = R15; | ||||
| static const X64Reg STATE = R15; | ||||
| /// SIMD scratch register
 | ||||
| static const X64Reg SCRATCH = XMM0; | ||||
| /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | ||||
|  | @ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15; | |||
| // State registers that must not be modified by external functions calls
 | ||||
| // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | ||||
| static const BitSet32 persistent_regs = { | ||||
|     UNIFORMS, REGISTERS, // Pointers to register blocks
 | ||||
|     SETUP, STATE, // Pointers to register blocks
 | ||||
|     ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
 | ||||
|     ONE+16, NEGBIT+16, // Constants
 | ||||
| }; | ||||
|  | @ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
|     size_t src_offset; | ||||
| 
 | ||||
|     if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | ||||
|         src_ptr = UNIFORMS; | ||||
|         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | ||||
|         src_ptr = SETUP; | ||||
|         src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); | ||||
|     } else { | ||||
|         src_ptr = REGISTERS; | ||||
|         src_ptr = STATE; | ||||
|         src_offset = UnitState<false>::InputOffset(src_reg); | ||||
|     } | ||||
| 
 | ||||
|  | @ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
|     // If all components are enabled, write the result to the destination register
 | ||||
|     if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||||
|         // Store dest back to memory
 | ||||
|         MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); | ||||
|         MOVAPS(MDisp(STATE, dest_offset_disp), src); | ||||
| 
 | ||||
|     } else { | ||||
|         // Not all components are enabled, so mask the result when storing to the destination register...
 | ||||
|         MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); | ||||
|         MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); | ||||
| 
 | ||||
|         if (Common::GetCPUCaps().sse4_1) { | ||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||
|  | @ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
|         } | ||||
| 
 | ||||
|         // Store dest back to memory
 | ||||
|         MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); | ||||
|         MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| } | ||||
| 
 | ||||
| void JitShader::Compile_UniformCondition(Instruction instr) { | ||||
|     int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); | ||||
|     CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | ||||
|     int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | ||||
|     CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); | ||||
| } | ||||
| 
 | ||||
| BitSet32 JitShader::PersistentCallerSavedRegs() { | ||||
|  | @ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
| 
 | ||||
|     looping = true; | ||||
| 
 | ||||
|     int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); | ||||
|     MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); | ||||
|     int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||||
|     MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); | ||||
|     MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | ||||
|     SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | ||||
|     AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
 | ||||
|  | @ -826,8 +826,8 @@ void JitShader::Compile() { | |||
|     // The stack pointer is 8 modulo 16 at the entry of a procedure
 | ||||
|     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | ||||
| 
 | ||||
|     MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); | ||||
|     MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | ||||
|     MOV(PTRBITS, R(SETUP), R(ABI_PARAM1)); | ||||
|     MOV(PTRBITS, R(STATE), R(ABI_PARAM2)); | ||||
| 
 | ||||
|     // Zero address/loop  registers
 | ||||
|     XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); | ||||
|  | @ -845,7 +845,7 @@ void JitShader::Compile() { | |||
|     MOVAPS(NEGBIT, MatR(RAX)); | ||||
| 
 | ||||
|     // Jump to start of the shader program
 | ||||
|     JMPptr(R(ABI_PARAM2)); | ||||
|     JMPptr(R(ABI_PARAM3)); | ||||
| 
 | ||||
|     // Compile entire program
 | ||||
|     Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | ||||
|  |  | |||
|  | @ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock { | |||
| public: | ||||
|     JitShader(); | ||||
| 
 | ||||
|     void Run(void* registers, unsigned offset) const { | ||||
|         program(registers, code_ptr[offset]); | ||||
|     void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { | ||||
|         program(&setup, &state, code_ptr[offset]); | ||||
|     } | ||||
| 
 | ||||
|     void Compile(); | ||||
|  | @ -117,7 +117,7 @@ private: | |||
|     /// Branches that need to be fixed up once the entire shader program is compiled
 | ||||
|     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | ||||
| 
 | ||||
|     using CompiledShader = void(void* registers, const u8* start_addr); | ||||
|     using CompiledShader = void(const void* setup, void* state, const u8* start_addr); | ||||
|     CompiledShader* program = nullptr; | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue