mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	shader_jit: Compile nested loops
and use `T_NEAR` instead of the default in Compile_BREAKC
This commit is contained in:
		
							parent
							
								
									1382035d4d
								
							
						
					
					
						commit
						047e238d09
					
				
					 3 changed files with 155 additions and 31 deletions
				
			
		|  | @ -7,28 +7,28 @@ | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <catch2/catch.hpp> | #include <catch2/catch.hpp> | ||||||
| #include <nihstro/inline_assembly.h> | #include <nihstro/inline_assembly.h> | ||||||
|  | #include "video_core/shader/shader_interpreter.h" | ||||||
| #include "video_core/shader/shader_jit_x64_compiler.h" | #include "video_core/shader/shader_jit_x64_compiler.h" | ||||||
| 
 | 
 | ||||||
| using float24 = Pica::float24; | using float24 = Pica::float24; | ||||||
| using JitShader = Pica::Shader::JitShader; | using JitShader = Pica::Shader::JitShader; | ||||||
|  | using ShaderInterpreter = Pica::Shader::InterpreterEngine; | ||||||
| 
 | 
 | ||||||
| using DestRegister = nihstro::DestRegister; | using DestRegister = nihstro::DestRegister; | ||||||
| using OpCode = nihstro::OpCode; | using OpCode = nihstro::OpCode; | ||||||
| using SourceRegister = nihstro::SourceRegister; | using SourceRegister = nihstro::SourceRegister; | ||||||
|  | using Type = nihstro::InlineAsm::Type; | ||||||
| 
 | 
 | ||||||
| static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::InlineAsm> code) { | static std::unique_ptr<Pica::Shader::ShaderSetup> CompileShaderSetup( | ||||||
|  |     std::initializer_list<nihstro::InlineAsm> code) { | ||||||
|     const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code); |     const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code); | ||||||
| 
 | 
 | ||||||
|     std::array<u32, Pica::Shader::MAX_PROGRAM_CODE_LENGTH> program_code{}; |     auto shader = std::make_unique<Pica::Shader::ShaderSetup>(); | ||||||
|     std::array<u32, Pica::Shader::MAX_SWIZZLE_DATA_LENGTH> swizzle_data{}; |  | ||||||
| 
 | 
 | ||||||
|     std::transform(shbin.program.begin(), shbin.program.end(), program_code.begin(), |     std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(), | ||||||
|                    [](const auto& x) { return x.hex; }); |                    [](const auto& x) { return x.hex; }); | ||||||
|     std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), swizzle_data.begin(), |     std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), | ||||||
|                    [](const auto& x) { return x.hex; }); |                    shader->swizzle_data.begin(), [](const auto& x) { return x.hex; }); | ||||||
| 
 |  | ||||||
|     auto shader = std::make_unique<JitShader>(); |  | ||||||
|     shader->Compile(&program_code, &swizzle_data); |  | ||||||
| 
 | 
 | ||||||
|     return shader; |     return shader; | ||||||
| } | } | ||||||
|  | @ -36,19 +36,32 @@ static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::I | ||||||
| class ShaderTest { | class ShaderTest { | ||||||
| public: | public: | ||||||
|     explicit ShaderTest(std::initializer_list<nihstro::InlineAsm> code) |     explicit ShaderTest(std::initializer_list<nihstro::InlineAsm> code) | ||||||
|         : shader(CompileShader(code)) {} |         : shader_setup(CompileShaderSetup(code)) { | ||||||
|  |         shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     float Run(float input) { |     float Run(float input) { | ||||||
|         Pica::Shader::ShaderSetup shader_setup; |  | ||||||
|         Pica::Shader::UnitState shader_unit; |         Pica::Shader::UnitState shader_unit; | ||||||
| 
 |         RunJit(shader_unit, input); | ||||||
|         shader_unit.registers.input[0].x = float24::FromFloat32(input); |  | ||||||
|         shader->Run(shader_setup, shader_unit, 0); |  | ||||||
|         return shader_unit.registers.output[0].x.ToFloat32(); |         return shader_unit.registers.output[0].x.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     void RunJit(Pica::Shader::UnitState& shader_unit, float input) { | ||||||
|  |         shader_unit.registers.input[0].x = float24::FromFloat32(input); | ||||||
|  |         shader_unit.registers.temporary[0].x = float24::FromFloat32(0); | ||||||
|  |         shader_jit.Run(*shader_setup, shader_unit, 0); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) { | ||||||
|  |         shader_unit.registers.input[0].x = float24::FromFloat32(input); | ||||||
|  |         shader_unit.registers.temporary[0].x = float24::FromFloat32(0); | ||||||
|  |         shader_interpreter.Run(*shader_setup, shader_unit); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| public: | public: | ||||||
|     std::unique_ptr<JitShader> shader; |     JitShader shader_jit; | ||||||
|  |     ShaderInterpreter shader_interpreter; | ||||||
|  |     std::unique_ptr<Pica::Shader::ShaderSetup> shader_setup; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| TEST_CASE("LG2", "[video_core][shader][shader_jit]") { | TEST_CASE("LG2", "[video_core][shader][shader_jit]") { | ||||||
|  | @ -89,3 +102,108 @@ TEST_CASE("EX2", "[video_core][shader][shader_jit]") { | ||||||
|     REQUIRE(shader.Run(79.7262742773f) == Approx(1.e24f)); |     REQUIRE(shader.Run(79.7262742773f) == Approx(1.e24f)); | ||||||
|     REQUIRE(std::isinf(shader.Run(800.f))); |     REQUIRE(std::isinf(shader.Run(800.f))); | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | TEST_CASE("Nested Loop", "[video_core][shader][shader_jit]") { | ||||||
|  |     const auto sh_input = SourceRegister::MakeInput(0); | ||||||
|  |     const auto sh_temp = SourceRegister::MakeTemporary(0); | ||||||
|  |     const auto sh_output = DestRegister::MakeOutput(0); | ||||||
|  | 
 | ||||||
|  |     std::array<Common::Vec4<u8>, 2> loop_parms{Common::Vec4<u8>{4, 0, 1, 0}, | ||||||
|  |                                                Common::Vec4<u8>{4, 0, 1, 0}}; | ||||||
|  | 
 | ||||||
|  |     auto shader_test = ShaderTest({ | ||||||
|  |         // clang-format off
 | ||||||
|  |         {OpCode::Id::LOOP, 0}, | ||||||
|  |             {OpCode::Id::LOOP, 1}, | ||||||
|  |                 {OpCode::Id::ADD, sh_temp, sh_temp, sh_input}, | ||||||
|  |             {Type::EndLoop}, | ||||||
|  |         {Type::EndLoop}, | ||||||
|  |         {OpCode::Id::MOV, sh_output, sh_temp}, | ||||||
|  |         {OpCode::Id::END}, | ||||||
|  |         // clang-format on
 | ||||||
|  |     }); | ||||||
|  | 
 | ||||||
|  |     shader_test.shader_setup->uniforms.i[0] = loop_parms[0]; | ||||||
|  |     shader_test.shader_setup->uniforms.i[1] = loop_parms[0]; | ||||||
|  | 
 | ||||||
|  |     const auto run_test_helper = [&shader_test](float input) { | ||||||
|  |         Pica::Shader::UnitState shader_unit_jit; | ||||||
|  |         Pica::Shader::UnitState shader_unit_inter; | ||||||
|  |         shader_test.RunJit(shader_unit_jit, input); | ||||||
|  |         shader_test.RunInterpreter(shader_unit_inter, input); | ||||||
|  | 
 | ||||||
|  |         REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == | ||||||
|  |                 Approx(shader_unit_inter.registers.output[0].x.ToFloat32())); | ||||||
|  |         REQUIRE(shader_unit_jit.address_registers[2] == shader_unit_inter.address_registers[2]); | ||||||
|  |     }; | ||||||
|  |     { | ||||||
|  |         // Sanity check
 | ||||||
|  |         Pica::Shader::UnitState shader_unit_jit; | ||||||
|  |         shader_test.RunJit(shader_unit_jit, 1.0f); | ||||||
|  |         REQUIRE(shader_unit_jit.address_registers[2] == 6); | ||||||
|  |         REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == Approx(25.0f)); | ||||||
|  | 
 | ||||||
|  |         Pica::Shader::UnitState shader_unit_inter; | ||||||
|  |         shader_test.RunInterpreter(shader_unit_inter, 2.0f); | ||||||
|  |         REQUIRE(shader_unit_inter.address_registers[2] == 6); | ||||||
|  |         REQUIRE(shader_unit_inter.registers.output[0].x.ToFloat32() == Approx(50.0f)); | ||||||
|  |     } | ||||||
|  |     run_test_helper(-5.f); | ||||||
|  |     run_test_helper(0.f); | ||||||
|  |     run_test_helper(2.f); | ||||||
|  |     run_test_helper(6.f); | ||||||
|  |     run_test_helper(79.7262742773f); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST_CASE("Nested Loop Randomized", "[video_core][shader][shader_jit]") { | ||||||
|  |     const auto sh_input = SourceRegister::MakeInput(0); | ||||||
|  |     const auto sh_temp = SourceRegister::MakeTemporary(0); | ||||||
|  |     const auto sh_output = DestRegister::MakeOutput(0); | ||||||
|  | 
 | ||||||
|  |     auto shader_test = ShaderTest({ | ||||||
|  |         // clang-format off
 | ||||||
|  |         {OpCode::Id::LOOP, 0}, | ||||||
|  |             {OpCode::Id::LOOP, 1}, | ||||||
|  |                  {OpCode::Id::LOOP, 2}, | ||||||
|  |                     {OpCode::Id::LOOP, 3}, | ||||||
|  |                         {OpCode::Id::ADD, sh_temp, sh_temp, sh_input}, | ||||||
|  |                     {Type::EndLoop}, | ||||||
|  |                 {Type::EndLoop}, | ||||||
|  |             {Type::EndLoop}, | ||||||
|  |         {Type::EndLoop}, | ||||||
|  | 
 | ||||||
|  |         {OpCode::Id::MOV, sh_output, sh_temp}, | ||||||
|  |         {OpCode::Id::END}, | ||||||
|  |         // clang-format on
 | ||||||
|  |     }); | ||||||
|  | 
 | ||||||
|  |     const auto generate_loop_parms = [] { | ||||||
|  |         u8 iterations = 1 + rand(); | ||||||
|  |         u8 initial = 1 + rand(); | ||||||
|  |         u8 increment = 1 + rand(); | ||||||
|  | 
 | ||||||
|  |         Common::Vec4<u8> loop_parm{iterations, initial, increment, 0}; | ||||||
|  |         return Common::Vec4<u8>{iterations, initial, increment, 0}; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     const auto run_test_helper = [&shader_test](float input) { | ||||||
|  |         Pica::Shader::UnitState shader_unit_jit; | ||||||
|  |         Pica::Shader::UnitState shader_unit_inter; | ||||||
|  |         shader_test.RunJit(shader_unit_jit, input); | ||||||
|  |         shader_test.RunInterpreter(shader_unit_inter, input); | ||||||
|  | 
 | ||||||
|  |         REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == | ||||||
|  |                 Approx(shader_unit_inter.registers.output[0].x.ToFloat32())); | ||||||
|  |         REQUIRE(shader_unit_jit.address_registers[2] == shader_unit_inter.address_registers[2]); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     srand(time(0)); | ||||||
|  |     for (int i = 0; i < 10; i++) { | ||||||
|  |         shader_test.shader_setup->uniforms.i[0] = generate_loop_parms(); | ||||||
|  |         shader_test.shader_setup->uniforms.i[1] = generate_loop_parms(); | ||||||
|  |         shader_test.shader_setup->uniforms.i[2] = generate_loop_parms(); | ||||||
|  |         shader_test.shader_setup->uniforms.i[3] = generate_loop_parms(); | ||||||
|  |         float input = -(RAND_MAX / 2) + rand(); | ||||||
|  |         run_test_helper(input); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -595,11 +595,11 @@ void JitShader::Compile_END(Instruction instr) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void JitShader::Compile_BREAKC(Instruction instr) { | void JitShader::Compile_BREAKC(Instruction instr) { | ||||||
|     Compile_Assert(looping, "BREAKC must be inside a LOOP"); |     Compile_Assert(loop_depth, "BREAKC must be inside a LOOP"); | ||||||
|     if (looping) { |     if (loop_depth) { | ||||||
|         Compile_EvaluateCondition(instr); |         Compile_EvaluateCondition(instr); | ||||||
|         ASSERT(loop_break_label); |         ASSERT(!loop_break_labels.empty()); | ||||||
|         jnz(*loop_break_label); |         jnz(loop_break_labels.back(), T_NEAR); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -725,9 +725,11 @@ void JitShader::Compile_IF(Instruction instr) { | ||||||
| void JitShader::Compile_LOOP(Instruction instr) { | void JitShader::Compile_LOOP(Instruction instr) { | ||||||
|     Compile_Assert(instr.flow_control.dest_offset >= program_counter, |     Compile_Assert(instr.flow_control.dest_offset >= program_counter, | ||||||
|                    "Backwards loops not supported"); |                    "Backwards loops not supported"); | ||||||
|     Compile_Assert(!looping, "Nested loops not supported"); |     if (loop_depth++) { | ||||||
| 
 |         // LOOPCOUNT_REG is a "global", so we don't save it here.
 | ||||||
|     looping = true; |         push(LOOPINC.cvt64()); | ||||||
|  |         push(LOOPCOUNT.cvt64()); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
 |     // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
 | ||||||
|     // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
 |     // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
 | ||||||
|  | @ -746,16 +748,20 @@ void JitShader::Compile_LOOP(Instruction instr) { | ||||||
|     Label l_loop_start; |     Label l_loop_start; | ||||||
|     L(l_loop_start); |     L(l_loop_start); | ||||||
| 
 | 
 | ||||||
|     loop_break_label = Xbyak::Label(); |     loop_break_labels.emplace_back(Xbyak::Label()); | ||||||
|     Compile_Block(instr.flow_control.dest_offset + 1); |     Compile_Block(instr.flow_control.dest_offset + 1); | ||||||
| 
 | 
 | ||||||
|     add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
 |     add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
 | ||||||
|     sub(LOOPCOUNT, 1);           // Increment loop count by 1
 |     sub(LOOPCOUNT, 1);           // Increment loop count by 1
 | ||||||
|     jnz(l_loop_start);           // Loop if not equal
 |     jnz(l_loop_start);           // Loop if not equal
 | ||||||
|     L(*loop_break_label); |  | ||||||
|     loop_break_label.reset(); |  | ||||||
| 
 | 
 | ||||||
|     looping = false; |     L(loop_break_labels.back()); | ||||||
|  |     loop_break_labels.pop_back(); | ||||||
|  | 
 | ||||||
|  |     if (--loop_depth) { | ||||||
|  |         pop(LOOPCOUNT.cvt64()); | ||||||
|  |         pop(LOOPINC.cvt64()); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void JitShader::Compile_JMP(Instruction instr) { | void JitShader::Compile_JMP(Instruction instr) { | ||||||
|  | @ -892,7 +898,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_ | ||||||
|     // Reset flow control state
 |     // Reset flow control state
 | ||||||
|     program = (CompiledShader*)getCurr(); |     program = (CompiledShader*)getCurr(); | ||||||
|     program_counter = 0; |     program_counter = 0; | ||||||
|     looping = false; |     loop_depth = 0; | ||||||
|     instruction_labels.fill(Xbyak::Label()); |     instruction_labels.fill(Xbyak::Label()); | ||||||
| 
 | 
 | ||||||
|     // Find all `CALL` instructions and identify return locations
 |     // Find all `CALL` instructions and identify return locations
 | ||||||
|  |  | ||||||
|  | @ -120,15 +120,15 @@ private: | ||||||
|     /// Mapping of Pica VS instructions to pointers in the emitted code
 |     /// Mapping of Pica VS instructions to pointers in the emitted code
 | ||||||
|     std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels; |     std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels; | ||||||
| 
 | 
 | ||||||
|     /// Label pointing to the end of the current LOOP block. Used by the BREAKC instruction to break
 |     /// Labels pointing to the end of each nested LOOP block. Used by the BREAKC instruction to
 | ||||||
|     /// out of the loop.
 |     /// break out of a loop.
 | ||||||
|     std::optional<Xbyak::Label> loop_break_label; |     std::vector<Xbyak::Label> loop_break_labels; | ||||||
| 
 | 
 | ||||||
|     /// Offsets in code where a return needs to be inserted
 |     /// Offsets in code where a return needs to be inserted
 | ||||||
|     std::vector<unsigned> return_offsets; |     std::vector<unsigned> return_offsets; | ||||||
| 
 | 
 | ||||||
|     unsigned program_counter = 0; ///< Offset of the next instruction to decode
 |     unsigned program_counter = 0; ///< Offset of the next instruction to decode
 | ||||||
|     bool looping = false;         ///< True if compiling a loop, used to check for nested loops
 |     u8 loop_depth = 0;            ///< Depth of the (nested) loops currently compiled
 | ||||||
| 
 | 
 | ||||||
|     using CompiledShader = void(const void* setup, void* state, const u8* start_addr); |     using CompiledShader = void(const void* setup, void* state, const u8* start_addr); | ||||||
|     CompiledShader* program = nullptr; |     CompiledShader* program = nullptr; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue