mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	shader_jit: Compile nested loops
and use `T_NEAR` instead of the default in Compile_BREAKC
This commit is contained in:
		
							parent
							
								
									1382035d4d
								
							
						
					
					
						commit
						047e238d09
					
				
					 3 changed files with 155 additions and 31 deletions
				
			
		|  | @ -7,28 +7,28 @@ | |||
| #include <memory> | ||||
| #include <catch2/catch.hpp> | ||||
| #include <nihstro/inline_assembly.h> | ||||
| #include "video_core/shader/shader_interpreter.h" | ||||
| #include "video_core/shader/shader_jit_x64_compiler.h" | ||||
| 
 | ||||
| using float24 = Pica::float24; | ||||
| using JitShader = Pica::Shader::JitShader; | ||||
| using ShaderInterpreter = Pica::Shader::InterpreterEngine; | ||||
| 
 | ||||
| using DestRegister = nihstro::DestRegister; | ||||
| using OpCode = nihstro::OpCode; | ||||
| using SourceRegister = nihstro::SourceRegister; | ||||
| using Type = nihstro::InlineAsm::Type; | ||||
| 
 | ||||
| static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::InlineAsm> code) { | ||||
| static std::unique_ptr<Pica::Shader::ShaderSetup> CompileShaderSetup( | ||||
|     std::initializer_list<nihstro::InlineAsm> code) { | ||||
|     const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code); | ||||
| 
 | ||||
|     std::array<u32, Pica::Shader::MAX_PROGRAM_CODE_LENGTH> program_code{}; | ||||
|     std::array<u32, Pica::Shader::MAX_SWIZZLE_DATA_LENGTH> swizzle_data{}; | ||||
|     auto shader = std::make_unique<Pica::Shader::ShaderSetup>(); | ||||
| 
 | ||||
|     std::transform(shbin.program.begin(), shbin.program.end(), program_code.begin(), | ||||
|     std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(), | ||||
|                    [](const auto& x) { return x.hex; }); | ||||
|     std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), swizzle_data.begin(), | ||||
|                    [](const auto& x) { return x.hex; }); | ||||
| 
 | ||||
|     auto shader = std::make_unique<JitShader>(); | ||||
|     shader->Compile(&program_code, &swizzle_data); | ||||
|     std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), | ||||
|                    shader->swizzle_data.begin(), [](const auto& x) { return x.hex; }); | ||||
| 
 | ||||
|     return shader; | ||||
| } | ||||
|  | @ -36,19 +36,32 @@ static std::unique_ptr<JitShader> CompileShader(std::initializer_list<nihstro::I | |||
| class ShaderTest { | ||||
| public: | ||||
|     explicit ShaderTest(std::initializer_list<nihstro::InlineAsm> code) | ||||
|         : shader(CompileShader(code)) {} | ||||
|         : shader_setup(CompileShaderSetup(code)) { | ||||
|         shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data); | ||||
|     } | ||||
| 
 | ||||
|     float Run(float input) { | ||||
|         Pica::Shader::ShaderSetup shader_setup; | ||||
|         Pica::Shader::UnitState shader_unit; | ||||
| 
 | ||||
|         shader_unit.registers.input[0].x = float24::FromFloat32(input); | ||||
|         shader->Run(shader_setup, shader_unit, 0); | ||||
|         RunJit(shader_unit, input); | ||||
|         return shader_unit.registers.output[0].x.ToFloat32(); | ||||
|     } | ||||
| 
 | ||||
|     void RunJit(Pica::Shader::UnitState& shader_unit, float input) { | ||||
|         shader_unit.registers.input[0].x = float24::FromFloat32(input); | ||||
|         shader_unit.registers.temporary[0].x = float24::FromFloat32(0); | ||||
|         shader_jit.Run(*shader_setup, shader_unit, 0); | ||||
|     } | ||||
| 
 | ||||
|     void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) { | ||||
|         shader_unit.registers.input[0].x = float24::FromFloat32(input); | ||||
|         shader_unit.registers.temporary[0].x = float24::FromFloat32(0); | ||||
|         shader_interpreter.Run(*shader_setup, shader_unit); | ||||
|     } | ||||
| 
 | ||||
| public: | ||||
|     std::unique_ptr<JitShader> shader; | ||||
|     JitShader shader_jit; | ||||
|     ShaderInterpreter shader_interpreter; | ||||
|     std::unique_ptr<Pica::Shader::ShaderSetup> shader_setup; | ||||
| }; | ||||
| 
 | ||||
| TEST_CASE("LG2", "[video_core][shader][shader_jit]") { | ||||
|  | @ -89,3 +102,108 @@ TEST_CASE("EX2", "[video_core][shader][shader_jit]") { | |||
|     REQUIRE(shader.Run(79.7262742773f) == Approx(1.e24f)); | ||||
|     REQUIRE(std::isinf(shader.Run(800.f))); | ||||
| } | ||||
| 
 | ||||
| // Runs a two-level nested LOOP through both the JIT and the interpreter and requires that
 | ||||
| // they agree on output[0].x and on address_registers[2].
 | ||||
| TEST_CASE("Nested Loop", "[video_core][shader][shader_jit]") { | ||||
|     const auto sh_input = SourceRegister::MakeInput(0); | ||||
|     const auto sh_temp = SourceRegister::MakeTemporary(0); | ||||
|     const auto sh_output = DestRegister::MakeOutput(0); | ||||
| 
 | ||||
|     // Parameters written to uniforms.i[0] and uniforms.i[1] below, one entry per LOOP.
 | ||||
|     std::array<Common::Vec4<u8>, 2> loop_parms{Common::Vec4<u8>{4, 0, 1, 0}, | ||||
|                                                Common::Vec4<u8>{4, 0, 1, 0}}; | ||||
| 
 | ||||
|     // The innermost body adds the input to temporary 0; the sum is moved to output 0 at the end.
 | ||||
|     auto shader_test = ShaderTest({ | ||||
|         // clang-format off
 | ||||
|         {OpCode::Id::LOOP, 0}, | ||||
|             {OpCode::Id::LOOP, 1}, | ||||
|                 {OpCode::Id::ADD, sh_temp, sh_temp, sh_input}, | ||||
|             {Type::EndLoop}, | ||||
|         {Type::EndLoop}, | ||||
|         {OpCode::Id::MOV, sh_output, sh_temp}, | ||||
|         {OpCode::Id::END}, | ||||
|         // clang-format on
 | ||||
|     }); | ||||
| 
 | ||||
|     // Each LOOP gets its own entry; previously both were taken from loop_parms[0].
 | ||||
|     shader_test.shader_setup->uniforms.i[0] = loop_parms[0]; | ||||
|     shader_test.shader_setup->uniforms.i[1] = loop_parms[1]; | ||||
| 
 | ||||
|     // Executes the same input on both engines with fresh unit states and compares the results.
 | ||||
|     const auto run_test_helper = [&shader_test](float input) { | ||||
|         Pica::Shader::UnitState shader_unit_jit; | ||||
|         Pica::Shader::UnitState shader_unit_inter; | ||||
|         shader_test.RunJit(shader_unit_jit, input); | ||||
|         shader_test.RunInterpreter(shader_unit_inter, input); | ||||
| 
 | ||||
|         REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == | ||||
|                 Approx(shader_unit_inter.registers.output[0].x.ToFloat32())); | ||||
|         REQUIRE(shader_unit_jit.address_registers[2] == shader_unit_inter.address_registers[2]); | ||||
|     }; | ||||
|     { | ||||
|         // Sanity check: pin absolute values so both engines cannot be wrong in the same way.
 | ||||
|         // The expected outputs correspond to 25 additions of the input.
 | ||||
|         Pica::Shader::UnitState shader_unit_jit; | ||||
|         shader_test.RunJit(shader_unit_jit, 1.0f); | ||||
|         REQUIRE(shader_unit_jit.address_registers[2] == 6); | ||||
|         REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == Approx(25.0f)); | ||||
| 
 | ||||
|         Pica::Shader::UnitState shader_unit_inter; | ||||
|         shader_test.RunInterpreter(shader_unit_inter, 2.0f); | ||||
|         REQUIRE(shader_unit_inter.address_registers[2] == 6); | ||||
|         REQUIRE(shader_unit_inter.registers.output[0].x.ToFloat32() == Approx(50.0f)); | ||||
|     } | ||||
|     run_test_helper(-5.f); | ||||
|     run_test_helper(0.f); | ||||
|     run_test_helper(2.f); | ||||
|     run_test_helper(6.f); | ||||
|     run_test_helper(79.7262742773f); | ||||
| } | ||||
| 
 | ||||
| // Runs a four-level nested LOOP with randomized loop parameters and a randomized input through
 | ||||
| // both the JIT and the interpreter, requiring identical output[0].x and address_registers[2].
 | ||||
| TEST_CASE("Nested Loop Randomized", "[video_core][shader][shader_jit]") { | ||||
|     const auto sh_input = SourceRegister::MakeInput(0); | ||||
|     const auto sh_temp = SourceRegister::MakeTemporary(0); | ||||
|     const auto sh_output = DestRegister::MakeOutput(0); | ||||
| 
 | ||||
|     auto shader_test = ShaderTest({ | ||||
|         // clang-format off
 | ||||
|         {OpCode::Id::LOOP, 0}, | ||||
|             {OpCode::Id::LOOP, 1}, | ||||
|                 {OpCode::Id::LOOP, 2}, | ||||
|                     {OpCode::Id::LOOP, 3}, | ||||
|                         {OpCode::Id::ADD, sh_temp, sh_temp, sh_input}, | ||||
|                     {Type::EndLoop}, | ||||
|                 {Type::EndLoop}, | ||||
|             {Type::EndLoop}, | ||||
|         {Type::EndLoop}, | ||||
| 
 | ||||
|         {OpCode::Id::MOV, sh_output, sh_temp}, | ||||
|         {OpCode::Id::END}, | ||||
|         // clang-format on
 | ||||
|     }); | ||||
| 
 | ||||
|     // Builds one random {iterations, initial, increment, 0} parameter vector.
 | ||||
|     const auto generate_loop_parms = [] { | ||||
|         // Unsigned arithmetic yields the same wrapped u8 value as before while avoiding signed
 | ||||
|         // overflow when rand() returns RAND_MAX.
 | ||||
|         const auto random_u8 = [] { return static_cast<u8>(1u + static_cast<unsigned>(rand())); }; | ||||
|         const u8 iterations = random_u8(); | ||||
|         const u8 initial = random_u8(); | ||||
|         const u8 increment = random_u8(); | ||||
| 
 | ||||
|         return Common::Vec4<u8>{iterations, initial, increment, 0}; | ||||
|     }; | ||||
| 
 | ||||
|     // Executes the same input on both engines with fresh unit states and compares the results.
 | ||||
|     const auto run_test_helper = [&shader_test](float input) { | ||||
|         Pica::Shader::UnitState shader_unit_jit; | ||||
|         Pica::Shader::UnitState shader_unit_inter; | ||||
|         shader_test.RunJit(shader_unit_jit, input); | ||||
|         shader_test.RunInterpreter(shader_unit_inter, input); | ||||
| 
 | ||||
|         REQUIRE(shader_unit_jit.registers.output[0].x.ToFloat32() == | ||||
|                 Approx(shader_unit_inter.registers.output[0].x.ToFloat32())); | ||||
|         REQUIRE(shader_unit_jit.address_registers[2] == shader_unit_inter.address_registers[2]); | ||||
|     }; | ||||
| 
 | ||||
|     // Report the seed with any failing assertion so a failing run can be reproduced.
 | ||||
|     const auto seed = static_cast<unsigned>(time(nullptr)); | ||||
|     INFO("Random seed: " << seed); | ||||
|     srand(seed); | ||||
|     for (int i = 0; i < 10; i++) { | ||||
|         shader_test.shader_setup->uniforms.i[0] = generate_loop_parms(); | ||||
|         shader_test.shader_setup->uniforms.i[1] = generate_loop_parms(); | ||||
|         shader_test.shader_setup->uniforms.i[2] = generate_loop_parms(); | ||||
|         shader_test.shader_setup->uniforms.i[3] = generate_loop_parms(); | ||||
|         const float input = static_cast<float>(-(RAND_MAX / 2) + rand()); | ||||
|         run_test_helper(input); | ||||
|     } | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue