mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	Merge pull request #2476 from yuriks/shader-refactor3
Oh No! More shader changes!
This commit is contained in:
		
						commit
						97e06b0a0d
					
				
					 20 changed files with 184 additions and 180 deletions
				
			
		|  | @ -4,6 +4,7 @@ | |||
| 
 | ||||
| #include <cmath> | ||||
| #include <cstring> | ||||
| #include "common/bit_set.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "video_core/pica.h" | ||||
|  | @ -19,38 +20,32 @@ namespace Pica { | |||
| 
 | ||||
| namespace Shader { | ||||
| 
 | ||||
| OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | ||||
|                                          u32 output_mask) { | ||||
| OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer& input) { | ||||
|     // Setup output data
 | ||||
|     OutputVertex ret; | ||||
|     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
 | ||||
|     // figure out what those circumstances are and enable the remaining outputs then.
 | ||||
|     unsigned index = 0; | ||||
|     for (unsigned i = 0; i < 7; ++i) { | ||||
|     union { | ||||
|         OutputVertex ret{}; | ||||
|         std::array<float24, 24> vertex_slots; | ||||
|     }; | ||||
|     static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes."); | ||||
| 
 | ||||
|         if (index >= regs.vs_output_total) | ||||
|             break; | ||||
|     unsigned int num_attributes = regs.vs_output_total; | ||||
|     ASSERT(num_attributes <= 7); | ||||
|     for (unsigned int i = 0; i < num_attributes; ++i) { | ||||
|         const auto& output_register_map = regs.vs_output_attributes[i]; | ||||
| 
 | ||||
|         if ((output_mask & (1 << i)) == 0) | ||||
|             continue; | ||||
| 
 | ||||
|         const auto& output_register_map = regs.vs_output_attributes[index]; | ||||
| 
 | ||||
|         u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, | ||||
|                             output_register_map.map_z, output_register_map.map_w}; | ||||
|         Regs::VSOutputAttributes::Semantic semantics[4] = { | ||||
|             output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, | ||||
|             output_register_map.map_w}; | ||||
| 
 | ||||
|         for (unsigned comp = 0; comp < 4; ++comp) { | ||||
|             float24* out = ((float24*)&ret) + semantics[comp]; | ||||
|             if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||||
|                 *out = output_regs[i][comp]; | ||||
|             } else { | ||||
|                 // Zero output so that attributes which aren't output won't have denormals in them,
 | ||||
|                 // which would slow us down later.
 | ||||
|                 memset(out, 0, sizeof(*out)); | ||||
|             const Regs::VSOutputAttributes::Semantic semantic = semantics[comp]; | ||||
|             // Index vertex_slots only after the bounds check: semantics such as INVALID reach | ||||
|             // the else branch below, i.e. they are not valid slot indices, and | ||||
|             // std::array::operator[] has undefined behavior for an out-of-range index. | ||||
|             if (semantic < vertex_slots.size()) { | ||||
|                 vertex_slots[semantic] = input.attr[i][comp]; | ||||
|             } else if (semantic != Regs::VSOutputAttributes::INVALID) { | ||||
|                 // Anything out of range other than INVALID is unexpected, so report it. | ||||
|                 LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         index++; | ||||
|     } | ||||
| 
 | ||||
|     // The hardware takes the absolute and saturates vertex colors like this, *before* doing
 | ||||
|  | @ -71,12 +66,20 @@ OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], co | |||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { | ||||
|     // Setup input register table
 | ||||
|     const auto& attribute_register_map = g_state.regs.vs.input_register_map; | ||||
| void UnitState::LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input) { | ||||
|     const unsigned max_attribute = config.max_input_attribute_index; | ||||
| 
 | ||||
|     for (int i = 0; i < num_attributes; i++) | ||||
|         registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; | ||||
|     for (unsigned attr = 0; attr <= max_attribute; ++attr) { | ||||
|         unsigned reg = config.GetRegisterForAttribute(attr); | ||||
|         registers.input[reg] = input.attr[attr]; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void UnitState::WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output) { | ||||
|     // Copy every output register whose bit is set in config.output_mask into consecutive | ||||
|     // slots of the attribute buffer, in the order the bit set yields them. | ||||
|     unsigned int next_slot = 0; | ||||
|     for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) { | ||||
|         output.attr[next_slot] = registers.output[reg]; | ||||
|         ++next_slot; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||||
|  |  | |||
|  | @ -23,14 +23,11 @@ namespace Pica { | |||
| 
 | ||||
| namespace Shader { | ||||
| 
 | ||||
| struct InputVertex { | ||||
| struct AttributeBuffer { | ||||
|     alignas(16) Math::Vec4<float24> attr[16]; | ||||
| }; | ||||
| 
 | ||||
| struct OutputVertex { | ||||
|     OutputVertex() = default; | ||||
| 
 | ||||
|     // VS output attributes
 | ||||
|     Math::Vec4<float24> pos; | ||||
|     Math::Vec4<float24> quat; | ||||
|     Math::Vec4<float24> color; | ||||
|  | @ -42,43 +39,22 @@ struct OutputVertex { | |||
|     INSERT_PADDING_WORDS(1); | ||||
|     Math::Vec2<float24> tc2; | ||||
| 
 | ||||
|     // Padding for optimal alignment
 | ||||
|     INSERT_PADDING_WORDS(4); | ||||
| 
 | ||||
|     // Attributes used to store intermediate results
 | ||||
| 
 | ||||
|     // position after perspective divide
 | ||||
|     Math::Vec3<float24> screenpos; | ||||
|     INSERT_PADDING_WORDS(1); | ||||
| 
 | ||||
|     // Linear interpolation
 | ||||
|     // factor: 0=this, 1=vtx
 | ||||
|     void Lerp(float24 factor, const OutputVertex& vtx) { | ||||
|         pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||||
| 
 | ||||
|         // TODO: Should perform perspective correct interpolation here...
 | ||||
|         tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||||
|         tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); | ||||
|         tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); | ||||
| 
 | ||||
|         screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||||
| 
 | ||||
|         color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||||
|     } | ||||
| 
 | ||||
|     // Linear interpolation
 | ||||
|     // factor: 0=v0, 1=v1
 | ||||
|     static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { | ||||
|         OutputVertex ret = v0; | ||||
|         ret.Lerp(factor, v1); | ||||
|         return ret; | ||||
|     } | ||||
| 
 | ||||
|     static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs, | ||||
|                                       u32 output_mask); | ||||
|     static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output); | ||||
| }; | ||||
| #define ASSERT_POS(var, pos)                                                                       \ | ||||
|     static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \ | ||||
|                                                                         "offset.") | ||||
| ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X); | ||||
| ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X); | ||||
| ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R); | ||||
| ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U); | ||||
| ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U); | ||||
| ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W); | ||||
| ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X); | ||||
| ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U); | ||||
| #undef ASSERT_POS | ||||
| static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||
| static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | ||||
| static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); | ||||
| 
 | ||||
| /**
 | ||||
|  * This structure contains the state information that needs to be unique for a shader unit. The 3DS | ||||
|  | @ -137,10 +113,12 @@ struct UnitState { | |||
|     /**
 | ||||
|      * Loads the unit state with an input vertex. | ||||
|      * | ||||
|      * @param input Input vertex into the shader | ||||
|      * @param num_attributes The number of vertex shader attributes to load | ||||
|      * @param config Shader configuration registers corresponding to the unit. | ||||
|      * @param input Attribute buffer to load into the input registers. | ||||
|      */ | ||||
|     void LoadInputVertex(const InputVertex& input, int num_attributes); | ||||
|     void LoadInput(const Regs::ShaderConfig& config, const AttributeBuffer& input); | ||||
| 
 | ||||
|     void WriteOutput(const Regs::ShaderConfig& config, AttributeBuffer& output); | ||||
| }; | ||||
| 
 | ||||
| struct ShaderSetup { | ||||
|  |  | |||
|  | @ -668,14 +668,14 @@ void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { | |||
| } | ||||
| 
 | ||||
| DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, | ||||
|                                                     const InputVertex& input, | ||||
|                                                     int num_attributes) const { | ||||
|                                                     const AttributeBuffer& input, | ||||
|                                                     const Regs::ShaderConfig& config) const { | ||||
|     UnitState state; | ||||
|     DebugData<true> debug_data; | ||||
| 
 | ||||
|     // Setup input register table
 | ||||
|     boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | ||||
|     state.LoadInputVertex(input, num_attributes); | ||||
|     state.LoadInput(config, input); | ||||
|     RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); | ||||
|     return debug_data; | ||||
| } | ||||
|  |  | |||
|  | @ -19,12 +19,11 @@ public: | |||
|     /**
 | ||||
|      * Produce debug information based on the given shader and input vertex | ||||
|      * @param setup Shader setup to interpret; execution starts at its engine_data.entry_point | ||||
|      * @param input Input vertex into the shader | ||||
|      * @param num_attributes The number of vertex shader attributes | ||||
|      * @param config Configuration object for the shader pipeline | ||||
|      * @return Debug information for this shader with regards to the given vertex | ||||
|      */ | ||||
|     DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, | ||||
|                                      int num_attributes) const; | ||||
|     DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input, | ||||
|                                      const Regs::ShaderConfig& config) const; | ||||
| }; | ||||
| 
 | ||||
| } // namespace
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue