mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	Pica: Add vertex shader implementation.
This commit is contained in:
		
							parent
							
								
									d443f0a921
								
							
						
					
					
						commit
						c526512619
					
				
					 7 changed files with 722 additions and 10 deletions
				
			
		|  | @ -1,5 +1,6 @@ | ||||||
| set(SRCS    command_processor.cpp | set(SRCS    command_processor.cpp | ||||||
|             utils.cpp |             utils.cpp | ||||||
|  |             vertex_shader.cpp | ||||||
|             video_core.cpp |             video_core.cpp | ||||||
|             renderer_opengl/renderer_opengl.cpp) |             renderer_opengl/renderer_opengl.cpp) | ||||||
| 
 | 
 | ||||||
|  | @ -8,6 +9,7 @@ set(HEADERS command_processor.h | ||||||
|             utils.h |             utils.h | ||||||
|             video_core.h |             video_core.h | ||||||
|             renderer_base.h |             renderer_base.h | ||||||
|  |             vertex_shader.h | ||||||
|             video_core.h |             video_core.h | ||||||
|             renderer_opengl/renderer_opengl.h) |             renderer_opengl/renderer_opengl.h) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -2,9 +2,10 @@ | ||||||
| // Licensed under GPLv2
 | // Licensed under GPLv2
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include "pica.h" |  | ||||||
| #include "command_processor.h" | #include "command_processor.h" | ||||||
| #include "math.h" | #include "math.h" | ||||||
|  | #include "pica.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| namespace Pica { | namespace Pica { | ||||||
|  | @ -13,6 +14,14 @@ Regs registers; | ||||||
| 
 | 
 | ||||||
| namespace CommandProcessor { | namespace CommandProcessor { | ||||||
| 
 | 
 | ||||||
|  | static int float_regs_counter = 0; | ||||||
|  | 
 | ||||||
|  | static u32 uniform_write_buffer[4]; | ||||||
|  | 
 | ||||||
|  | // Used for VSLoadProgramData and VSLoadSwizzleData
 | ||||||
|  | static u32 vs_binary_write_offset = 0; | ||||||
|  | static u32 vs_swizzle_write_offset = 0; | ||||||
|  | 
 | ||||||
| static inline void WritePicaReg(u32 id, u32 value) { | static inline void WritePicaReg(u32 id, u32 value) { | ||||||
|     u32 old_value = registers[id]; |     u32 old_value = registers[id]; | ||||||
|     registers[id] = value; |     registers[id] = value; | ||||||
|  | @ -67,9 +76,7 @@ static inline void WritePicaReg(u32 id, u32 value) { | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 // Initialize data for the current vertex
 |                 // Initialize data for the current vertex
 | ||||||
|                 struct { |                 VertexShader::InputVertex input; | ||||||
|                     Math::Vec4<float24> attr[16]; |  | ||||||
|                 } input; |  | ||||||
| 
 | 
 | ||||||
|                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { |                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||||||
|                     for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |                     for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  | @ -87,7 +94,7 @@ static inline void WritePicaReg(u32 id, u32 value) { | ||||||
|                                   input.attr[i][comp].ToFloat32()); |                                   input.attr[i][comp].ToFloat32()); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 // TODO: Run vertex data through vertex shader
 |                 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); | ||||||
| 
 | 
 | ||||||
|                 if (is_indexed) { |                 if (is_indexed) { | ||||||
|                     // TODO: Add processed vertex to vertex cache!
 |                     // TODO: Add processed vertex to vertex cache!
 | ||||||
|  | @ -98,6 +105,97 @@ static inline void WritePicaReg(u32 id, u32 value) { | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): | ||||||
|  |         { | ||||||
|  |             auto& uniform_setup = registers.vs_uniform_setup; | ||||||
|  | 
 | ||||||
|  |             // TODO: Does actual hardware indeed keep an intermediate buffer or does
 | ||||||
|  |             //       it directly write the values?
 | ||||||
|  |             uniform_write_buffer[float_regs_counter++] = value; | ||||||
|  | 
 | ||||||
|  |             // Uniforms are written in a packed format such that 4 float24 values are encoded in
 | ||||||
|  |             // three 32-bit numbers. We write to internal memory once a full such vector is
 | ||||||
|  |             // written.
 | ||||||
|  |             if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||||||
|  |                 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||||||
|  |                 float_regs_counter = 0; | ||||||
|  | 
 | ||||||
|  |                 auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index); | ||||||
|  | 
 | ||||||
|  |                 if (uniform_setup.index > 95) { | ||||||
|  |                     ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 // NOTE: The destination component order indeed is "backwards"
 | ||||||
|  |                 if (uniform_setup.IsFloat32()) { | ||||||
|  |                     for (auto i : {0,1,2,3}) | ||||||
|  |                         uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | ||||||
|  |                 } else { | ||||||
|  |                     // TODO: Untested
 | ||||||
|  |                     uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); | ||||||
|  |                     uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||||||
|  |                     uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||||||
|  |                     uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, | ||||||
|  |                           uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), | ||||||
|  |                           uniform.w.ToFloat32()); | ||||||
|  | 
 | ||||||
|  |                 // TODO: Verify that this actually modifies the register!
 | ||||||
|  |                 uniform_setup.index = uniform_setup.index + 1; | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Seems to be used to reset the write pointer for VSLoadProgramData
 | ||||||
|  |         case PICA_REG_INDEX(vs_program.begin_load): | ||||||
|  |             vs_binary_write_offset = 0; | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         // Load shader program code
 | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): | ||||||
|  |         { | ||||||
|  |             VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value); | ||||||
|  |             vs_binary_write_offset++; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Seems to be used to reset the write pointer for VSLoadSwizzleData
 | ||||||
|  |         case PICA_REG_INDEX(vs_swizzle_patterns.begin_load): | ||||||
|  |             vs_swizzle_write_offset = 0; | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         // Load swizzle pattern data
 | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | ||||||
|  |         { | ||||||
|  |             VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value); | ||||||
|  |             vs_swizzle_write_offset++; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         default: |         default: | ||||||
|             break; |             break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -50,7 +50,39 @@ struct Regs { | ||||||
|     INSERT_PADDING_WORDS(0x1); |     INSERT_PADDING_WORDS(0x1); | ||||||
|     BitField<0, 24, u32> viewport_size_y; |     BitField<0, 24, u32> viewport_size_y; | ||||||
| 
 | 
 | ||||||
|     INSERT_PADDING_WORDS(0x1bc); |     INSERT_PADDING_WORDS(0xc); | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         // Maps components of output vertex attributes to semantics
 | ||||||
|  |         enum Semantic : u32 | ||||||
|  |         { | ||||||
|  |             POSITION_X   =  0, | ||||||
|  |             POSITION_Y   =  1, | ||||||
|  |             POSITION_Z   =  2, | ||||||
|  |             POSITION_W   =  3, | ||||||
|  | 
 | ||||||
|  |             COLOR_R      =  8, | ||||||
|  |             COLOR_G      =  9, | ||||||
|  |             COLOR_B      = 10, | ||||||
|  |             COLOR_A      = 11, | ||||||
|  | 
 | ||||||
|  |             TEXCOORD0_U  = 12, | ||||||
|  |             TEXCOORD0_V  = 13, | ||||||
|  |             TEXCOORD1_U  = 14, | ||||||
|  |             TEXCOORD1_V  = 15, | ||||||
|  |             TEXCOORD2_U  = 22, | ||||||
|  |             TEXCOORD2_V  = 23, | ||||||
|  | 
 | ||||||
|  |             INVALID      = 31, | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         BitField< 0, 5, Semantic> map_x; | ||||||
|  |         BitField< 8, 5, Semantic> map_y; | ||||||
|  |         BitField<16, 5, Semantic> map_z; | ||||||
|  |         BitField<24, 5, Semantic> map_w; | ||||||
|  |     } vs_output_attributes[7]; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x1a9); | ||||||
| 
 | 
 | ||||||
|     struct { |     struct { | ||||||
|         enum class Format : u64 { |         enum class Format : u64 { | ||||||
|  | @ -133,7 +165,7 @@ struct Regs { | ||||||
| 
 | 
 | ||||||
|         // Attribute loaders map the source vertex data to input attributes
 |         // Attribute loaders map the source vertex data to input attributes
 | ||||||
|         // This e.g. allows to load different attributes from different memory locations
 |         // This e.g. allows to load different attributes from different memory locations
 | ||||||
|         struct Loader { |         struct { | ||||||
|             // Source attribute data offset from the base address
 |             // Source attribute data offset from the base address
 | ||||||
|             u32 data_offset; |             u32 data_offset; | ||||||
| 
 | 
 | ||||||
|  | @ -189,7 +221,90 @@ struct Regs { | ||||||
|     u32 trigger_draw; |     u32 trigger_draw; | ||||||
|     u32 trigger_draw_indexed; |     u32 trigger_draw_indexed; | ||||||
| 
 | 
 | ||||||
|     INSERT_PADDING_WORDS(0xd0); |     INSERT_PADDING_WORDS(0x8a); | ||||||
|  | 
 | ||||||
|  |     // Offset to shader program entry point (in words)
 | ||||||
|  |     BitField<0, 16, u32> vs_main_offset; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField< 0, 4, u64> attribute0_register; | ||||||
|  |         BitField< 4, 4, u64> attribute1_register; | ||||||
|  |         BitField< 8, 4, u64> attribute2_register; | ||||||
|  |         BitField<12, 4, u64> attribute3_register; | ||||||
|  |         BitField<16, 4, u64> attribute4_register; | ||||||
|  |         BitField<20, 4, u64> attribute5_register; | ||||||
|  |         BitField<24, 4, u64> attribute6_register; | ||||||
|  |         BitField<28, 4, u64> attribute7_register; | ||||||
|  |         BitField<32, 4, u64> attribute8_register; | ||||||
|  |         BitField<36, 4, u64> attribute9_register; | ||||||
|  |         BitField<40, 4, u64> attribute10_register; | ||||||
|  |         BitField<44, 4, u64> attribute11_register; | ||||||
|  |         BitField<48, 4, u64> attribute12_register; | ||||||
|  |         BitField<52, 4, u64> attribute13_register; | ||||||
|  |         BitField<56, 4, u64> attribute14_register; | ||||||
|  |         BitField<60, 4, u64> attribute15_register; | ||||||
|  | 
 | ||||||
|  |         int GetRegisterForAttribute(int attribute_index) { | ||||||
|  |             u64 fields[] = { | ||||||
|  |                 attribute0_register,  attribute1_register,  attribute2_register,  attribute3_register, | ||||||
|  |                 attribute4_register,  attribute5_register,  attribute6_register,  attribute7_register, | ||||||
|  |                 attribute8_register,  attribute9_register,  attribute10_register, attribute11_register, | ||||||
|  |                 attribute12_register, attribute13_register, attribute14_register, attribute15_register, | ||||||
|  |             }; | ||||||
|  |             return (int)fields[attribute_index]; | ||||||
|  |         } | ||||||
|  |     } vs_input_register_map; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x3); | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|  |         enum Format : u32 | ||||||
|  |         { | ||||||
|  |             FLOAT24 = 0, | ||||||
|  |             FLOAT32 = 1 | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         bool IsFloat32() const { | ||||||
|  |             return format == FLOAT32; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         union { | ||||||
|  |             // Index of the next uniform to write to
 | ||||||
|  |             // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
 | ||||||
|  |             BitField<0, 7, u32> index; | ||||||
|  | 
 | ||||||
|  |             BitField<31, 1, Format> format; | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         // Writing to these registers sets the "current" uniform.
 | ||||||
|  |         // TODO: It's not clear how the hardware stores what the "current" uniform is.
 | ||||||
|  |         u32 set_value[8]; | ||||||
|  | 
 | ||||||
|  |     } vs_uniform_setup; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x2); | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|  |         u32 begin_load; | ||||||
|  | 
 | ||||||
|  |         // Writing to these registers sets the "current" word in the shader program.
 | ||||||
|  |         // TODO: It's not clear how the hardware stores what the "current" word is.
 | ||||||
|  |         u32 set_word[8]; | ||||||
|  |     } vs_program; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x1); | ||||||
|  | 
 | ||||||
|  |     // This register group is used to load an internal table of swizzling patterns,
 | ||||||
|  |     // which are indexed by each shader instruction to specify vector component swizzling.
 | ||||||
|  |     struct { | ||||||
|  |         u32 begin_load; | ||||||
|  | 
 | ||||||
|  |         // Writing to these registers sets the "current" swizzle pattern in the table.
 | ||||||
|  |         // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is.
 | ||||||
|  |         u32 set_word[8]; | ||||||
|  |     } vs_swizzle_patterns; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x22); | ||||||
| 
 | 
 | ||||||
| #undef INSERT_PADDING_WORDS_HELPER1 | #undef INSERT_PADDING_WORDS_HELPER1 | ||||||
| #undef INSERT_PADDING_WORDS_HELPER2 | #undef INSERT_PADDING_WORDS_HELPER2 | ||||||
|  | @ -219,6 +334,11 @@ struct Regs { | ||||||
|         ADD_FIELD(num_vertices); |         ADD_FIELD(num_vertices); | ||||||
|         ADD_FIELD(trigger_draw); |         ADD_FIELD(trigger_draw); | ||||||
|         ADD_FIELD(trigger_draw_indexed); |         ADD_FIELD(trigger_draw_indexed); | ||||||
|  |         ADD_FIELD(vs_main_offset); | ||||||
|  |         ADD_FIELD(vs_input_register_map); | ||||||
|  |         ADD_FIELD(vs_uniform_setup); | ||||||
|  |         ADD_FIELD(vs_program); | ||||||
|  |         ADD_FIELD(vs_swizzle_patterns); | ||||||
| 
 | 
 | ||||||
|         #undef ADD_FIELD |         #undef ADD_FIELD | ||||||
|         #endif // _MSC_VER
 |         #endif // _MSC_VER
 | ||||||
|  | @ -259,17 +379,25 @@ private: | ||||||
| 
 | 
 | ||||||
| ASSERT_REG_POSITION(viewport_size_x, 0x41); | ASSERT_REG_POSITION(viewport_size_x, 0x41); | ||||||
| ASSERT_REG_POSITION(viewport_size_y, 0x43); | ASSERT_REG_POSITION(viewport_size_y, 0x43); | ||||||
|  | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); | ||||||
|  | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); | ||||||
| ASSERT_REG_POSITION(vertex_attributes, 0x200); | ASSERT_REG_POSITION(vertex_attributes, 0x200); | ||||||
| ASSERT_REG_POSITION(index_array, 0x227); | ASSERT_REG_POSITION(index_array, 0x227); | ||||||
| ASSERT_REG_POSITION(num_vertices, 0x228); | ASSERT_REG_POSITION(num_vertices, 0x228); | ||||||
| ASSERT_REG_POSITION(trigger_draw, 0x22e); | ASSERT_REG_POSITION(trigger_draw, 0x22e); | ||||||
| ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | ||||||
|  | ASSERT_REG_POSITION(vs_main_offset, 0x2ba); | ||||||
|  | ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); | ||||||
|  | ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); | ||||||
|  | ASSERT_REG_POSITION(vs_program, 0x2cb); | ||||||
|  | ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); | ||||||
| 
 | 
 | ||||||
| #undef ASSERT_REG_POSITION | #undef ASSERT_REG_POSITION | ||||||
| #endif // !defined(_MSC_VER)
 | #endif // !defined(_MSC_VER)
 | ||||||
| 
 | 
 | ||||||
| // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
 | ||||||
| static_assert(sizeof(Regs) == 0x300 * sizeof(u32), "Invalid total size of register set"); | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | ||||||
|  | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | ||||||
| 
 | 
 | ||||||
| extern Regs registers; // TODO: Not sure if we want to have one global instance for this
 | extern Regs registers; // TODO: Not sure if we want to have one global instance for this
 | ||||||
| 
 | 
 | ||||||
|  | @ -347,7 +475,6 @@ private: | ||||||
|     float value; |     float value; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| union CommandHeader { | union CommandHeader { | ||||||
|     CommandHeader(u32 h) : hex(h) {} |     CommandHeader(u32 h) : hex(h) {} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										270
									
								
								src/video_core/vertex_shader.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										270
									
								
								src/video_core/vertex_shader.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,270 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "pica.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
|  | #include <core/mem_map.h> | ||||||
|  | #include <common/file_util.h> | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace VertexShader { | ||||||
|  | 
 | ||||||
|  | static struct { | ||||||
|  |     Math::Vec4<float24> f[96]; | ||||||
|  | } shader_uniforms; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
 | ||||||
|  | // For now, we just keep these local arrays around.
 | ||||||
|  | static u32 shader_memory[1024]; | ||||||
|  | static u32 swizzle_data[1024]; | ||||||
|  | 
 | ||||||
|  | void SubmitShaderMemoryChange(u32 addr, u32 value) | ||||||
|  | { | ||||||
|  |     shader_memory[addr] = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SubmitSwizzleDataChange(u32 addr, u32 value) | ||||||
|  | { | ||||||
|  |     swizzle_data[addr] = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Math::Vec4<float24>& GetFloatUniform(u32 index) | ||||||
|  | { | ||||||
|  |     return shader_uniforms.f[index]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct VertexShaderState { | ||||||
|  |     u32* program_counter; | ||||||
|  | 
 | ||||||
|  |     const float24* input_register_table[16]; | ||||||
|  |     float24* output_register_table[7*4]; | ||||||
|  | 
 | ||||||
|  |     Math::Vec4<float24> temporary_registers[16]; | ||||||
|  |     bool status_registers[2]; | ||||||
|  | 
 | ||||||
|  |     enum { | ||||||
|  |         INVALID_ADDRESS = 0xFFFFFFFF | ||||||
|  |     }; | ||||||
|  |     u32 call_stack[8]; // TODO: What is the maximal call stack depth?
 | ||||||
|  |     u32* call_stack_pointer; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void ProcessShaderCode(VertexShaderState& state) { | ||||||
|  |     while (true) { | ||||||
|  |         bool increment_pc = true; | ||||||
|  |         bool exit_loop = false; | ||||||
|  |         const Instruction& instr = *(const Instruction*)state.program_counter; | ||||||
|  | 
 | ||||||
|  |         const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | ||||||
|  |                              : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | ||||||
|  |                              : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x | ||||||
|  |                              : nullptr; | ||||||
|  |         const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] | ||||||
|  |                              : &state.temporary_registers[instr.common.src2-0x10].x; | ||||||
|  |         // TODO: Unsure about the limit values
 | ||||||
|  |         float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] | ||||||
|  |                              : (instr.common.dest <= 0x3C) ? nullptr | ||||||
|  |                              : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] | ||||||
|  |                              : nullptr; | ||||||
|  | 
 | ||||||
|  |         const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | ||||||
|  | 
 | ||||||
|  |         const float24 src1[4] = { | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(0)], | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(1)], | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(2)], | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(3)], | ||||||
|  |         }; | ||||||
|  |         const float24 src2[4] = { | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(0)], | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(1)], | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(2)], | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(3)], | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         switch (instr.opcode) { | ||||||
|  |             case Instruction::OpCode::ADD: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = src1[i] + src2[i]; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::MUL: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = src1[i] * src2[i]; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::DP3: | ||||||
|  |             case Instruction::OpCode::DP4: | ||||||
|  |             { | ||||||
|  |                 float24 dot = float24::FromFloat32(0.f); | ||||||
|  |                 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | ||||||
|  |                 for (int i = 0; i < num_components; ++i) | ||||||
|  |                     dot = dot + src1[i] * src2[i]; | ||||||
|  | 
 | ||||||
|  |                 for (int i = 0; i < num_components; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = dot; | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Reciprocal
 | ||||||
|  |             case Instruction::OpCode::RCP: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     // TODO: Be stable against division by zero!
 | ||||||
|  |                     // TODO: I think this might be wrong... we should only use one component here
 | ||||||
|  |                     dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32()); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Reciprocal Square Root
 | ||||||
|  |             case Instruction::OpCode::RSQ: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     // TODO: Be stable against division by zero!
 | ||||||
|  |                     // TODO: I think this might be wrong... we should only use one component here
 | ||||||
|  |                     dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32())); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::MOV: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = src1[i]; | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::RET: | ||||||
|  |                 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | ||||||
|  |                     exit_loop = true; | ||||||
|  |                 } else { | ||||||
|  |                     state.program_counter = &shader_memory[*state.call_stack_pointer--]; | ||||||
|  |                     *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::CALL: | ||||||
|  |                 increment_pc = false; | ||||||
|  | 
 | ||||||
|  |                 _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); | ||||||
|  | 
 | ||||||
|  |                 *++state.call_stack_pointer = state.program_counter - shader_memory; | ||||||
|  |                 // TODO: Does this offset refer to the beginning of shader memory?
 | ||||||
|  |                 state.program_counter = &shader_memory[instr.flow_control.offset_words]; | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::FLS: | ||||||
|  |                 // TODO: Do whatever needs to be done here?
 | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|  |             default: | ||||||
|  |                 ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||||||
|  |                           (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); | ||||||
|  |                 break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (increment_pc) | ||||||
|  |             ++state.program_counter; | ||||||
|  | 
 | ||||||
|  |         if (exit_loop) | ||||||
|  |             break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | OutputVertex RunShader(const InputVertex& input, int num_attributes) | ||||||
|  | { | ||||||
|  |     VertexShaderState state; | ||||||
|  | 
 | ||||||
|  |     const u32* main = &shader_memory[registers.vs_main_offset]; | ||||||
|  |     state.program_counter = (u32*)main; | ||||||
|  | 
 | ||||||
|  |     // Setup input register table
 | ||||||
|  |     const auto& attribute_register_map = registers.vs_input_register_map; | ||||||
|  |     float24 dummy_register; | ||||||
|  |     std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register); | ||||||
|  |     if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||||||
|  |     if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||||||
|  |     if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||||||
|  |     if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||||||
|  |     if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||||||
|  |     if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||||||
|  |     if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||||||
|  |     if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||||||
|  |     if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||||||
|  |     if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||||||
|  |     if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||||||
|  |     if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||||||
|  |     if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||||||
|  |     if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||||||
|  |     if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||||||
|  |     if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||||||
|  | 
 | ||||||
|  |     // Setup output register table
 | ||||||
|  |     OutputVertex ret; | ||||||
|  |     for (int i = 0; i < 7; ++i) { | ||||||
|  |         const auto& output_register_map = registers.vs_output_attributes[i]; | ||||||
|  | 
 | ||||||
|  |         u32 semantics[4] = { | ||||||
|  |             output_register_map.map_x, output_register_map.map_y, | ||||||
|  |             output_register_map.map_z, output_register_map.map_w | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         for (int comp = 0; comp < 4; ++comp) | ||||||
|  |             state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     state.status_registers[0] = false; | ||||||
|  |     state.status_registers[1] = false; | ||||||
|  |     std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]), | ||||||
|  |               VertexShaderState::INVALID_ADDRESS); | ||||||
|  |     state.call_stack_pointer = &state.call_stack[0]; | ||||||
|  | 
 | ||||||
|  |     ProcessShaderCode(state); | ||||||
|  | 
 | ||||||
|  |     DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||||||
|  |         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||||||
|  |         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||||||
|  |         ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||||||
|  | 
 | ||||||
|  |     return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
							
								
								
									
										211
									
								
								src/video_core/vertex_shader.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								src/video_core/vertex_shader.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,211 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <initializer_list> | ||||||
|  | 
 | ||||||
|  | #include <common/common_types.h> | ||||||
|  | 
 | ||||||
|  | #include "math.h" | ||||||
|  | #include "pica.h" | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace VertexShader { | ||||||
|  | 
 | ||||||
|  | struct InputVertex { | ||||||
|  |     Math::Vec4<float24> attr[16]; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct OutputVertex { | ||||||
|  |     OutputVertex() = default; | ||||||
|  | 
 | ||||||
|  |     // VS output attributes
 | ||||||
|  |     Math::Vec4<float24> pos; | ||||||
|  |     Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
 | ||||||
|  |     Math::Vec4<float24> color; | ||||||
|  |     Math::Vec2<float24> tc0; | ||||||
|  |     float24 tc0_v; | ||||||
|  | 
 | ||||||
|  |     // Padding for optimal alignment
 | ||||||
|  |     float24 pad[14]; | ||||||
|  | 
 | ||||||
|  |     // Attributes used to store intermediate results
 | ||||||
|  | 
 | ||||||
|  |     // position after perspective divide
 | ||||||
|  |     Math::Vec3<float24> screenpos; | ||||||
|  | 
 | ||||||
|  |     // Linear interpolation
 | ||||||
|  |     // factor: 0=this, 1=vtx
 | ||||||
|  |     void Lerp(float24 factor, const OutputVertex& vtx) { | ||||||
|  |         pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||||||
|  | 
 | ||||||
|  |         // TODO: Should perform perspective correct interpolation here...
 | ||||||
|  |         tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||||||
|  | 
 | ||||||
|  |         screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||||||
|  | 
 | ||||||
|  |         color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Linear interpolation
 | ||||||
|  |     // factor: 0=v0, 1=v1
 | ||||||
|  |     static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { | ||||||
|  |         OutputVertex ret = v0; | ||||||
|  |         ret.Lerp(factor, v1); | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||||
|  | 
 | ||||||
|  | union Instruction { | ||||||
|  |     enum class OpCode : u32 { | ||||||
|  |         ADD = 0x0, | ||||||
|  |         DP3 = 0x1, | ||||||
|  |         DP4 = 0x2, | ||||||
|  | 
 | ||||||
|  |         MUL = 0x8, | ||||||
|  | 
 | ||||||
|  |         MAX = 0xC, | ||||||
|  |         MIN = 0xD, | ||||||
|  |         RCP = 0xE, | ||||||
|  |         RSQ = 0xF, | ||||||
|  | 
 | ||||||
|  |         MOV = 0x13, | ||||||
|  | 
 | ||||||
|  |         RET = 0x21, | ||||||
|  |         FLS = 0x22, // Flush
 | ||||||
|  |         CALL = 0x24, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     std::string GetOpCodeName() const { | ||||||
|  |         std::map<OpCode, std::string> map = { | ||||||
|  |             { OpCode::ADD, "ADD" }, | ||||||
|  |             { OpCode::DP3, "DP3" }, | ||||||
|  |             { OpCode::DP4, "DP4" }, | ||||||
|  |             { OpCode::MUL, "MUL" }, | ||||||
|  |             { OpCode::MAX, "MAX" }, | ||||||
|  |             { OpCode::MIN, "MIN" }, | ||||||
|  |             { OpCode::RCP, "RCP" }, | ||||||
|  |             { OpCode::RSQ, "RSQ" }, | ||||||
|  |             { OpCode::MOV, "MOV" }, | ||||||
|  |             { OpCode::RET, "RET" }, | ||||||
|  |             { OpCode::FLS, "FLS" }, | ||||||
|  |         }; | ||||||
|  |         auto it = map.find(opcode); | ||||||
|  |         if (it == map.end()) | ||||||
|  |             return "UNK"; | ||||||
|  |         else | ||||||
|  |             return it->second; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u32 hex; | ||||||
|  | 
 | ||||||
|  |     BitField<0x1a, 0x6, OpCode> opcode; | ||||||
|  | 
 | ||||||
|  |     // General notes:
 | ||||||
|  |     //
 | ||||||
|  |     // When two input registers are used, one of them uses a 5-bit index while the other
 | ||||||
|  |     // one uses a 7-bit index. This is because at most one floating point uniform may be used
 | ||||||
|  |     // as an input.
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |     // Format used e.g. by arithmetic instructions and comparisons
 | ||||||
|  |     // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
 | ||||||
|  |     // while "dest" addresses individual floats.
 | ||||||
|  |     union { | ||||||
|  |         BitField<0x00, 0x5, u32> operand_desc_id; | ||||||
|  |         BitField<0x07, 0x5, u32> src2; | ||||||
|  |         BitField<0x0c, 0x7, u32> src1; | ||||||
|  |         BitField<0x13, 0x7, u32> dest; | ||||||
|  |     } common; | ||||||
|  | 
 | ||||||
|  |     // Format used for flow control instructions ("if")
 | ||||||
|  |     union { | ||||||
|  |         BitField<0x00, 0x8, u32> num_instructions; | ||||||
|  |         BitField<0x0a, 0xc, u32> offset_words; | ||||||
|  |     } flow_control; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union SwizzlePattern { | ||||||
|  |     u32 hex; | ||||||
|  | 
 | ||||||
|  |     enum class Selector : u32 { | ||||||
|  |         x = 0, | ||||||
|  |         y = 1, | ||||||
|  |         z = 2, | ||||||
|  |         w = 3 | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     Selector GetSelectorSrc1(int comp) const { | ||||||
|  |         Selector selectors[] = { | ||||||
|  |             src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 | ||||||
|  |         }; | ||||||
|  |         return selectors[comp]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     Selector GetSelectorSrc2(int comp) const { | ||||||
|  |         Selector selectors[] = { | ||||||
|  |             src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 | ||||||
|  |         }; | ||||||
|  |         return selectors[comp]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool DestComponentEnabled(int i) const { | ||||||
|  |         return (dest_mask & (0x8 >> i)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::string SelectorToString(bool src2) const { | ||||||
|  |         std::map<Selector, std::string> map = { | ||||||
|  |             { Selector::x, "x" }, | ||||||
|  |             { Selector::y, "y" }, | ||||||
|  |             { Selector::z, "z" }, | ||||||
|  |             { Selector::w, "w" } | ||||||
|  |         }; | ||||||
|  |         std::string ret; | ||||||
|  |         for (int i = 0; i < 4; ++i) { | ||||||
|  |             ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); | ||||||
|  |         } | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::string DestMaskToString() const { | ||||||
|  |         std::string ret; | ||||||
|  |         for (int i = 0; i < 4; ++i) { | ||||||
|  |             if (!DestComponentEnabled(i)) | ||||||
|  |                 ret += "_"; | ||||||
|  |             else | ||||||
|  |                 ret += "xyzw"[i]; | ||||||
|  |         } | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
 | ||||||
|  |     BitField< 0, 4, u32> dest_mask; | ||||||
|  | 
 | ||||||
|  |     BitField< 5, 2, Selector> src1_selector_3; | ||||||
|  |     BitField< 7, 2, Selector> src1_selector_2; | ||||||
|  |     BitField< 9, 2, Selector> src1_selector_1; | ||||||
|  |     BitField<11, 2, Selector> src1_selector_0; | ||||||
|  | 
 | ||||||
|  |     BitField<14, 2, Selector> src2_selector_3; | ||||||
|  |     BitField<16, 2, Selector> src2_selector_2; | ||||||
|  |     BitField<18, 2, Selector> src2_selector_1; | ||||||
|  |     BitField<20, 2, Selector> src2_selector_0; | ||||||
|  | 
 | ||||||
|  |     BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | void SubmitShaderMemoryChange(u32 addr, u32 value); | ||||||
|  | void SubmitSwizzleDataChange(u32 addr, u32 value); | ||||||
|  | 
 | ||||||
|  | OutputVertex RunShader(const InputVertex& input, int num_attributes); | ||||||
|  | 
 | ||||||
|  | Math::Vec4<float24>& GetFloatUniform(u32 index); | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | @ -22,6 +22,7 @@ | ||||||
|     <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> |     <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> | ||||||
|     <ClCompile Include="command_processor.cpp" /> |     <ClCompile Include="command_processor.cpp" /> | ||||||
|     <ClCompile Include="utils.cpp" /> |     <ClCompile Include="utils.cpp" /> | ||||||
|  |     <ClCompile Include="vertex_shader.cpp" /> | ||||||
|     <ClCompile Include="video_core.cpp" /> |     <ClCompile Include="video_core.cpp" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|  | @ -31,6 +32,7 @@ | ||||||
|     <ClInclude Include="pica.h" /> |     <ClInclude Include="pica.h" /> | ||||||
|     <ClInclude Include="renderer_base.h" /> |     <ClInclude Include="renderer_base.h" /> | ||||||
|     <ClInclude Include="utils.h" /> |     <ClInclude Include="utils.h" /> | ||||||
|  |     <ClInclude Include="vertex_shader.h" /> | ||||||
|     <ClInclude Include="video_core.h" /> |     <ClInclude Include="video_core.h" /> | ||||||
|     <ClInclude Include="renderer_opengl\renderer_opengl.h" /> |     <ClInclude Include="renderer_opengl\renderer_opengl.h" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
|     </ClCompile> |     </ClCompile> | ||||||
|     <ClCompile Include="command_processor.cpp" /> |     <ClCompile Include="command_processor.cpp" /> | ||||||
|     <ClCompile Include="utils.cpp" /> |     <ClCompile Include="utils.cpp" /> | ||||||
|  |     <ClCompile Include="vertex_shader.cpp" /> | ||||||
|     <ClCompile Include="video_core.cpp" /> |     <ClCompile Include="video_core.cpp" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|  | @ -23,6 +24,7 @@ | ||||||
|     <ClInclude Include="pica.h" /> |     <ClInclude Include="pica.h" /> | ||||||
|     <ClInclude Include="renderer_base.h" /> |     <ClInclude Include="renderer_base.h" /> | ||||||
|     <ClInclude Include="utils.h" /> |     <ClInclude Include="utils.h" /> | ||||||
|  |     <ClInclude Include="vertex_shader.h" /> | ||||||
|     <ClInclude Include="video_core.h" /> |     <ClInclude Include="video_core.h" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue