mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Videocore: Implement simple vertex caching
This gives a ~2/3 reduction in the number of vertices that need to be processed through the vertex loaders and the vertex shader, yielding a good speedup.
This commit is contained in:
		
							parent
							
								
									4d086a4db4
								
							
						
					
					
						commit
						a96502edd3
					
				
					 1 changed files with 90 additions and 63 deletions
				
			
		|  | @ -206,88 +206,115 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|                 std::map<u32, u32> ranges; |                 std::map<u32, u32> ranges; | ||||||
|             } memory_accesses; |             } memory_accesses; | ||||||
| 
 | 
 | ||||||
|  |             // Simple circular-replacement vertex cache
 | ||||||
|  |             // The size has been tuned for optimal balance between hit-rate and the cost of lookup
 | ||||||
|  |             const size_t VERTEX_CACHE_SIZE = 32; | ||||||
|  |             std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||||||
|  |             std::array<VertexShader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; | ||||||
|  | 
 | ||||||
|  |             unsigned int vertex_cache_pos = 0; | ||||||
|  |             vertex_cache_ids.fill(-1); | ||||||
|  | 
 | ||||||
|             for (unsigned int index = 0; index < regs.num_vertices; ++index) |             for (unsigned int index = 0; index < regs.num_vertices; ++index) | ||||||
|             { |             { | ||||||
|                 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; |                 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; | ||||||
| 
 | 
 | ||||||
|  |                 // -1 is a common special value used for primitive restart. Since it's unknown if
 | ||||||
|  |                 // the PICA supports it, and it would mess up the caching, guard against it here.
 | ||||||
|  |                 ASSERT(vertex != -1); | ||||||
|  | 
 | ||||||
|  |                 bool vertex_cache_hit = false; | ||||||
|  |                 VertexShader::OutputVertex output; | ||||||
|  | 
 | ||||||
|                 if (is_indexed) { |                 if (is_indexed) { | ||||||
|                     // TODO: Implement some sort of vertex cache!
 |  | ||||||
|                     if (g_debug_context && Pica::g_debug_context->recorder) { |                     if (g_debug_context && Pica::g_debug_context->recorder) { | ||||||
|                         int size = index_u16 ? 2 : 1; |                         int size = index_u16 ? 2 : 1; | ||||||
|                         memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); |                         memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); | ||||||
|                     } |                     } | ||||||
|                 } |  | ||||||
| 
 | 
 | ||||||
|                 // Initialize data for the current vertex
 |                     for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||||||
|                 VertexShader::InputVertex input; |                         if (vertex == vertex_cache_ids[i]) { | ||||||
| 
 |                             output = vertex_cache[i]; | ||||||
|                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { |                             vertex_cache_hit = true; | ||||||
|                     if (vertex_attribute_elements[i] != 0) { |                             break; | ||||||
|                         // Default attribute values set if array elements have < 4 components. This
 |  | ||||||
|                         // is *not* carried over from the default attribute settings even if they're
 |  | ||||||
|                         // enabled for this attribute.
 |  | ||||||
|                         static const float24 zero = float24::FromFloat32(0.0f); |  | ||||||
|                         static const float24 one = float24::FromFloat32(1.0f); |  | ||||||
|                         input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); |  | ||||||
| 
 |  | ||||||
|                         // Load per-vertex data from the loader arrays
 |  | ||||||
|                         for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |  | ||||||
|                             u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; |  | ||||||
|                             const u8* srcdata = Memory::GetPhysicalPointer(source_addr); |  | ||||||
| 
 |  | ||||||
|                             if (g_debug_context && Pica::g_debug_context->recorder) { |  | ||||||
|                                 memory_accesses.AddAccess(source_addr, |  | ||||||
|                                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 |  | ||||||
|                                     : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); |  | ||||||
|                             } |  | ||||||
| 
 |  | ||||||
|                             const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : |  | ||||||
|                                 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : |  | ||||||
|                                 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata : |  | ||||||
|                                 *(float*)srcdata; |  | ||||||
| 
 |  | ||||||
|                             input.attr[i][comp] = float24::FromFloat32(srcval); |  | ||||||
|                             LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", |  | ||||||
|                                 comp, i, vertex, index, |  | ||||||
|                                 attribute_config.GetPhysicalBaseAddress(), |  | ||||||
|                                 vertex_attribute_sources[i] - base_address, |  | ||||||
|                                 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], |  | ||||||
|                                 input.attr[i][comp].ToFloat32()); |  | ||||||
|                         } |                         } | ||||||
|                     } else if (attribute_config.IsDefaultAttribute(i)) { |  | ||||||
|                         // Load the default attribute if we're configured to do so
 |  | ||||||
|                         input.attr[i] = g_state.vs.default_attributes[i]; |  | ||||||
|                         LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", |  | ||||||
|                                   i, vertex, index, |  | ||||||
|                                   input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), |  | ||||||
|                                   input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); |  | ||||||
|                     } else { |  | ||||||
|                         // TODO(yuriks): In this case, no data gets loaded and the vertex remains
 |  | ||||||
|                         //              with the last value it had. This isn't currently maintained
 |  | ||||||
|                         //              as global state, however, and so won't work in Cita yet.
 |  | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (g_debug_context) |                 if (!vertex_cache_hit) { | ||||||
|                     g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); |                     // Initialize data for the current vertex
 | ||||||
|  |                     VertexShader::InputVertex input; | ||||||
|  | 
 | ||||||
|  |                     for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||||||
|  |                         if (vertex_attribute_elements[i] != 0) { | ||||||
|  |                             // Default attribute values set if array elements have < 4 components. This
 | ||||||
|  |                             // is *not* carried over from the default attribute settings even if they're
 | ||||||
|  |                             // enabled for this attribute.
 | ||||||
|  |                             static const float24 zero = float24::FromFloat32(0.0f); | ||||||
|  |                             static const float24 one = float24::FromFloat32(1.0f); | ||||||
|  |                             input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | ||||||
|  | 
 | ||||||
|  |                             // Load per-vertex data from the loader arrays
 | ||||||
|  |                             for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  |                                 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | ||||||
|  |                                 const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | ||||||
|  | 
 | ||||||
|  |                                 if (g_debug_context && Pica::g_debug_context->recorder) { | ||||||
|  |                                     memory_accesses.AddAccess(source_addr, | ||||||
|  |                                         (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | ||||||
|  |                                         : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | ||||||
|  |                                 } | ||||||
|  | 
 | ||||||
|  |                                 const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : | ||||||
|  |                                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : | ||||||
|  |                                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata : | ||||||
|  |                                     *(float*)srcdata; | ||||||
|  | 
 | ||||||
|  |                                 input.attr[i][comp] = float24::FromFloat32(srcval); | ||||||
|  |                                 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", | ||||||
|  |                                     comp, i, vertex, index, | ||||||
|  |                                     attribute_config.GetPhysicalBaseAddress(), | ||||||
|  |                                     vertex_attribute_sources[i] - base_address, | ||||||
|  |                                     vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||||||
|  |                                     input.attr[i][comp].ToFloat32()); | ||||||
|  |                             } | ||||||
|  |                         } else if (attribute_config.IsDefaultAttribute(i)) { | ||||||
|  |                             // Load the default attribute if we're configured to do so
 | ||||||
|  |                             input.attr[i] = g_state.vs.default_attributes[i]; | ||||||
|  |                             LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||||||
|  |                                       i, vertex, index, | ||||||
|  |                                       input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||||||
|  |                                       input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||||||
|  |                         } else { | ||||||
|  |                             // TODO(yuriks): In this case, no data gets loaded and the vertex
 | ||||||
|  |                             // remains with the last value it had. This isn't currently maintained
 | ||||||
|  |                             // as global state, however, and so won't work in Citra yet.
 | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  | 
 | ||||||
|  |                     if (g_debug_context) | ||||||
|  |                         g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); | ||||||
| 
 | 
 | ||||||
| #if PICA_DUMP_GEOMETRY | #if PICA_DUMP_GEOMETRY | ||||||
|                 // NOTE: When dumping geometry, we simply assume that the first input attribute
 |                     // NOTE: When dumping geometry, we simply assume that the first input attribute
 | ||||||
|                 //       corresponds to the position for now.
 |                     //       corresponds to the position for now.
 | ||||||
|                 DebugUtils::GeometryDumper::Vertex dumped_vertex = { |                     DebugUtils::GeometryDumper::Vertex dumped_vertex = { | ||||||
|                     input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() |                         input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() | ||||||
|                 }; |                     }; | ||||||
|                 using namespace std::placeholders; |                     using namespace std::placeholders; | ||||||
|                 dumping_primitive_assembler.SubmitVertex(dumped_vertex, |                     dumping_primitive_assembler.SubmitVertex(dumped_vertex, | ||||||
|                                                          std::bind(&DebugUtils::GeometryDumper::AddTriangle, |                                                              std::bind(&DebugUtils::GeometryDumper::AddTriangle, | ||||||
|                                                                    &geometry_dumper, _1, _2, _3)); |                                                                        &geometry_dumper, _1, _2, _3)); | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|                 // Send to vertex shader
 |                     // Send to vertex shader
 | ||||||
|                 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); |                     output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); | ||||||
| 
 | 
 | ||||||
|                 if (is_indexed) { |                     if (is_indexed) { | ||||||
|                     // TODO: Add processed vertex to vertex cache!
 |                         vertex_cache[vertex_cache_pos] = output; | ||||||
|  |                         vertex_cache_ids[vertex_cache_pos] = vertex; | ||||||
|  |                         vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (Settings::values.use_hw_renderer) { |                 if (Settings::values.use_hw_renderer) { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue