mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	gl_rasterizer: implement AccelerateDrawBatch to emulate PICA shader on hardware
This commit is contained in:
		
							parent
							
								
									15d14be3cc
								
							
						
					
					
						commit
						9b448a0739
					
				
					 3 changed files with 376 additions and 36 deletions
				
			
		|  | @ -66,5 +66,10 @@ public: | |||
|                                    ScreenInfo& screen_info) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     /// Attempt to draw using hardware shaders
 | ||||
|     virtual bool AccelerateDrawBatch(bool is_indexed) { | ||||
|         return false; | ||||
|     } | ||||
| }; | ||||
| } // namespace VideoCore
 | ||||
|  |  | |||
|  | @ -12,6 +12,7 @@ | |||
| #include "common/logging/log.h" | ||||
| #include "common/math_util.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "video_core/pica_state.h" | ||||
|  | @ -26,13 +27,17 @@ | |||
| using PixelFormat = SurfaceParams::PixelFormat; | ||||
| using SurfaceType = SurfaceParams::SurfaceType; | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); | ||||
| MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | ||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||
| 
 | ||||
| RasterizerOpenGL::RasterizerOpenGL() | ||||
|     : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), | ||||
|       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE) { | ||||
|       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), | ||||
|       index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { | ||||
|     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 | ||||
|     state.clip_distance[0] = true; | ||||
| 
 | ||||
|  | @ -46,13 +51,9 @@ RasterizerOpenGL::RasterizerOpenGL() | |||
|     texture_cube_sampler.Create(); | ||||
|     state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle; | ||||
| 
 | ||||
|     // Generate VBO, VAO and UBO
 | ||||
|     vertex_array.Create(); | ||||
| 
 | ||||
|     state.draw.vertex_array = vertex_array.handle; | ||||
|     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||
|     state.draw.uniform_buffer = uniform_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
|     // Generate VAO
 | ||||
|     sw_vao.Create(); | ||||
|     hw_vao.Create(); | ||||
| 
 | ||||
|     uniform_block_data.dirty = true; | ||||
| 
 | ||||
|  | @ -67,10 +68,18 @@ RasterizerOpenGL::RasterizerOpenGL() | |||
|     uniform_block_data.proctex_diff_lut_dirty = true; | ||||
| 
 | ||||
|     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | ||||
|     uniform_size_aligned_vs = | ||||
|         Common::AlignUp<size_t>(sizeof(VSUniformData), uniform_buffer_alignment); | ||||
|     uniform_size_aligned_gs = | ||||
|         Common::AlignUp<size_t>(sizeof(GSUniformData), uniform_buffer_alignment); | ||||
|     uniform_size_aligned_fs = | ||||
|         Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment); | ||||
| 
 | ||||
|     // Set vertex attributes
 | ||||
|     // Set vertex attributes for software shader path
 | ||||
|     state.draw.vertex_array = sw_vao.handle; | ||||
|     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, | ||||
|                           sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); | ||||
|     glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); | ||||
|  | @ -176,6 +185,11 @@ RasterizerOpenGL::RasterizerOpenGL() | |||
|     glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); | ||||
|     glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); | ||||
| 
 | ||||
|     // Bind index buffer for hardware shader path
 | ||||
|     state.draw.vertex_array = hw_vao.handle; | ||||
|     state.Apply(); | ||||
|     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); | ||||
| 
 | ||||
|     shader_program_manager = | ||||
|         std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects); | ||||
| 
 | ||||
|  | @ -258,10 +272,253 @@ void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, | |||
|     vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); | ||||
| } | ||||
| 
 | ||||
| static constexpr std::array<GLenum, 4> vs_attrib_types{ | ||||
|     GL_BYTE,          // VertexAttributeFormat::BYTE
 | ||||
|     GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
 | ||||
|     GL_SHORT,         // VertexAttributeFormat::SHORT
 | ||||
|     GL_FLOAT          // VertexAttributeFormat::FLOAT
 | ||||
| }; | ||||
| 
 | ||||
| struct VertexArrayInfo { | ||||
|     u32 vs_input_index_min; | ||||
|     u32 vs_input_index_max; | ||||
|     u32 vs_input_size; | ||||
| }; | ||||
| 
 | ||||
| RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
|     const auto& vertex_attributes = regs.pipeline.vertex_attributes; | ||||
| 
 | ||||
|     u32 vertex_min; | ||||
|     u32 vertex_max; | ||||
|     if (is_indexed) { | ||||
|         const auto& index_info = regs.pipeline.index_array; | ||||
|         PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; | ||||
|         const u8* index_address_8 = Memory::GetPhysicalPointer(address); | ||||
|         const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | ||||
|         bool index_u16 = index_info.format != 0; | ||||
| 
 | ||||
|         vertex_min = 0xFFFF; | ||||
|         vertex_max = 0; | ||||
|         std::size_t size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); | ||||
|         res_cache.FlushRegion(address, size, nullptr); | ||||
|         for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { | ||||
|             u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; | ||||
|             vertex_min = std::min(vertex_min, vertex); | ||||
|             vertex_max = std::max(vertex_max, vertex); | ||||
|         } | ||||
|     } else { | ||||
|         vertex_min = regs.pipeline.vertex_offset; | ||||
|         vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; | ||||
|     } | ||||
| 
 | ||||
|     u32 vertex_num = vertex_max - vertex_min + 1; | ||||
|     u32 vs_input_size = 0; | ||||
|     for (auto& loader : vertex_attributes.attribute_loaders) { | ||||
|         if (loader.component_count != 0) { | ||||
|             vs_input_size += loader.byte_count * vertex_num; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return {vertex_min, vertex_max, vs_input_size}; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, | ||||
|                                         GLuint vs_input_index_min, GLuint vs_input_index_max) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_VAO); | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
|     const auto& vertex_attributes = regs.pipeline.vertex_attributes; | ||||
|     PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); | ||||
| 
 | ||||
|     state.draw.vertex_array = hw_vao.handle; | ||||
|     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     std::array<bool, 16> enable_attributes{}; | ||||
| 
 | ||||
|     for (const auto& loader : vertex_attributes.attribute_loaders) { | ||||
|         if (loader.component_count == 0 || loader.byte_count == 0) { | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         u32 offset = 0; | ||||
|         for (u32 comp = 0; comp < loader.component_count && comp < 12; ++comp) { | ||||
|             u32 attribute_index = loader.GetComponent(comp); | ||||
|             if (attribute_index < 12) { | ||||
|                 if (vertex_attributes.GetNumElements(attribute_index) != 0) { | ||||
|                     offset = Common::AlignUp( | ||||
|                         offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); | ||||
| 
 | ||||
|                     u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); | ||||
|                     GLint size = vertex_attributes.GetNumElements(attribute_index); | ||||
|                     GLenum type = vs_attrib_types[static_cast<u32>( | ||||
|                         vertex_attributes.GetFormat(attribute_index))]; | ||||
|                     GLsizei stride = loader.byte_count; | ||||
|                     glVertexAttribPointer(input_reg, size, type, GL_FALSE, stride, | ||||
|                                           reinterpret_cast<GLvoid*>(buffer_offset + offset)); | ||||
|                     enable_attributes[input_reg] = true; | ||||
| 
 | ||||
|                     offset += vertex_attributes.GetStride(attribute_index); | ||||
|                 } | ||||
|             } else { | ||||
|                 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
 | ||||
|                 // respectively
 | ||||
|                 offset = Common::AlignUp(offset, 4); | ||||
|                 offset += (attribute_index - 11) * 4; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         PAddr data_addr = | ||||
|             base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); | ||||
| 
 | ||||
|         u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; | ||||
|         u32 data_size = loader.byte_count * vertex_num; | ||||
| 
 | ||||
|         res_cache.FlushRegion(data_addr, data_size, nullptr); | ||||
|         std::memcpy(array_ptr, Memory::GetPhysicalPointer(data_addr), data_size); | ||||
| 
 | ||||
|         array_ptr += data_size; | ||||
|         buffer_offset += data_size; | ||||
|     } | ||||
| 
 | ||||
|     for (std::size_t i = 0; i < enable_attributes.size(); ++i) { | ||||
|         if (enable_attributes[i] != hw_vao_enabled_attributes[i]) { | ||||
|             if (enable_attributes[i]) { | ||||
|                 glEnableVertexAttribArray(i); | ||||
|             } else { | ||||
|                 glDisableVertexAttribArray(i); | ||||
|             } | ||||
|             hw_vao_enabled_attributes[i] = enable_attributes[i]; | ||||
|         } | ||||
| 
 | ||||
|         if (vertex_attributes.IsDefaultAttribute(i)) { | ||||
|             u32 reg = regs.vs.GetRegisterForAttribute(i); | ||||
|             if (!enable_attributes[reg]) { | ||||
|                 const auto& attr = Pica::g_state.input_default_attributes.attr[i]; | ||||
|                 glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), | ||||
|                                  attr.w.ToFloat32()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::SetupVertexShader() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_VS); | ||||
|     GLShader::PicaVSConfig vs_config(Pica::g_state.regs, Pica::g_state.vs); | ||||
|     return shader_program_manager->UseProgrammableVertexShader(vs_config, Pica::g_state.vs); | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::SetupGeometryShader() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_GS); | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
|     if (regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::No) { | ||||
|         GLShader::PicaFixedGSConfig gs_config(regs); | ||||
|         shader_program_manager->UseFixedGeometryShader(gs_config); | ||||
|         return true; | ||||
|     } else { | ||||
|         GLShader::PicaGSConfig gs_config(regs, Pica::g_state.gs); | ||||
|         return shader_program_manager->UseProgrammableGeometryShader(gs_config, Pica::g_state.gs); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
|     if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { | ||||
|         if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { | ||||
|             return false; | ||||
|         } | ||||
|         if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { | ||||
|             return false; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (!SetupVertexShader()) | ||||
|         return false; | ||||
| 
 | ||||
|     if (!SetupGeometryShader()) | ||||
|         return false; | ||||
| 
 | ||||
|     Draw(true, is_indexed); | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| static GLenum GetCurrentPrimitiveMode(bool use_gs) { | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
|     if (use_gs) { | ||||
|         switch ((regs.gs.max_input_attribute_index + 1) / | ||||
|                 (regs.pipeline.vs_outmap_total_minus_1_a + 1)) { | ||||
|         case 1: | ||||
|             return GL_POINTS; | ||||
|         case 2: | ||||
|             return GL_LINES; | ||||
|         case 4: | ||||
|             return GL_LINES_ADJACENCY; | ||||
|         case 3: | ||||
|             return GL_TRIANGLES; | ||||
|         case 6: | ||||
|             return GL_TRIANGLES_ADJACENCY; | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|     } else { | ||||
|         switch (regs.pipeline.triangle_topology) { | ||||
|         case Pica::PipelineRegs::TriangleTopology::Shader: | ||||
|         case Pica::PipelineRegs::TriangleTopology::List: | ||||
|             return GL_TRIANGLES; | ||||
|         case Pica::PipelineRegs::TriangleTopology::Fan: | ||||
|             return GL_TRIANGLE_FAN; | ||||
|         case Pica::PipelineRegs::TriangleTopology::Strip: | ||||
|             return GL_TRIANGLE_STRIP; | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed, bool use_gs) { | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
|     GLenum primitive_mode = GetCurrentPrimitiveMode(use_gs); | ||||
| 
 | ||||
|     auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed); | ||||
| 
 | ||||
|     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     u8* buffer_ptr; | ||||
|     GLintptr buffer_offset; | ||||
|     std::tie(buffer_ptr, buffer_offset, std::ignore) = vertex_buffer.Map(vs_input_size, 4); | ||||
|     SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max); | ||||
|     vertex_buffer.Unmap(vs_input_size); | ||||
| 
 | ||||
|     shader_program_manager->ApplyTo(state); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     if (is_indexed) { | ||||
|         bool index_u16 = regs.pipeline.index_array.format != 0; | ||||
|         std::size_t index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); | ||||
|         const u8* index_data = | ||||
|             Memory::GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + | ||||
|                                        regs.pipeline.index_array.offset); | ||||
|         std::tie(buffer_ptr, buffer_offset, std::ignore) = index_buffer.Map(index_buffer_size, 4); | ||||
|         std::memcpy(buffer_ptr, index_data, index_buffer_size); | ||||
|         index_buffer.Unmap(index_buffer_size); | ||||
| 
 | ||||
|         glDrawRangeElementsBaseVertex( | ||||
|             primitive_mode, vs_input_index_min, vs_input_index_max, regs.pipeline.num_vertices, | ||||
|             index_u16 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, | ||||
|             reinterpret_cast<const void*>(buffer_offset), -static_cast<GLint>(vs_input_index_min)); | ||||
|     } else { | ||||
|         glDrawArrays(primitive_mode, 0, regs.pipeline.num_vertices); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::DrawTriangles() { | ||||
|     if (vertex_batch.empty()) | ||||
|         return; | ||||
|     Draw(false, false); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Drawing); | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
| 
 | ||||
|  | @ -474,7 +731,8 @@ void RasterizerOpenGL::DrawTriangles() { | |||
|     } | ||||
| 
 | ||||
|     // Sync the uniform data
 | ||||
|     UploadUniforms(); | ||||
|     const bool use_gs = regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::Yes; | ||||
|     UploadUniforms(accelerate, use_gs); | ||||
| 
 | ||||
|     // Viewport can have negative offsets or larger
 | ||||
|     // dimensions than our framebuffer sub-rect.
 | ||||
|  | @ -487,22 +745,30 @@ void RasterizerOpenGL::DrawTriangles() { | |||
|     state.scissor.height = draw_rect.GetHeight(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     shader_program_manager->UseTrivialVertexShader(); | ||||
|     shader_program_manager->UseTrivialGeometryShader(); | ||||
|     shader_program_manager->ApplyTo(state); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     // Draw the vertex batch
 | ||||
|     size_t max_vertices = 3 * (vertex_buffer.GetSize() / (3 * sizeof(HardwareVertex))); | ||||
|     for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { | ||||
|         size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); | ||||
|         size_t vertex_size = vertices * sizeof(HardwareVertex); | ||||
|         u8* vbo; | ||||
|         GLintptr offset; | ||||
|         std::tie(vbo, offset, std::ignore) = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); | ||||
|         memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size); | ||||
|         vertex_buffer.Unmap(vertex_size); | ||||
|         glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices); | ||||
|     if (accelerate) { | ||||
|         AccelerateDrawBatchInternal(is_indexed, use_gs); | ||||
|     } else { | ||||
|         state.draw.vertex_array = sw_vao.handle; | ||||
|         state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||
|         shader_program_manager->UseTrivialVertexShader(); | ||||
|         shader_program_manager->UseTrivialGeometryShader(); | ||||
|         shader_program_manager->ApplyTo(state); | ||||
|         state.Apply(); | ||||
| 
 | ||||
|         std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); | ||||
|         for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size(); | ||||
|              base_vertex += max_vertices) { | ||||
|             std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); | ||||
|             std::size_t vertex_size = vertices * sizeof(HardwareVertex); | ||||
|             u8* vbo; | ||||
|             GLintptr offset; | ||||
|             std::tie(vbo, offset, std::ignore) = | ||||
|                 vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); | ||||
|             std::memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size); | ||||
|             vertex_buffer.Unmap(vertex_size); | ||||
|             glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Disable scissor test
 | ||||
|  | @ -1648,18 +1914,53 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::UploadUniforms() { | ||||
|     if (!uniform_block_data.dirty) | ||||
| void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { | ||||
|     // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
 | ||||
|     // first
 | ||||
|     state.draw.uniform_buffer = uniform_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     bool sync_vs = accelerate_draw; | ||||
|     bool sync_gs = accelerate_draw && use_gs; | ||||
|     bool sync_fs = uniform_block_data.dirty; | ||||
| 
 | ||||
|     if (!sync_vs && !sync_gs && !sync_fs) | ||||
|         return; | ||||
| 
 | ||||
|     size_t uniform_size = uniform_size_aligned_fs; | ||||
|     size_t uniform_size = | ||||
|         uniform_size_aligned_vs + uniform_size_aligned_gs + uniform_size_aligned_fs; | ||||
|     size_t used_bytes = 0; | ||||
|     u8* uniforms; | ||||
|     GLintptr offset; | ||||
|     std::tie(uniforms, offset, std::ignore) = | ||||
|     bool invalidate; | ||||
|     std::tie(uniforms, offset, invalidate) = | ||||
|         uniform_buffer.Map(uniform_size, uniform_buffer_alignment); | ||||
|     std::memcpy(uniforms, &uniform_block_data.data, sizeof(UniformData)); | ||||
|     uniform_buffer.Unmap(uniform_size); | ||||
|     glBindBufferRange(GL_UNIFORM_BUFFER, 0, uniform_buffer.GetHandle(), offset, | ||||
|                       sizeof(UniformData)); | ||||
|     uniform_block_data.dirty = false; | ||||
| 
 | ||||
|     if (sync_vs) { | ||||
|         VSUniformData vs_uniforms; | ||||
|         vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); | ||||
|         std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::VS), | ||||
|                           uniform_buffer.GetHandle(), offset + used_bytes, sizeof(VSUniformData)); | ||||
|         used_bytes += uniform_size_aligned_vs; | ||||
|     } | ||||
| 
 | ||||
|     if (sync_gs) { | ||||
|         GSUniformData gs_uniforms; | ||||
|         gs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.gs, Pica::g_state.gs); | ||||
|         std::memcpy(uniforms + used_bytes, &gs_uniforms, sizeof(gs_uniforms)); | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::GS), | ||||
|                           uniform_buffer.GetHandle(), offset + used_bytes, sizeof(GSUniformData)); | ||||
|         used_bytes += uniform_size_aligned_gs; | ||||
|     } | ||||
| 
 | ||||
|     if (sync_fs || invalidate) { | ||||
|         std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData)); | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::Common), | ||||
|                           uniform_buffer.GetHandle(), offset + used_bytes, sizeof(UniformData)); | ||||
|         uniform_block_data.dirty = false; | ||||
|         used_bytes += uniform_size_aligned_fs; | ||||
|     } | ||||
| 
 | ||||
|     uniform_buffer.Unmap(used_bytes); | ||||
| } | ||||
|  |  | |||
|  | @ -50,6 +50,7 @@ public: | |||
|     bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; | ||||
|     bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, | ||||
|                            u32 pixel_stride, ScreenInfo& screen_info) override; | ||||
|     bool AccelerateDrawBatch(bool is_indexed) override; | ||||
| 
 | ||||
| private: | ||||
|     struct SamplerInfo { | ||||
|  | @ -73,6 +74,7 @@ private: | |||
| 
 | ||||
|     /// Structure that the hardware rendered vertices are composed of
 | ||||
|     struct HardwareVertex { | ||||
|         HardwareVertex() = default; | ||||
|         HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { | ||||
|             position[0] = v.pos.x.ToFloat32(); | ||||
|             position[1] = v.pos.y.ToFloat32(); | ||||
|  | @ -216,7 +218,32 @@ private: | |||
|     void SyncLightDistanceAttenuationScale(int light_index); | ||||
| 
 | ||||
|     /// Upload the uniform blocks to the uniform buffer object
 | ||||
|     void UploadUniforms(); | ||||
|     void UploadUniforms(bool accelerate_draw, bool use_gs); | ||||
| 
 | ||||
|     /// Generic draw function for DrawTriangles and AccelerateDrawBatch
 | ||||
|     void Draw(bool accelerate, bool is_indexed); | ||||
| 
 | ||||
|     /// Internal implementation for AccelerateDrawBatch
 | ||||
|     void AccelerateDrawBatchInternal(bool is_indexed, bool use_gs); | ||||
| 
 | ||||
|     struct VertexArrayInfo { | ||||
|         u32 vs_input_index_min; | ||||
|         u32 vs_input_index_max; | ||||
|         u32 vs_input_size; | ||||
|     }; | ||||
| 
 | ||||
|     /// Retrieve the range and the size of the input vertex
 | ||||
|     VertexArrayInfo AnalyzeVertexArray(bool is_indexed); | ||||
| 
 | ||||
|     /// Setup vertex array for AccelerateDrawBatch
 | ||||
|     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min, | ||||
|                           GLuint vs_input_index_max); | ||||
| 
 | ||||
|     /// Setup vertex shader for AccelerateDrawBatch
 | ||||
|     bool SetupVertexShader(); | ||||
| 
 | ||||
|     /// Setup geometry shader for AccelerateDrawBatch
 | ||||
|     bool SetupGeometryShader(); | ||||
| 
 | ||||
|     OpenGLState state; | ||||
| 
 | ||||
|  | @ -242,14 +269,21 @@ private: | |||
| 
 | ||||
|     // They shall be big enough for about one frame.
 | ||||
|     static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; | ||||
|     static constexpr size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024; | ||||
|     static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; | ||||
| 
 | ||||
|     OGLVertexArray sw_vao; // VAO for software shader draw
 | ||||
|     OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
 | ||||
|     std::array<bool, 16> hw_vao_enabled_attributes{}; | ||||
| 
 | ||||
|     std::array<SamplerInfo, 3> texture_samplers; | ||||
|     OGLVertexArray vertex_array; | ||||
|     OGLStreamBuffer vertex_buffer; | ||||
|     OGLStreamBuffer uniform_buffer; | ||||
|     OGLStreamBuffer index_buffer; | ||||
|     OGLFramebuffer framebuffer; | ||||
|     GLint uniform_buffer_alignment; | ||||
|     size_t uniform_size_aligned_vs; | ||||
|     size_t uniform_size_aligned_gs; | ||||
|     size_t uniform_size_aligned_fs; | ||||
| 
 | ||||
|     SamplerInfo texture_cube_sampler; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue