mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	Merge pull request #3741 from wwylele/glvtx-last
gl_rasterizer: implement AccelerateDrawBatch to emulate PICA shader on hardware
This commit is contained in:
		
						commit
						64a8c8ee07
					
				
					 15 changed files with 492 additions and 44 deletions
				
			
		|  | @ -95,6 +95,11 @@ void Config::ReadValues() { | ||||||
| 
 | 
 | ||||||
|     // Renderer
 |     // Renderer
 | ||||||
|     Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); |     Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); | ||||||
|  |     Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true); | ||||||
|  |     Settings::values.shaders_accurate_gs = | ||||||
|  |         sdl2_config->GetBoolean("Renderer", "shaders_accurate_gs", true); | ||||||
|  |     Settings::values.shaders_accurate_mul = | ||||||
|  |         sdl2_config->GetBoolean("Renderer", "shaders_accurate_mul", false); | ||||||
|     Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); |     Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); | ||||||
|     Settings::values.resolution_factor = |     Settings::values.resolution_factor = | ||||||
|         static_cast<u16>(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); |         static_cast<u16>(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); | ||||||
|  |  | ||||||
|  | @ -77,6 +77,18 @@ use_cpu_jit = | ||||||
| # 0: Software, 1 (default): Hardware | # 0: Software, 1 (default): Hardware | ||||||
| use_hw_renderer = | use_hw_renderer = | ||||||
| 
 | 
 | ||||||
|  | # Whether to use hardware shaders to emulate 3DS shaders | ||||||
|  | # 0: Software, 1 (default): Hardware | ||||||
|  | use_hw_shader = | ||||||
|  | 
 | ||||||
|  | # Whether to use accurate multiplication in hardware shaders | ||||||
|  | # 0: Off (Default. Faster, but causes issues in some games) 1: On (Slower, but correct) | ||||||
|  | shaders_accurate_mul = | ||||||
|  | 
 | ||||||
|  | # Whether to fallback to software for geometry shaders | ||||||
|  | # 0: Off (Faster, but causes issues in some games) 1: On (Default. Slower, but correct) | ||||||
|  | shaders_accurate_gs = | ||||||
|  | 
 | ||||||
| # Whether to use the Just-In-Time (JIT) compiler for shader emulation | # Whether to use the Just-In-Time (JIT) compiler for shader emulation | ||||||
| # 0: Interpreter (slow), 1 (default): JIT (fast) | # 0: Interpreter (slow), 1 (default): JIT (fast) | ||||||
| use_shader_jit = | use_shader_jit = | ||||||
|  |  | ||||||
|  | @ -83,6 +83,10 @@ void Config::ReadValues() { | ||||||
| 
 | 
 | ||||||
|     qt_config->beginGroup("Renderer"); |     qt_config->beginGroup("Renderer"); | ||||||
|     Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); |     Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); | ||||||
|  |     Settings::values.use_hw_shader = qt_config->value("use_hw_shader", true).toBool(); | ||||||
|  |     Settings::values.shaders_accurate_gs = qt_config->value("shaders_accurate_gs", true).toBool(); | ||||||
|  |     Settings::values.shaders_accurate_mul = | ||||||
|  |         qt_config->value("shaders_accurate_mul", false).toBool(); | ||||||
|     Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); |     Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); | ||||||
|     Settings::values.resolution_factor = |     Settings::values.resolution_factor = | ||||||
|         static_cast<u16>(qt_config->value("resolution_factor", 1).toInt()); |         static_cast<u16>(qt_config->value("resolution_factor", 1).toInt()); | ||||||
|  | @ -272,6 +276,9 @@ void Config::SaveValues() { | ||||||
| 
 | 
 | ||||||
|     qt_config->beginGroup("Renderer"); |     qt_config->beginGroup("Renderer"); | ||||||
|     qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); |     qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); | ||||||
|  |     qt_config->setValue("use_hw_shader", Settings::values.use_hw_shader); | ||||||
|  |     qt_config->setValue("shaders_accurate_gs", Settings::values.shaders_accurate_gs); | ||||||
|  |     qt_config->setValue("shaders_accurate_mul", Settings::values.shaders_accurate_mul); | ||||||
|     qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); |     qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); | ||||||
|     qt_config->setValue("resolution_factor", Settings::values.resolution_factor); |     qt_config->setValue("resolution_factor", Settings::values.resolution_factor); | ||||||
|     qt_config->setValue("use_vsync", Settings::values.use_vsync); |     qt_config->setValue("use_vsync", Settings::values.use_vsync); | ||||||
|  |  | ||||||
|  | @ -22,6 +22,9 @@ void Apply() { | ||||||
| 
 | 
 | ||||||
|     VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; |     VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; | ||||||
|     VideoCore::g_shader_jit_enabled = values.use_shader_jit; |     VideoCore::g_shader_jit_enabled = values.use_shader_jit; | ||||||
|  |     VideoCore::g_hw_shader_enabled = values.use_hw_shader; | ||||||
|  |     VideoCore::g_hw_shader_accurate_gs = values.shaders_accurate_gs; | ||||||
|  |     VideoCore::g_hw_shader_accurate_mul = values.shaders_accurate_mul; | ||||||
| 
 | 
 | ||||||
|     if (VideoCore::g_emu_window) { |     if (VideoCore::g_emu_window) { | ||||||
|         auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); |         auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); | ||||||
|  |  | ||||||
|  | @ -107,6 +107,9 @@ struct Values { | ||||||
| 
 | 
 | ||||||
|     // Renderer
 |     // Renderer
 | ||||||
|     bool use_hw_renderer; |     bool use_hw_renderer; | ||||||
|  |     bool use_hw_shader; | ||||||
|  |     bool shaders_accurate_gs; | ||||||
|  |     bool shaders_accurate_mul; | ||||||
|     bool use_shader_jit; |     bool use_shader_jit; | ||||||
|     u16 resolution_factor; |     u16 resolution_factor; | ||||||
|     bool use_vsync; |     bool use_vsync; | ||||||
|  |  | ||||||
|  | @ -286,6 +286,38 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|         if (g_debug_context) |         if (g_debug_context) | ||||||
|             g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); |             g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | ||||||
| 
 | 
 | ||||||
|  |         PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; | ||||||
|  | 
 | ||||||
|  |         bool accelerate_draw = VideoCore::g_hw_shader_enabled && primitive_assembler.IsEmpty(); | ||||||
|  | 
 | ||||||
|  |         if (regs.pipeline.use_gs == PipelineRegs::UseGS::No) { | ||||||
|  |             auto topology = primitive_assembler.GetTopology(); | ||||||
|  |             if (topology == PipelineRegs::TriangleTopology::Shader || | ||||||
|  |                 topology == PipelineRegs::TriangleTopology::List) { | ||||||
|  |                 accelerate_draw = accelerate_draw && (regs.pipeline.num_vertices % 3) == 0; | ||||||
|  |             } | ||||||
|  |             // TODO (wwylele): for Strip/Fan topology, if the primitive assembler is not restarted
 | ||||||
|  |             // after this draw call, the buffered vertex from this draw should "leak" to the next
 | ||||||
|  |             // draw, in which case we should buffer the vertex into the software primitive assembler,
 | ||||||
|  |             // or disable accelerate draw completely. However, no game has been found yet that does
 | ||||||
|  |             // this, so this is left unimplemented for now. Revisit this when an issue is found in
 | ||||||
|  |             // games.
 | ||||||
|  |         } else { | ||||||
|  |             if (VideoCore::g_hw_shader_accurate_gs) { | ||||||
|  |                 accelerate_draw = false; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); | ||||||
|  | 
 | ||||||
|  |         if (accelerate_draw && | ||||||
|  |             VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed)) { | ||||||
|  |             if (g_debug_context) { | ||||||
|  |                 g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         // Processes information about internal vertex attributes to figure out how a vertex is
 |         // Processes information about internal vertex attributes to figure out how a vertex is
 | ||||||
|         // loaded.
 |         // loaded.
 | ||||||
|         // Later, these can be compiled and cached.
 |         // Later, these can be compiled and cached.
 | ||||||
|  | @ -294,15 +326,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|         Shader::OutputVertex::ValidateSemantics(regs.rasterizer); |         Shader::OutputVertex::ValidateSemantics(regs.rasterizer); | ||||||
| 
 | 
 | ||||||
|         // Load vertices
 |         // Load vertices
 | ||||||
|         bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); |  | ||||||
| 
 |  | ||||||
|         const auto& index_info = regs.pipeline.index_array; |         const auto& index_info = regs.pipeline.index_array; | ||||||
|         const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); |         const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); | ||||||
|         const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); |         const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | ||||||
|         bool index_u16 = index_info.format != 0; |         bool index_u16 = index_info.format != 0; | ||||||
| 
 | 
 | ||||||
|         PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; |  | ||||||
| 
 |  | ||||||
|         if (g_debug_context && g_debug_context->recorder) { |         if (g_debug_context && g_debug_context->recorder) { | ||||||
|             for (int i = 0; i < 3; ++i) { |             for (int i = 0; i < 3; ++i) { | ||||||
|                 const auto texture = regs.texturing.GetTextures()[i]; |                 const auto texture = regs.texturing.GetTextures()[i]; | ||||||
|  |  | ||||||
|  | @ -71,6 +71,16 @@ void PrimitiveAssembler<VertexType>::Reconfigure(PipelineRegs::TriangleTopology | ||||||
|     this->topology = topology; |     this->topology = topology; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | template <typename VertexType> | ||||||
|  | bool PrimitiveAssembler<VertexType>::IsEmpty() const { | ||||||
|  |     return buffer_index == 0 && strip_ready == false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <typename VertexType> | ||||||
|  | PipelineRegs::TriangleTopology PrimitiveAssembler<VertexType>::GetTopology() const { | ||||||
|  |     return topology; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // explicitly instantiate use cases
 | // explicitly instantiate use cases
 | ||||||
| template struct PrimitiveAssembler<Shader::OutputVertex>; | template struct PrimitiveAssembler<Shader::OutputVertex>; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -45,6 +45,16 @@ struct PrimitiveAssembler { | ||||||
|      */ |      */ | ||||||
|     void Reconfigure(PipelineRegs::TriangleTopology topology); |     void Reconfigure(PipelineRegs::TriangleTopology topology); | ||||||
| 
 | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Returns whether the PrimitiveAssembler has an empty internal buffer. | ||||||
|  |      */ | ||||||
|  |     bool IsEmpty() const; | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Returns the current topology. | ||||||
|  |      */ | ||||||
|  |     PipelineRegs::TriangleTopology GetTopology() const; | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     PipelineRegs::TriangleTopology topology; |     PipelineRegs::TriangleTopology topology; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -66,5 +66,10 @@ public: | ||||||
|                                    ScreenInfo& screen_info) { |                                    ScreenInfo& screen_info) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     /// Attempt to draw using hardware shaders
 | ||||||
|  |     virtual bool AccelerateDrawBatch(bool is_indexed) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
| }; | }; | ||||||
| } // namespace VideoCore
 | } // namespace VideoCore
 | ||||||
|  |  | ||||||
|  | @ -12,6 +12,7 @@ | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/math_util.h" | #include "common/math_util.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
|  | #include "common/scope_exit.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "core/hw/gpu.h" | #include "core/hw/gpu.h" | ||||||
| #include "video_core/pica_state.h" | #include "video_core/pica_state.h" | ||||||
|  | @ -26,13 +27,17 @@ | ||||||
| using PixelFormat = SurfaceParams::PixelFormat; | using PixelFormat = SurfaceParams::PixelFormat; | ||||||
| using SurfaceType = SurfaceParams::SurfaceType; | using SurfaceType = SurfaceParams::SurfaceType; | ||||||
| 
 | 
 | ||||||
|  | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); | ||||||
|  | MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); | ||||||
|  | MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); | ||||||
| MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | ||||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||||
| 
 | 
 | ||||||
| RasterizerOpenGL::RasterizerOpenGL() | RasterizerOpenGL::RasterizerOpenGL() | ||||||
|     : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), |     : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), | ||||||
|       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE) { |       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), | ||||||
|  |       index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { | ||||||
|     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 |     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 | ||||||
|     state.clip_distance[0] = true; |     state.clip_distance[0] = true; | ||||||
| 
 | 
 | ||||||
|  | @ -46,13 +51,9 @@ RasterizerOpenGL::RasterizerOpenGL() | ||||||
|     texture_cube_sampler.Create(); |     texture_cube_sampler.Create(); | ||||||
|     state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle; |     state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle; | ||||||
| 
 | 
 | ||||||
|     // Generate VBO, VAO and UBO
 |     // Generate VAO
 | ||||||
|     vertex_array.Create(); |     sw_vao.Create(); | ||||||
| 
 |     hw_vao.Create(); | ||||||
|     state.draw.vertex_array = vertex_array.handle; |  | ||||||
|     state.draw.vertex_buffer = vertex_buffer.GetHandle(); |  | ||||||
|     state.draw.uniform_buffer = uniform_buffer.GetHandle(); |  | ||||||
|     state.Apply(); |  | ||||||
| 
 | 
 | ||||||
|     uniform_block_data.dirty = true; |     uniform_block_data.dirty = true; | ||||||
| 
 | 
 | ||||||
|  | @ -67,10 +68,18 @@ RasterizerOpenGL::RasterizerOpenGL() | ||||||
|     uniform_block_data.proctex_diff_lut_dirty = true; |     uniform_block_data.proctex_diff_lut_dirty = true; | ||||||
| 
 | 
 | ||||||
|     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); |     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | ||||||
|  |     uniform_size_aligned_vs = | ||||||
|  |         Common::AlignUp<size_t>(sizeof(VSUniformData), uniform_buffer_alignment); | ||||||
|  |     uniform_size_aligned_gs = | ||||||
|  |         Common::AlignUp<size_t>(sizeof(GSUniformData), uniform_buffer_alignment); | ||||||
|     uniform_size_aligned_fs = |     uniform_size_aligned_fs = | ||||||
|         Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment); |         Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment); | ||||||
| 
 | 
 | ||||||
|     // Set vertex attributes
 |     // Set vertex attributes for software shader path
 | ||||||
|  |     state.draw.vertex_array = sw_vao.handle; | ||||||
|  |     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||||
|  |     state.Apply(); | ||||||
|  | 
 | ||||||
|     glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, |     glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, | ||||||
|                           sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); |                           sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); | ||||||
|     glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); |     glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); | ||||||
|  | @ -176,6 +185,11 @@ RasterizerOpenGL::RasterizerOpenGL() | ||||||
|     glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); |     glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); | ||||||
|     glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); |     glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); | ||||||
| 
 | 
 | ||||||
|  |     // Bind index buffer for hardware shader path
 | ||||||
|  |     state.draw.vertex_array = hw_vao.handle; | ||||||
|  |     state.Apply(); | ||||||
|  |     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); | ||||||
|  | 
 | ||||||
|     shader_program_manager = |     shader_program_manager = | ||||||
|         std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects); |         std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects); | ||||||
| 
 | 
 | ||||||
|  | @ -258,10 +272,264 @@ void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, | ||||||
|     vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); |     vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static constexpr std::array<GLenum, 4> vs_attrib_types{ | ||||||
|  |     GL_BYTE,          // VertexAttributeFormat::BYTE
 | ||||||
|  |     GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
 | ||||||
|  |     GL_SHORT,         // VertexAttributeFormat::SHORT
 | ||||||
|  |     GL_FLOAT          // VertexAttributeFormat::FLOAT
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct VertexArrayInfo { | ||||||
|  |     u32 vs_input_index_min; | ||||||
|  |     u32 vs_input_index_max; | ||||||
|  |     u32 vs_input_size; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     const auto& vertex_attributes = regs.pipeline.vertex_attributes; | ||||||
|  | 
 | ||||||
|  |     u32 vertex_min; | ||||||
|  |     u32 vertex_max; | ||||||
|  |     if (is_indexed) { | ||||||
|  |         const auto& index_info = regs.pipeline.index_array; | ||||||
|  |         PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; | ||||||
|  |         const u8* index_address_8 = Memory::GetPhysicalPointer(address); | ||||||
|  |         const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | ||||||
|  |         bool index_u16 = index_info.format != 0; | ||||||
|  | 
 | ||||||
|  |         vertex_min = 0xFFFF; | ||||||
|  |         vertex_max = 0; | ||||||
|  |         std::size_t size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); | ||||||
|  |         res_cache.FlushRegion(address, size, nullptr); | ||||||
|  |         for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { | ||||||
|  |             u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; | ||||||
|  |             vertex_min = std::min(vertex_min, vertex); | ||||||
|  |             vertex_max = std::max(vertex_max, vertex); | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         vertex_min = regs.pipeline.vertex_offset; | ||||||
|  |         vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u32 vertex_num = vertex_max - vertex_min + 1; | ||||||
|  |     u32 vs_input_size = 0; | ||||||
|  |     for (auto& loader : vertex_attributes.attribute_loaders) { | ||||||
|  |         if (loader.component_count != 0) { | ||||||
|  |             vs_input_size += loader.byte_count * vertex_num; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return {vertex_min, vertex_max, vs_input_size}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, | ||||||
|  |                                         GLuint vs_input_index_min, GLuint vs_input_index_max) { | ||||||
|  |     MICROPROFILE_SCOPE(OpenGL_VAO); | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     const auto& vertex_attributes = regs.pipeline.vertex_attributes; | ||||||
|  |     PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); | ||||||
|  | 
 | ||||||
|  |     state.draw.vertex_array = hw_vao.handle; | ||||||
|  |     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||||
|  |     state.Apply(); | ||||||
|  | 
 | ||||||
|  |     std::array<bool, 16> enable_attributes{}; | ||||||
|  | 
 | ||||||
|  |     for (const auto& loader : vertex_attributes.attribute_loaders) { | ||||||
|  |         if (loader.component_count == 0 || loader.byte_count == 0) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         u32 offset = 0; | ||||||
|  |         for (u32 comp = 0; comp < loader.component_count && comp < 12; ++comp) { | ||||||
|  |             u32 attribute_index = loader.GetComponent(comp); | ||||||
|  |             if (attribute_index < 12) { | ||||||
|  |                 if (vertex_attributes.GetNumElements(attribute_index) != 0) { | ||||||
|  |                     offset = Common::AlignUp( | ||||||
|  |                         offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); | ||||||
|  | 
 | ||||||
|  |                     u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); | ||||||
|  |                     GLint size = vertex_attributes.GetNumElements(attribute_index); | ||||||
|  |                     GLenum type = vs_attrib_types[static_cast<u32>( | ||||||
|  |                         vertex_attributes.GetFormat(attribute_index))]; | ||||||
|  |                     GLsizei stride = loader.byte_count; | ||||||
|  |                     glVertexAttribPointer(input_reg, size, type, GL_FALSE, stride, | ||||||
|  |                                           reinterpret_cast<GLvoid*>(buffer_offset + offset)); | ||||||
|  |                     enable_attributes[input_reg] = true; | ||||||
|  | 
 | ||||||
|  |                     offset += vertex_attributes.GetStride(attribute_index); | ||||||
|  |                 } | ||||||
|  |             } else { | ||||||
|  |                 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
 | ||||||
|  |                 // respectively
 | ||||||
|  |                 offset = Common::AlignUp(offset, 4); | ||||||
|  |                 offset += (attribute_index - 11) * 4; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         PAddr data_addr = | ||||||
|  |             base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); | ||||||
|  | 
 | ||||||
|  |         u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; | ||||||
|  |         u32 data_size = loader.byte_count * vertex_num; | ||||||
|  | 
 | ||||||
|  |         res_cache.FlushRegion(data_addr, data_size, nullptr); | ||||||
|  |         std::memcpy(array_ptr, Memory::GetPhysicalPointer(data_addr), data_size); | ||||||
|  | 
 | ||||||
|  |         array_ptr += data_size; | ||||||
|  |         buffer_offset += data_size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     for (std::size_t i = 0; i < enable_attributes.size(); ++i) { | ||||||
|  |         if (enable_attributes[i] != hw_vao_enabled_attributes[i]) { | ||||||
|  |             if (enable_attributes[i]) { | ||||||
|  |                 glEnableVertexAttribArray(i); | ||||||
|  |             } else { | ||||||
|  |                 glDisableVertexAttribArray(i); | ||||||
|  |             } | ||||||
|  |             hw_vao_enabled_attributes[i] = enable_attributes[i]; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (vertex_attributes.IsDefaultAttribute(i)) { | ||||||
|  |             u32 reg = regs.vs.GetRegisterForAttribute(i); | ||||||
|  |             if (!enable_attributes[reg]) { | ||||||
|  |                 const auto& attr = Pica::g_state.input_default_attributes.attr[i]; | ||||||
|  |                 glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), | ||||||
|  |                                  attr.w.ToFloat32()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool RasterizerOpenGL::SetupVertexShader() { | ||||||
|  |     MICROPROFILE_SCOPE(OpenGL_VS); | ||||||
|  |     GLShader::PicaVSConfig vs_config(Pica::g_state.regs, Pica::g_state.vs); | ||||||
|  |     return shader_program_manager->UseProgrammableVertexShader(vs_config, Pica::g_state.vs); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool RasterizerOpenGL::SetupGeometryShader() { | ||||||
|  |     MICROPROFILE_SCOPE(OpenGL_GS); | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     if (regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::No) { | ||||||
|  |         GLShader::PicaFixedGSConfig gs_config(regs); | ||||||
|  |         shader_program_manager->UseFixedGeometryShader(gs_config); | ||||||
|  |         return true; | ||||||
|  |     } else { | ||||||
|  |         GLShader::PicaGSConfig gs_config(regs, Pica::g_state.gs); | ||||||
|  |         return shader_program_manager->UseProgrammableGeometryShader(gs_config, Pica::g_state.gs); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { | ||||||
|  |         if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (!SetupVertexShader()) | ||||||
|  |         return false; | ||||||
|  | 
 | ||||||
|  |     if (!SetupGeometryShader()) | ||||||
|  |         return false; | ||||||
|  | 
 | ||||||
|  |     return Draw(true, is_indexed); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static GLenum GetCurrentPrimitiveMode(bool use_gs) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     if (use_gs) { | ||||||
|  |         switch ((regs.gs.max_input_attribute_index + 1) / | ||||||
|  |                 (regs.pipeline.vs_outmap_total_minus_1_a + 1)) { | ||||||
|  |         case 1: | ||||||
|  |             return GL_POINTS; | ||||||
|  |         case 2: | ||||||
|  |             return GL_LINES; | ||||||
|  |         case 4: | ||||||
|  |             return GL_LINES_ADJACENCY; | ||||||
|  |         case 3: | ||||||
|  |             return GL_TRIANGLES; | ||||||
|  |         case 6: | ||||||
|  |             return GL_TRIANGLES_ADJACENCY; | ||||||
|  |         default: | ||||||
|  |             UNREACHABLE(); | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         switch (regs.pipeline.triangle_topology) { | ||||||
|  |         case Pica::PipelineRegs::TriangleTopology::Shader: | ||||||
|  |         case Pica::PipelineRegs::TriangleTopology::List: | ||||||
|  |             return GL_TRIANGLES; | ||||||
|  |         case Pica::PipelineRegs::TriangleTopology::Fan: | ||||||
|  |             return GL_TRIANGLE_FAN; | ||||||
|  |         case Pica::PipelineRegs::TriangleTopology::Strip: | ||||||
|  |             return GL_TRIANGLE_STRIP; | ||||||
|  |         default: | ||||||
|  |             UNREACHABLE(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed, bool use_gs) { | ||||||
|  |     const auto& regs = Pica::g_state.regs; | ||||||
|  |     GLenum primitive_mode = GetCurrentPrimitiveMode(use_gs); | ||||||
|  | 
 | ||||||
|  |     auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed); | ||||||
|  | 
 | ||||||
|  |     if (vs_input_size > VERTEX_BUFFER_SIZE) { | ||||||
|  |         NGLOG_WARNING(Render_OpenGL, "Too large vertex input size {}", vs_input_size); | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||||
|  |     state.Apply(); | ||||||
|  | 
 | ||||||
|  |     u8* buffer_ptr; | ||||||
|  |     GLintptr buffer_offset; | ||||||
|  |     std::tie(buffer_ptr, buffer_offset, std::ignore) = vertex_buffer.Map(vs_input_size, 4); | ||||||
|  |     SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max); | ||||||
|  |     vertex_buffer.Unmap(vs_input_size); | ||||||
|  | 
 | ||||||
|  |     shader_program_manager->ApplyTo(state); | ||||||
|  |     state.Apply(); | ||||||
|  | 
 | ||||||
|  |     if (is_indexed) { | ||||||
|  |         bool index_u16 = regs.pipeline.index_array.format != 0; | ||||||
|  |         std::size_t index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); | ||||||
|  | 
 | ||||||
|  |         if (index_buffer_size > INDEX_BUFFER_SIZE) { | ||||||
|  |             NGLOG_WARNING(Render_OpenGL, "Too large index input size {}", index_buffer_size); | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         const u8* index_data = | ||||||
|  |             Memory::GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + | ||||||
|  |                                        regs.pipeline.index_array.offset); | ||||||
|  |         std::tie(buffer_ptr, buffer_offset, std::ignore) = index_buffer.Map(index_buffer_size, 4); | ||||||
|  |         std::memcpy(buffer_ptr, index_data, index_buffer_size); | ||||||
|  |         index_buffer.Unmap(index_buffer_size); | ||||||
|  | 
 | ||||||
|  |         glDrawRangeElementsBaseVertex( | ||||||
|  |             primitive_mode, vs_input_index_min, vs_input_index_max, regs.pipeline.num_vertices, | ||||||
|  |             index_u16 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, | ||||||
|  |             reinterpret_cast<const void*>(buffer_offset), -static_cast<GLint>(vs_input_index_min)); | ||||||
|  |     } else { | ||||||
|  |         glDrawArrays(primitive_mode, 0, regs.pipeline.num_vertices); | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void RasterizerOpenGL::DrawTriangles() { | void RasterizerOpenGL::DrawTriangles() { | ||||||
|     if (vertex_batch.empty()) |     if (vertex_batch.empty()) | ||||||
|         return; |         return; | ||||||
|  |     Draw(false, false); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|  | bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_Drawing); |     MICROPROFILE_SCOPE(OpenGL_Drawing); | ||||||
|     const auto& regs = Pica::g_state.regs; |     const auto& regs = Pica::g_state.regs; | ||||||
| 
 | 
 | ||||||
|  | @ -474,7 +742,8 @@ void RasterizerOpenGL::DrawTriangles() { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Sync the uniform data
 |     // Sync the uniform data
 | ||||||
|     UploadUniforms(); |     const bool use_gs = regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::Yes; | ||||||
|  |     UploadUniforms(accelerate, use_gs); | ||||||
| 
 | 
 | ||||||
|     // Viewport can have negative offsets or larger
 |     // Viewport can have negative offsets or larger
 | ||||||
|     // dimensions than our framebuffer sub-rect.
 |     // dimensions than our framebuffer sub-rect.
 | ||||||
|  | @ -487,23 +756,32 @@ void RasterizerOpenGL::DrawTriangles() { | ||||||
|     state.scissor.height = draw_rect.GetHeight(); |     state.scissor.height = draw_rect.GetHeight(); | ||||||
|     state.Apply(); |     state.Apply(); | ||||||
| 
 | 
 | ||||||
|  |     // Draw the vertex batch
 | ||||||
|  |     bool succeeded = true; | ||||||
|  |     if (accelerate) { | ||||||
|  |         succeeded = AccelerateDrawBatchInternal(is_indexed, use_gs); | ||||||
|  |     } else { | ||||||
|  |         state.draw.vertex_array = sw_vao.handle; | ||||||
|  |         state.draw.vertex_buffer = vertex_buffer.GetHandle(); | ||||||
|         shader_program_manager->UseTrivialVertexShader(); |         shader_program_manager->UseTrivialVertexShader(); | ||||||
|         shader_program_manager->UseTrivialGeometryShader(); |         shader_program_manager->UseTrivialGeometryShader(); | ||||||
|         shader_program_manager->ApplyTo(state); |         shader_program_manager->ApplyTo(state); | ||||||
|         state.Apply(); |         state.Apply(); | ||||||
| 
 | 
 | ||||||
|     // Draw the vertex batch
 |         std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); | ||||||
|     size_t max_vertices = 3 * (vertex_buffer.GetSize() / (3 * sizeof(HardwareVertex))); |         for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size(); | ||||||
|     for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { |              base_vertex += max_vertices) { | ||||||
|         size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); |             std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); | ||||||
|         size_t vertex_size = vertices * sizeof(HardwareVertex); |             std::size_t vertex_size = vertices * sizeof(HardwareVertex); | ||||||
|             u8* vbo; |             u8* vbo; | ||||||
|             GLintptr offset; |             GLintptr offset; | ||||||
|         std::tie(vbo, offset, std::ignore) = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); |             std::tie(vbo, offset, std::ignore) = | ||||||
|         memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size); |                 vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); | ||||||
|  |             std::memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size); | ||||||
|             vertex_buffer.Unmap(vertex_size); |             vertex_buffer.Unmap(vertex_size); | ||||||
|             glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices); |             glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices); | ||||||
|         } |         } | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     // Disable scissor test
 |     // Disable scissor test
 | ||||||
|     state.scissor.enabled = false; |     state.scissor.enabled = false; | ||||||
|  | @ -532,6 +810,8 @@ void RasterizerOpenGL::DrawTriangles() { | ||||||
|         res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), |         res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), | ||||||
|                                    depth_surface); |                                    depth_surface); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     return succeeded; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | ||||||
|  | @ -1648,18 +1928,53 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::UploadUniforms() { | void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { | ||||||
|     if (!uniform_block_data.dirty) |     // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
 | ||||||
|  |     // first
 | ||||||
|  |     state.draw.uniform_buffer = uniform_buffer.GetHandle(); | ||||||
|  |     state.Apply(); | ||||||
|  | 
 | ||||||
|  |     bool sync_vs = accelerate_draw; | ||||||
|  |     bool sync_gs = accelerate_draw && use_gs; | ||||||
|  |     bool sync_fs = uniform_block_data.dirty; | ||||||
|  | 
 | ||||||
|  |     if (!sync_vs && !sync_gs && !sync_fs) | ||||||
|         return; |         return; | ||||||
| 
 | 
 | ||||||
|     size_t uniform_size = uniform_size_aligned_fs; |     size_t uniform_size = | ||||||
|  |         uniform_size_aligned_vs + uniform_size_aligned_gs + uniform_size_aligned_fs; | ||||||
|  |     size_t used_bytes = 0; | ||||||
|     u8* uniforms; |     u8* uniforms; | ||||||
|     GLintptr offset; |     GLintptr offset; | ||||||
|     std::tie(uniforms, offset, std::ignore) = |     bool invalidate; | ||||||
|  |     std::tie(uniforms, offset, invalidate) = | ||||||
|         uniform_buffer.Map(uniform_size, uniform_buffer_alignment); |         uniform_buffer.Map(uniform_size, uniform_buffer_alignment); | ||||||
|     std::memcpy(uniforms, &uniform_block_data.data, sizeof(UniformData)); | 
 | ||||||
|     uniform_buffer.Unmap(uniform_size); |     if (sync_vs) { | ||||||
|     glBindBufferRange(GL_UNIFORM_BUFFER, 0, uniform_buffer.GetHandle(), offset, |         VSUniformData vs_uniforms; | ||||||
|                       sizeof(UniformData)); |         vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); | ||||||
|     uniform_block_data.dirty = false; |         std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); | ||||||
|  |         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::VS), | ||||||
|  |                           uniform_buffer.GetHandle(), offset + used_bytes, sizeof(VSUniformData)); | ||||||
|  |         used_bytes += uniform_size_aligned_vs; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (sync_gs) { | ||||||
|  |         GSUniformData gs_uniforms; | ||||||
|  |         gs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.gs, Pica::g_state.gs); | ||||||
|  |         std::memcpy(uniforms + used_bytes, &gs_uniforms, sizeof(gs_uniforms)); | ||||||
|  |         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::GS), | ||||||
|  |                           uniform_buffer.GetHandle(), offset + used_bytes, sizeof(GSUniformData)); | ||||||
|  |         used_bytes += uniform_size_aligned_gs; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (sync_fs || invalidate) { | ||||||
|  |         std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData)); | ||||||
|  |         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::Common), | ||||||
|  |                           uniform_buffer.GetHandle(), offset + used_bytes, sizeof(UniformData)); | ||||||
|  |         uniform_block_data.dirty = false; | ||||||
|  |         used_bytes += uniform_size_aligned_fs; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     uniform_buffer.Unmap(used_bytes); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -50,6 +50,7 @@ public: | ||||||
|     bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; |     bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; | ||||||
|     bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, |     bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride, ScreenInfo& screen_info) override; |                            u32 pixel_stride, ScreenInfo& screen_info) override; | ||||||
|  |     bool AccelerateDrawBatch(bool is_indexed) override; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     struct SamplerInfo { |     struct SamplerInfo { | ||||||
|  | @ -73,6 +74,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     /// Structure that the hardware rendered vertices are composed of
 |     /// Structure that the hardware rendered vertices are composed of
 | ||||||
|     struct HardwareVertex { |     struct HardwareVertex { | ||||||
|  |         HardwareVertex() = default; | ||||||
|         HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { |         HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { | ||||||
|             position[0] = v.pos.x.ToFloat32(); |             position[0] = v.pos.x.ToFloat32(); | ||||||
|             position[1] = v.pos.y.ToFloat32(); |             position[1] = v.pos.y.ToFloat32(); | ||||||
|  | @ -216,7 +218,32 @@ private: | ||||||
|     void SyncLightDistanceAttenuationScale(int light_index); |     void SyncLightDistanceAttenuationScale(int light_index); | ||||||
| 
 | 
 | ||||||
|     /// Upload the uniform blocks to the uniform buffer object
 |     /// Upload the uniform blocks to the uniform buffer object
 | ||||||
|     void UploadUniforms(); |     void UploadUniforms(bool accelerate_draw, bool use_gs); | ||||||
|  | 
 | ||||||
|  |     /// Generic draw function for DrawTriangles and AccelerateDrawBatch
 | ||||||
|  |     bool Draw(bool accelerate, bool is_indexed); | ||||||
|  | 
 | ||||||
|  |     /// Internal implementation for AccelerateDrawBatch
 | ||||||
|  |     bool AccelerateDrawBatchInternal(bool is_indexed, bool use_gs); | ||||||
|  | 
 | ||||||
|  |     struct VertexArrayInfo { | ||||||
|  |         u32 vs_input_index_min; | ||||||
|  |         u32 vs_input_index_max; | ||||||
|  |         u32 vs_input_size; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     /// Retrieve the range and the size of the input vertex
 | ||||||
|  |     VertexArrayInfo AnalyzeVertexArray(bool is_indexed); | ||||||
|  | 
 | ||||||
|  |     /// Setup vertex array for AccelerateDrawBatch
 | ||||||
|  |     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min, | ||||||
|  |                           GLuint vs_input_index_max); | ||||||
|  | 
 | ||||||
|  |     /// Setup vertex shader for AccelerateDrawBatch
 | ||||||
|  |     bool SetupVertexShader(); | ||||||
|  | 
 | ||||||
|  |     /// Setup geometry shader for AccelerateDrawBatch
 | ||||||
|  |     bool SetupGeometryShader(); | ||||||
| 
 | 
 | ||||||
|     OpenGLState state; |     OpenGLState state; | ||||||
| 
 | 
 | ||||||
|  | @ -242,14 +269,21 @@ private: | ||||||
| 
 | 
 | ||||||
|     // They shall be big enough for about one frame.
 |     // They shall be big enough for about one frame.
 | ||||||
|     static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; |     static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; | ||||||
|  |     static constexpr size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024; | ||||||
|     static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; |     static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; | ||||||
| 
 | 
 | ||||||
|  |     OGLVertexArray sw_vao; // VAO for software shader draw
 | ||||||
|  |     OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
 | ||||||
|  |     std::array<bool, 16> hw_vao_enabled_attributes{}; | ||||||
|  | 
 | ||||||
|     std::array<SamplerInfo, 3> texture_samplers; |     std::array<SamplerInfo, 3> texture_samplers; | ||||||
|     OGLVertexArray vertex_array; |  | ||||||
|     OGLStreamBuffer vertex_buffer; |     OGLStreamBuffer vertex_buffer; | ||||||
|     OGLStreamBuffer uniform_buffer; |     OGLStreamBuffer uniform_buffer; | ||||||
|  |     OGLStreamBuffer index_buffer; | ||||||
|     OGLFramebuffer framebuffer; |     OGLFramebuffer framebuffer; | ||||||
|     GLint uniform_buffer_alignment; |     GLint uniform_buffer_alignment; | ||||||
|  |     size_t uniform_size_aligned_vs; | ||||||
|  |     size_t uniform_size_aligned_gs; | ||||||
|     size_t uniform_size_aligned_fs; |     size_t uniform_size_aligned_fs; | ||||||
| 
 | 
 | ||||||
|     SamplerInfo texture_cube_sampler; |     SamplerInfo texture_cube_sampler; | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_gen.h" | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_util.h" | #include "video_core/renderer_opengl/gl_shader_util.h" | ||||||
|  | #include "video_core/video_core.h" | ||||||
| 
 | 
 | ||||||
| using Pica::FramebufferRegs; | using Pica::FramebufferRegs; | ||||||
| using Pica::LightingRegs; | using Pica::LightingRegs; | ||||||
|  | @ -226,7 +227,7 @@ void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::Sh | ||||||
|     program_hash = setup.GetProgramCodeHash(); |     program_hash = setup.GetProgramCodeHash(); | ||||||
|     swizzle_hash = setup.GetSwizzleDataHash(); |     swizzle_hash = setup.GetSwizzleDataHash(); | ||||||
|     main_offset = regs.main_offset; |     main_offset = regs.main_offset; | ||||||
|     sanitize_mul = false; // TODO (wwylele): stubbed now. Should sync with user settings
 |     sanitize_mul = VideoCore::g_hw_shader_accurate_mul; | ||||||
| 
 | 
 | ||||||
|     num_outputs = 0; |     num_outputs = 0; | ||||||
|     output_map.fill(16); |     output_map.fill(16); | ||||||
|  |  | ||||||
|  | @ -14,16 +14,26 @@ OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coh | ||||||
|     gl_buffer.Create(); |     gl_buffer.Create(); | ||||||
|     glBindBuffer(gl_target, gl_buffer.handle); |     glBindBuffer(gl_target, gl_buffer.handle); | ||||||
| 
 | 
 | ||||||
|  |     GLsizeiptr allocate_size = size; | ||||||
|  |     if (target == GL_ARRAY_BUFFER) { | ||||||
|  |         // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
 | ||||||
|  |         // read position is near the end and is an out-of-bound access to the vertex buffer. This is
 | ||||||
|  |         // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
 | ||||||
|  |         // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
 | ||||||
|  |         // crash.
 | ||||||
|  |         allocate_size *= 2; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     if (GLAD_GL_ARB_buffer_storage) { |     if (GLAD_GL_ARB_buffer_storage) { | ||||||
|         persistent = true; |         persistent = true; | ||||||
|         coherent = prefer_coherent; |         coherent = prefer_coherent; | ||||||
|         GLbitfield flags = |         GLbitfield flags = | ||||||
|             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); |             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); | ||||||
|         glBufferStorage(gl_target, buffer_size, nullptr, flags); |         glBufferStorage(gl_target, allocate_size, nullptr, flags); | ||||||
|         mapped_ptr = static_cast<u8*>(glMapBufferRange( |         mapped_ptr = static_cast<u8*>(glMapBufferRange( | ||||||
|             gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); |             gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); | ||||||
|     } else { |     } else { | ||||||
|         glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW); |         glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,7 +19,9 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin | ||||||
| 
 | 
 | ||||||
| std::atomic<bool> g_hw_renderer_enabled; | std::atomic<bool> g_hw_renderer_enabled; | ||||||
| std::atomic<bool> g_shader_jit_enabled; | std::atomic<bool> g_shader_jit_enabled; | ||||||
| std::atomic<bool> g_vsync_enabled; | std::atomic<bool> g_hw_shader_enabled; | ||||||
|  | std::atomic<bool> g_hw_shader_accurate_gs; | ||||||
|  | std::atomic<bool> g_hw_shader_accurate_mul; | ||||||
| 
 | 
 | ||||||
| /// Initialize the video core
 | /// Initialize the video core
 | ||||||
| bool Init(EmuWindow* emu_window) { | bool Init(EmuWindow* emu_window) { | ||||||
|  |  | ||||||
|  | @ -22,6 +22,9 @@ extern EmuWindow* g_emu_window;                  ///< Emu window | ||||||
| // qt ui)
 | // qt ui)
 | ||||||
| extern std::atomic<bool> g_hw_renderer_enabled; | extern std::atomic<bool> g_hw_renderer_enabled; | ||||||
| extern std::atomic<bool> g_shader_jit_enabled; | extern std::atomic<bool> g_shader_jit_enabled; | ||||||
|  | extern std::atomic<bool> g_hw_shader_enabled; | ||||||
|  | extern std::atomic<bool> g_hw_shader_accurate_gs; | ||||||
|  | extern std::atomic<bool> g_hw_shader_accurate_mul; | ||||||
| 
 | 
 | ||||||
| /// Start the video core
 | /// Start the video core
 | ||||||
| void Start(); | void Start(); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue