mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	renderer_software: Multi-thread processing (#6698)
* renderer_software: Multi-thread processing
* Doubles the performance in most cases
* renderer_software: Move memory access out of the raster loop
* Profiling shows this has a significant impact
This commit is contained in:
		
							parent
							
								
									8b218e1b7d
								
							
						
					
					
						commit
						d1f600601d
					
				
					 4 changed files with 201 additions and 181 deletions
				
			
		|  | @ -41,10 +41,22 @@ Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferR | ||||||
| 
 | 
 | ||||||
| Framebuffer::~Framebuffer() = default; | Framebuffer::~Framebuffer() = default; | ||||||
| 
 | 
 | ||||||
| void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const { | void Framebuffer::Bind() { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     PAddr addr = regs.framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); |     if (color_addr != addr) [[unlikely]] { | ||||||
|  |         color_addr = addr; | ||||||
|  |         color_buffer = memory.GetPhysicalPointer(color_addr); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|  |     addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|  |     if (depth_addr != addr) [[unlikely]] { | ||||||
|  |         depth_addr = addr; | ||||||
|  |         depth_buffer = memory.GetPhysicalPointer(depth_addr); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const { | ||||||
|  |     const auto& framebuffer = regs.framebuffer; | ||||||
|     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 |     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 | ||||||
|     // NOTE: The framebuffer height register contains the actual FB height minus one.
 |     // NOTE: The framebuffer height register contains the actual FB height minus one.
 | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
|  | @ -54,8 +66,7 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const { | ||||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||||
|     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||||
|                            coarse_y * framebuffer.width * bytes_per_pixel; |                            coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|     u8* depth_buffer = memory.GetPhysicalPointer(addr); |     u8* dst_pixel = color_buffer + dst_offset; | ||||||
|     u8* dst_pixel = depth_buffer + dst_offset; |  | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.color_format) { |     switch (framebuffer.color_format) { | ||||||
|     case FramebufferRegs::ColorFormat::RGBA8: |     case FramebufferRegs::ColorFormat::RGBA8: | ||||||
|  | @ -80,10 +91,8 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const { | const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); |  | ||||||
| 
 |  | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|  | @ -91,7 +100,6 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const { | ||||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||||
|     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||||
|                            coarse_y * framebuffer.width * bytes_per_pixel; |                            coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|     const u8* color_buffer = memory.GetPhysicalPointer(addr); |  | ||||||
|     const u8* src_pixel = color_buffer + src_offset; |     const u8* src_pixel = color_buffer + src_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.color_format) { |     switch (framebuffer.color_format) { | ||||||
|  | @ -114,10 +122,8 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const { | ||||||
|     return {0, 0, 0, 0}; |     return {0, 0, 0, 0}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 Framebuffer::GetDepth(int x, int y) const { | u32 Framebuffer::GetDepth(u32 x, u32 y) const { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |  | ||||||
| 
 |  | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|  | @ -125,7 +131,6 @@ u32 Framebuffer::GetDepth(int x, int y) const { | ||||||
|     const u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     const u8* depth_buffer = memory.GetPhysicalPointer(addr); |  | ||||||
|     const u8* src_pixel = depth_buffer + src_offset; |     const u8* src_pixel = depth_buffer + src_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|  | @ -143,10 +148,8 @@ u32 Framebuffer::GetDepth(int x, int y) const { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u8 Framebuffer::GetStencil(int x, int y) const { | u8 Framebuffer::GetStencil(u32 x, u32 y) const { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |  | ||||||
| 
 |  | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|  | @ -154,7 +157,6 @@ u8 Framebuffer::GetStencil(int x, int y) const { | ||||||
|     const u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     const u8* depth_buffer = memory.GetPhysicalPointer(addr); |  | ||||||
|     const u8* src_pixel = depth_buffer + src_offset; |     const u8* src_pixel = depth_buffer + src_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|  | @ -169,10 +171,8 @@ u8 Framebuffer::GetStencil(int x, int y) const { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Framebuffer::SetDepth(int x, int y, u32 value) const { | void Framebuffer::SetDepth(u32 x, u32 y, u32 value) const { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |  | ||||||
| 
 |  | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|  | @ -180,7 +180,6 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const { | ||||||
|     const u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     u8* depth_buffer = memory.GetPhysicalPointer(addr); |  | ||||||
|     u8* dst_pixel = depth_buffer + dst_offset; |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|  | @ -201,10 +200,8 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Framebuffer::SetStencil(int x, int y, u8 value) const { | void Framebuffer::SetStencil(u32 x, u32 y, u8 value) const { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |  | ||||||
| 
 |  | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|  | @ -212,7 +209,6 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const { | ||||||
|     const u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     u8* depth_buffer = memory.GetPhysicalPointer(addr); |  | ||||||
|     u8* dst_pixel = depth_buffer + dst_offset; |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|  | @ -231,7 +227,7 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Framebuffer::DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const { | void Framebuffer::DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const { | ||||||
|     const auto& framebuffer = regs.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const auto& shadow = regs.shadow; |     const auto& shadow = regs.shadow; | ||||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|  |  | ||||||
|  | @ -23,30 +23,37 @@ public: | ||||||
|     explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer); |     explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer); | ||||||
|     ~Framebuffer(); |     ~Framebuffer(); | ||||||
| 
 | 
 | ||||||
|  |     /// Updates the framebuffer addresses from the PICA registers.
 | ||||||
|  |     void Bind(); | ||||||
|  | 
 | ||||||
|     /// Draws a pixel at the specified coordinates.
 |     /// Draws a pixel at the specified coordinates.
 | ||||||
|     void DrawPixel(int x, int y, const Common::Vec4<u8>& color) const; |     void DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const; | ||||||
| 
 | 
 | ||||||
|     /// Returns the current color at the specified coordinates.
 |     /// Returns the current color at the specified coordinates.
 | ||||||
|     [[nodiscard]] const Common::Vec4<u8> GetPixel(int x, int y) const; |     [[nodiscard]] const Common::Vec4<u8> GetPixel(u32 x, u32 y) const; | ||||||
| 
 | 
 | ||||||
|     /// Returns the depth value at the specified coordinates.
 |     /// Returns the depth value at the specified coordinates.
 | ||||||
|     [[nodiscard]] u32 GetDepth(int x, int y) const; |     [[nodiscard]] u32 GetDepth(u32 x, u32 y) const; | ||||||
| 
 | 
 | ||||||
|     /// Returns the stencil value at the specified coordinates.
 |     /// Returns the stencil value at the specified coordinates.
 | ||||||
|     [[nodiscard]] u8 GetStencil(int x, int y) const; |     [[nodiscard]] u8 GetStencil(u32 x, u32 y) const; | ||||||
| 
 | 
 | ||||||
|     /// Stores the provided depth value at the specified coordinates.
 |     /// Stores the provided depth value at the specified coordinates.
 | ||||||
|     void SetDepth(int x, int y, u32 value) const; |     void SetDepth(u32 x, u32 y, u32 value) const; | ||||||
| 
 | 
 | ||||||
|     /// Stores the provided stencil value at the specified coordinates.
 |     /// Stores the provided stencil value at the specified coordinates.
 | ||||||
|     void SetStencil(int x, int y, u8 value) const; |     void SetStencil(u32 x, u32 y, u8 value) const; | ||||||
| 
 | 
 | ||||||
|     /// Draws a pixel to the shadow buffer.
 |     /// Draws a pixel to the shadow buffer.
 | ||||||
|     void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const; |     void DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     Memory::MemorySystem& memory; |     Memory::MemorySystem& memory; | ||||||
|     const Pica::FramebufferRegs& regs; |     const Pica::FramebufferRegs& regs; | ||||||
|  |     PAddr color_addr; | ||||||
|  |     u8* color_buffer{}; | ||||||
|  |     PAddr depth_addr; | ||||||
|  |     u8* depth_buffer{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); | u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); | ||||||
|  |  | ||||||
|  | @ -96,7 +96,9 @@ private: | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_) | RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_) | ||||||
|     : memory{memory_}, state{Pica::g_state}, regs{state.regs}, fb{memory, regs.framebuffer} {} |     : memory{memory_}, state{Pica::g_state}, regs{state.regs}, | ||||||
|  |       num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)}, | ||||||
|  |       sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {} | ||||||
| 
 | 
 | ||||||
| void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, | void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, | ||||||
|                                      const Pica::Shader::OutputVertex& v1, |                                      const Pica::Shader::OutputVertex& v1, | ||||||
|  | @ -289,15 +291,18 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||||
| 
 | 
 | ||||||
|     const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |     const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | ||||||
| 
 | 
 | ||||||
|     auto textures = regs.texturing.GetTextures(); |     const auto textures = regs.texturing.GetTextures(); | ||||||
|     const auto tev_stages = regs.texturing.GetTevStages(); |     const auto tev_stages = regs.texturing.GetTevStages(); | ||||||
| 
 | 
 | ||||||
|  |     fb.Bind(); | ||||||
|  | 
 | ||||||
|     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
 |     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
 | ||||||
|     // TODO: Not sure if looping through x first might be faster
 |     // TODO: Not sure if looping through x first might be faster
 | ||||||
|     for (u16 y = min_y + 8; y < max_y; y += 0x10) { |     for (u16 y = min_y + 8; y < max_y; y += 0x10) { | ||||||
|  |         const auto process_scanline = [&, y] { | ||||||
|             for (u16 x = min_x + 8; x < max_x; x += 0x10) { |             for (u16 x = min_x + 8; x < max_x; x += 0x10) { | ||||||
|             // Do not process the pixel if it's inside the scissor box and the scissor mode is set
 |                 // Do not process the pixel if it's inside the scissor box and the scissor mode is
 | ||||||
|             // to Exclude.
 |                 // set to Exclude.
 | ||||||
|                 if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) { |                 if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) { | ||||||
|                     if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) { |                     if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) { | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -360,11 +365,13 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||||
|                  *     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1) |                  *     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1) | ||||||
|                  *     u = u_over_w / one_over_w |                  *     u = u_over_w / one_over_w | ||||||
|                  * |                  * | ||||||
|              * The generalization to three vertices is straightforward in baricentric coordinates. |                  * The generalization to three vertices is straightforward in baricentric | ||||||
|  |                  *coordinates. | ||||||
|                  **/ |                  **/ | ||||||
|                 const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) { |                 const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) { | ||||||
|                     auto attr_over_w = Common::MakeVec(attr0, attr1, attr2); |                     auto attr_over_w = Common::MakeVec(attr0, attr1, attr2); | ||||||
|                 f24 interpolated_attr_over_w = Common::Dot(attr_over_w, baricentric_coordinates); |                     f24 interpolated_attr_over_w = | ||||||
|  |                         Common::Dot(attr_over_w, baricentric_coordinates); | ||||||
|                     return interpolated_attr_over_w * interpolated_w_inverse; |                     return interpolated_attr_over_w * interpolated_w_inverse; | ||||||
|                 }; |                 }; | ||||||
| 
 | 
 | ||||||
|  | @ -405,9 +412,12 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||||
|                 if (!regs.lighting.disable) { |                 if (!regs.lighting.disable) { | ||||||
|                     const auto normquat = |                     const auto normquat = | ||||||
|                         Common::Quaternion<f32>{ |                         Common::Quaternion<f32>{ | ||||||
|                         {get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), |                             {get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x) | ||||||
|                          get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), |                                  .ToFloat32(), | ||||||
|                          get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, |                              get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y) | ||||||
|  |                                  .ToFloat32(), | ||||||
|  |                              get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z) | ||||||
|  |                                  .ToFloat32()}, | ||||||
|                             get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), |                             get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), | ||||||
|                         } |                         } | ||||||
|                             .Normalized(); |                             .Normalized(); | ||||||
|  | @ -417,20 +427,22 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||||
|                         get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), |                         get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), | ||||||
|                         get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), |                         get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), | ||||||
|                     }; |                     }; | ||||||
|                 std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( |                     std::tie(primary_fragment_color, secondary_fragment_color) = | ||||||
|                     regs.lighting, state.lighting, normquat, view, texture_color); |                         ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view, | ||||||
|  |                                                texture_color); | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 // Write the TEV stages.
 |                 // Write the TEV stages.
 | ||||||
|             auto combiner_output = WriteTevConfig(texture_color, tev_stages, primary_color, |                 auto combiner_output = | ||||||
|                                                   primary_fragment_color, secondary_fragment_color); |                     WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color, | ||||||
|  |                                    secondary_fragment_color); | ||||||
| 
 | 
 | ||||||
|                 const auto& output_merger = regs.framebuffer.output_merger; |                 const auto& output_merger = regs.framebuffer.output_merger; | ||||||
|                 if (output_merger.fragment_operation_mode == |                 if (output_merger.fragment_operation_mode == | ||||||
|                     FramebufferRegs::FragmentOperationMode::Shadow) { |                     FramebufferRegs::FragmentOperationMode::Shadow) { | ||||||
|                 u32 depth_int = static_cast<u32>(depth * 0xFFFFFF); |                     const u32 depth_int = static_cast<u32>(depth * 0xFFFFFF); | ||||||
|                     // Use green color as the shadow intensity
 |                     // Use green color as the shadow intensity
 | ||||||
|                 u8 stencil = combiner_output.y; |                     const u8 stencil = combiner_output.y; | ||||||
|                     fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil); |                     fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil); | ||||||
|                     // Skip the normal output merger pipeline if it is in shadow mode
 |                     // Skip the normal output merger pipeline if it is in shadow mode
 | ||||||
|                     continue; |                     continue; | ||||||
|  | @ -440,7 +452,7 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||||
|                 if (!DoAlphaTest(combiner_output.a())) { |                 if (!DoAlphaTest(combiner_output.a())) { | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|             WriteFog(combiner_output, depth); |                 WriteFog(depth, combiner_output); | ||||||
|                 if (!DoDepthStencilTest(x, y, depth)) { |                 if (!DoDepthStencilTest(x, y, depth)) { | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|  | @ -449,7 +461,10 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||||
|                     fb.DrawPixel(x >> 4, y >> 4, result); |                     fb.DrawPixel(x >> 4, y >> 4, result); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |         }; | ||||||
|  |         sw_workers.QueueWork(std::move(process_scanline)); | ||||||
|     } |     } | ||||||
|  |     sw_workers.WaitForRequests(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor( | std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor( | ||||||
|  | @ -573,7 +588,7 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor( | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y, | Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y, | ||||||
|                                                 Common::Vec4<u8>& combiner_output) const { |                                                 Common::Vec4<u8> combiner_output) const { | ||||||
|     const auto dest = fb.GetPixel(x >> 4, y >> 4); |     const auto dest = fb.GetPixel(x >> 4, y >> 4); | ||||||
|     Common::Vec4<u8> blend_output = combiner_output; |     Common::Vec4<u8> blend_output = combiner_output; | ||||||
| 
 | 
 | ||||||
|  | @ -771,7 +786,7 @@ Common::Vec4<u8> RasterizerSoftware::WriteTevConfig( | ||||||
|     return combiner_output; |     return combiner_output; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerSoftware::WriteFog(Common::Vec4<u8>& combiner_output, float depth) const { | void RasterizerSoftware::WriteFog(float depth, Common::Vec4<u8>& combiner_output) const { | ||||||
|     /**
 |     /**
 | ||||||
|      * Apply fog combiner. Not fully accurate. We'd have to know what data type is used to |      * Apply fog combiner. Not fully accurate. We'd have to know what data type is used to | ||||||
|      * store the depth etc. Using float for now until we know more about Pica datatypes. |      * store the depth etc. Using float for now until we know more about Pica datatypes. | ||||||
|  |  | ||||||
|  | @ -5,7 +5,7 @@ | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include <span> | #include <span> | ||||||
| 
 | #include "common/thread_worker.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/regs_texturing.h" | #include "video_core/regs_texturing.h" | ||||||
| #include "video_core/renderer_software/sw_clipper.h" | #include "video_core/renderer_software/sw_clipper.h" | ||||||
|  | @ -52,7 +52,7 @@ private: | ||||||
|         std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const; |         std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const; | ||||||
| 
 | 
 | ||||||
|     /// Returns the final pixel color with blending or logic ops applied.
 |     /// Returns the final pixel color with blending or logic ops applied.
 | ||||||
|     Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8>& combiner_output) const; |     Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8> combiner_output) const; | ||||||
| 
 | 
 | ||||||
|     /// Emulates the TEV configuration and returns the combiner output.
 |     /// Emulates the TEV configuration and returns the combiner output.
 | ||||||
|     Common::Vec4<u8> WriteTevConfig( |     Common::Vec4<u8> WriteTevConfig( | ||||||
|  | @ -62,7 +62,7 @@ private: | ||||||
|         Common::Vec4<u8> secondary_fragment_color); |         Common::Vec4<u8> secondary_fragment_color); | ||||||
| 
 | 
 | ||||||
|     /// Blends fog to the combiner output if enabled.
 |     /// Blends fog to the combiner output if enabled.
 | ||||||
|     void WriteFog(Common::Vec4<u8>& combiner_output, float depth) const; |     void WriteFog(float depth, Common::Vec4<u8>& combiner_output) const; | ||||||
| 
 | 
 | ||||||
|     /// Performs the alpha test. Returns false if the test failed.
 |     /// Performs the alpha test. Returns false if the test failed.
 | ||||||
|     bool DoAlphaTest(u8 alpha) const; |     bool DoAlphaTest(u8 alpha) const; | ||||||
|  | @ -74,6 +74,8 @@ private: | ||||||
|     Memory::MemorySystem& memory; |     Memory::MemorySystem& memory; | ||||||
|     Pica::State& state; |     Pica::State& state; | ||||||
|     const Pica::Regs& regs; |     const Pica::Regs& regs; | ||||||
|  |     size_t num_sw_threads; | ||||||
|  |     Common::ThreadWorker sw_workers; | ||||||
|     Framebuffer fb; |     Framebuffer fb; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue