mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Merge pull request #2562 from yuriks/pica-refactor3
Re-organize software rasterizer code
This commit is contained in:
		
						commit
						1bf449d752
					
				
					 12 changed files with 661 additions and 563 deletions
				
			
		|  | @ -1,10 +1,8 @@ | |||
| set(SRCS | ||||
|             clipper.cpp | ||||
|             command_processor.cpp | ||||
|             debug_utils/debug_utils.cpp | ||||
|             pica.cpp | ||||
|             primitive_assembly.cpp | ||||
|             rasterizer.cpp | ||||
|             regs.cpp | ||||
|             renderer_base.cpp | ||||
|             renderer_opengl/gl_rasterizer.cpp | ||||
|  | @ -15,7 +13,11 @@ set(SRCS | |||
|             renderer_opengl/renderer_opengl.cpp | ||||
|             shader/shader.cpp | ||||
|             shader/shader_interpreter.cpp | ||||
|             swrasterizer.cpp | ||||
|             swrasterizer/clipper.cpp | ||||
|             swrasterizer/framebuffer.cpp | ||||
|             swrasterizer/rasterizer.cpp | ||||
|             swrasterizer/swrasterizer.cpp | ||||
|             swrasterizer/texturing.cpp | ||||
|             texture/etc1.cpp | ||||
|             texture/texture_decode.cpp | ||||
|             vertex_loader.cpp | ||||
|  | @ -23,7 +25,6 @@ set(SRCS | |||
|             ) | ||||
| 
 | ||||
| set(HEADERS | ||||
|             clipper.h | ||||
|             command_processor.h | ||||
|             debug_utils/debug_utils.h | ||||
|             gpu_debugger.h | ||||
|  | @ -31,7 +32,6 @@ set(HEADERS | |||
|             pica_state.h | ||||
|             pica_types.h | ||||
|             primitive_assembly.h | ||||
|             rasterizer.h | ||||
|             rasterizer_interface.h | ||||
|             regs.h | ||||
|             regs_framebuffer.h | ||||
|  | @ -52,7 +52,11 @@ set(HEADERS | |||
|             shader/debug_data.h | ||||
|             shader/shader.h | ||||
|             shader/shader_interpreter.h | ||||
|             swrasterizer.h | ||||
|             swrasterizer/clipper.h | ||||
|             swrasterizer/framebuffer.h | ||||
|             swrasterizer/rasterizer.h | ||||
|             swrasterizer/swrasterizer.h | ||||
|             swrasterizer/texturing.h | ||||
|             texture/etc1.h | ||||
|             texture/texture_decode.h | ||||
|             utils.h | ||||
|  |  | |||
|  | @ -6,7 +6,7 @@ | |||
| #include <memory> | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
| #include "video_core/swrasterizer.h" | ||||
| #include "video_core/swrasterizer/swrasterizer.h" | ||||
| #include "video_core/video_core.h" | ||||
| 
 | ||||
| void RendererBase::RefreshRasterizerSetting() { | ||||
|  |  | |||
|  | @ -11,11 +11,11 @@ | |||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "video_core/clipper.h" | ||||
| #include "video_core/pica_state.h" | ||||
| #include "video_core/pica_types.h" | ||||
| #include "video_core/rasterizer.h" | ||||
| #include "video_core/shader/shader.h" | ||||
| #include "video_core/swrasterizer/clipper.h" | ||||
| #include "video_core/swrasterizer/rasterizer.h" | ||||
| 
 | ||||
| using Pica::Rasterizer::Vertex; | ||||
| 
 | ||||
							
								
								
									
										358
									
								
								src/video_core/swrasterizer/framebuffer.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										358
									
								
								src/video_core/swrasterizer/framebuffer.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,358 @@ | |||
| // Copyright 2017 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/color.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/math_util.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/pica_state.h" | ||||
| #include "video_core/regs_framebuffer.h" | ||||
| #include "video_core/swrasterizer/framebuffer.h" | ||||
| #include "video_core/utils.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| namespace Rasterizer { | ||||
| 
 | ||||
| void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||
| 
 | ||||
|     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 | ||||
|     // NOTE: The framebuffer height register contains the actual FB height minus one.
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = | ||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||
|                      coarse_y * framebuffer.width * bytes_per_pixel; | ||||
|     u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | ||||
| 
 | ||||
|     switch (framebuffer.color_format) { | ||||
|     case FramebufferRegs::ColorFormat::RGBA8: | ||||
|         Color::EncodeRGBA8(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB8: | ||||
|         Color::EncodeRGB8(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB5A1: | ||||
|         Color::EncodeRGB5A1(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB565: | ||||
|         Color::EncodeRGB565(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGBA4: | ||||
|         Color::EncodeRGBA4(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", | ||||
|                      framebuffer.color_format.Value()); | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| const Math::Vec4<u8> GetPixel(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = | ||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||
|                      coarse_y * framebuffer.width * bytes_per_pixel; | ||||
|     u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | ||||
| 
 | ||||
|     switch (framebuffer.color_format) { | ||||
|     case FramebufferRegs::ColorFormat::RGBA8: | ||||
|         return Color::DecodeRGBA8(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB8: | ||||
|         return Color::DecodeRGB8(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB5A1: | ||||
|         return Color::DecodeRGB5A1(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB565: | ||||
|         return Color::DecodeRGB565(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGBA4: | ||||
|         return Color::DecodeRGBA4(src_pixel); | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", | ||||
|                      framebuffer.color_format.Value()); | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| 
 | ||||
|     return {0, 0, 0, 0}; | ||||
| } | ||||
| 
 | ||||
| u32 GetDepth(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* src_pixel = depth_buffer + src_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case FramebufferRegs::DepthFormat::D16: | ||||
|         return Color::DecodeD16(src_pixel); | ||||
|     case FramebufferRegs::DepthFormat::D24: | ||||
|         return Color::DecodeD24(src_pixel); | ||||
|     case FramebufferRegs::DepthFormat::D24S8: | ||||
|         return Color::DecodeD24S8(src_pixel).x; | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u8 GetStencil(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* src_pixel = depth_buffer + src_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case FramebufferRegs::DepthFormat::D24S8: | ||||
|         return Color::DecodeD24S8(src_pixel).y; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_WARNING( | ||||
|             HW_GPU, | ||||
|             "GetStencil called for function which doesn't have a stencil component (format %u)", | ||||
|             framebuffer.depth_format); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void SetDepth(int x, int y, u32 value) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* dst_pixel = depth_buffer + dst_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case FramebufferRegs::DepthFormat::D16: | ||||
|         Color::EncodeD16(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::DepthFormat::D24: | ||||
|         Color::EncodeD24(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::DepthFormat::D24S8: | ||||
|         Color::EncodeD24X8(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|         UNIMPLEMENTED(); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void SetStencil(int x, int y, u8 value) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* dst_pixel = depth_buffer + dst_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case Pica::FramebufferRegs::DepthFormat::D16: | ||||
|     case Pica::FramebufferRegs::DepthFormat::D24: | ||||
|         // Nothing to do
 | ||||
|         break; | ||||
| 
 | ||||
|     case Pica::FramebufferRegs::DepthFormat::D24S8: | ||||
|         Color::EncodeX24S8(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|         UNIMPLEMENTED(); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { | ||||
|     switch (action) { | ||||
|     case FramebufferRegs::StencilAction::Keep: | ||||
|         return old_stencil; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Zero: | ||||
|         return 0; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Replace: | ||||
|         return ref; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Increment: | ||||
|         // Saturated increment
 | ||||
|         return std::min<u8>(old_stencil, 254) + 1; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Decrement: | ||||
|         // Saturated decrement
 | ||||
|         return std::max<u8>(old_stencil, 1) - 1; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Invert: | ||||
|         return ~old_stencil; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::IncrementWrap: | ||||
|         return old_stencil + 1; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::DecrementWrap: | ||||
|         return old_stencil - 1; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||||
|                                      const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||||
|                                      FramebufferRegs::BlendEquation equation) { | ||||
|     Math::Vec4<int> result; | ||||
| 
 | ||||
|     auto src_result = (src * srcfactor).Cast<int>(); | ||||
|     auto dst_result = (dest * destfactor).Cast<int>(); | ||||
| 
 | ||||
|     switch (equation) { | ||||
|     case FramebufferRegs::BlendEquation::Add: | ||||
|         result = (src_result + dst_result) / 255; | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::BlendEquation::Subtract: | ||||
|         result = (src_result - dst_result) / 255; | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::BlendEquation::ReverseSubtract: | ||||
|         result = (dst_result - src_result) / 255; | ||||
|         break; | ||||
| 
 | ||||
|     // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
 | ||||
|     //       min/max computations, but is this what the 3DS actually does?
 | ||||
|     case FramebufferRegs::BlendEquation::Min: | ||||
|         result.r() = std::min(src.r(), dest.r()); | ||||
|         result.g() = std::min(src.g(), dest.g()); | ||||
|         result.b() = std::min(src.b(), dest.b()); | ||||
|         result.a() = std::min(src.a(), dest.a()); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::BlendEquation::Max: | ||||
|         result.r() = std::max(src.r(), dest.r()); | ||||
|         result.g() = std::max(src.g(), dest.g()); | ||||
|         result.b() = std::max(src.b(), dest.b()); | ||||
|         result.a() = std::max(src.a(), dest.a()); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| 
 | ||||
|     return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), | ||||
|                           MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); | ||||
| }; | ||||
| 
 | ||||
| u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { | ||||
|     switch (op) { | ||||
|     case FramebufferRegs::LogicOp::Clear: | ||||
|         return 0; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::And: | ||||
|         return src & dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::AndReverse: | ||||
|         return src & ~dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Copy: | ||||
|         return src; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Set: | ||||
|         return 255; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::CopyInverted: | ||||
|         return ~src; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::NoOp: | ||||
|         return dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Invert: | ||||
|         return ~dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Nand: | ||||
|         return ~(src & dest); | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Or: | ||||
|         return src | dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Nor: | ||||
|         return ~(src | dest); | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Xor: | ||||
|         return src ^ dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Equiv: | ||||
|         return ~(src ^ dest); | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::AndInverted: | ||||
|         return ~src & dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::OrReverse: | ||||
|         return src | ~dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::OrInverted: | ||||
|         return ~src | dest; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| } // namespace Rasterizer
 | ||||
| } // namespace Pica
 | ||||
							
								
								
									
										29
									
								
								src/video_core/swrasterizer/framebuffer.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/video_core/swrasterizer/framebuffer.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| // Copyright 2017 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "video_core/regs_framebuffer.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| namespace Rasterizer { | ||||
| 
 | ||||
| void DrawPixel(int x, int y, const Math::Vec4<u8>& color); | ||||
| const Math::Vec4<u8> GetPixel(int x, int y); | ||||
| u32 GetDepth(int x, int y); | ||||
| u8 GetStencil(int x, int y); | ||||
| void SetDepth(int x, int y, u32 value); | ||||
| void SetStencil(int x, int y, u8 value); | ||||
| u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); | ||||
| 
 | ||||
| Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||||
|                                      const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||||
|                                      FramebufferRegs::BlendEquation equation); | ||||
| 
 | ||||
| u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op); | ||||
| 
 | ||||
| } // namespace Rasterizer
 | ||||
| } // namespace Pica
 | ||||
|  | @ -18,254 +18,19 @@ | |||
| #include "video_core/debug_utils/debug_utils.h" | ||||
| #include "video_core/pica_state.h" | ||||
| #include "video_core/pica_types.h" | ||||
| #include "video_core/rasterizer.h" | ||||
| #include "video_core/regs_framebuffer.h" | ||||
| #include "video_core/regs_rasterizer.h" | ||||
| #include "video_core/regs_texturing.h" | ||||
| #include "video_core/shader/shader.h" | ||||
| #include "video_core/swrasterizer/framebuffer.h" | ||||
| #include "video_core/swrasterizer/rasterizer.h" | ||||
| #include "video_core/swrasterizer/texturing.h" | ||||
| #include "video_core/texture/texture_decode.h" | ||||
| #include "video_core/utils.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| 
 | ||||
| namespace Rasterizer { | ||||
| 
 | ||||
| static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||
| 
 | ||||
|     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 | ||||
|     // NOTE: The framebuffer height register contains the actual FB height minus one.
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = | ||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||
|                      coarse_y * framebuffer.width * bytes_per_pixel; | ||||
|     u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | ||||
| 
 | ||||
|     switch (framebuffer.color_format) { | ||||
|     case FramebufferRegs::ColorFormat::RGBA8: | ||||
|         Color::EncodeRGBA8(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB8: | ||||
|         Color::EncodeRGB8(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB5A1: | ||||
|         Color::EncodeRGB5A1(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB565: | ||||
|         Color::EncodeRGB565(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGBA4: | ||||
|         Color::EncodeRGBA4(color, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", | ||||
|                      framebuffer.color_format.Value()); | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static const Math::Vec4<u8> GetPixel(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = | ||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||
|                      coarse_y * framebuffer.width * bytes_per_pixel; | ||||
|     u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | ||||
| 
 | ||||
|     switch (framebuffer.color_format) { | ||||
|     case FramebufferRegs::ColorFormat::RGBA8: | ||||
|         return Color::DecodeRGBA8(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB8: | ||||
|         return Color::DecodeRGB8(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB5A1: | ||||
|         return Color::DecodeRGB5A1(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGB565: | ||||
|         return Color::DecodeRGB565(src_pixel); | ||||
| 
 | ||||
|     case FramebufferRegs::ColorFormat::RGBA4: | ||||
|         return Color::DecodeRGBA4(src_pixel); | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", | ||||
|                      framebuffer.color_format.Value()); | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| 
 | ||||
|     return {0, 0, 0, 0}; | ||||
| } | ||||
| 
 | ||||
| static u32 GetDepth(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* src_pixel = depth_buffer + src_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case FramebufferRegs::DepthFormat::D16: | ||||
|         return Color::DecodeD16(src_pixel); | ||||
|     case FramebufferRegs::DepthFormat::D24: | ||||
|         return Color::DecodeD24(src_pixel); | ||||
|     case FramebufferRegs::DepthFormat::D24S8: | ||||
|         return Color::DecodeD24S8(src_pixel).x; | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static u8 GetStencil(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* src_pixel = depth_buffer + src_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case FramebufferRegs::DepthFormat::D24S8: | ||||
|         return Color::DecodeD24S8(src_pixel).y; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_WARNING( | ||||
|             HW_GPU, | ||||
|             "GetStencil called for function which doesn't have a stencil component (format %u)", | ||||
|             framebuffer.depth_format); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void SetDepth(int x, int y, u32 value) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* dst_pixel = depth_buffer + dst_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case FramebufferRegs::DepthFormat::D16: | ||||
|         Color::EncodeD16(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::DepthFormat::D24: | ||||
|         Color::EncodeD24(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::DepthFormat::D24S8: | ||||
|         Color::EncodeD24X8(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|         UNIMPLEMENTED(); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void SetStencil(int x, int y, u8 value) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
| 
 | ||||
|     y = framebuffer.height - y; | ||||
| 
 | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
| 
 | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* dst_pixel = depth_buffer + dst_offset; | ||||
| 
 | ||||
|     switch (framebuffer.depth_format) { | ||||
|     case Pica::FramebufferRegs::DepthFormat::D16: | ||||
|     case Pica::FramebufferRegs::DepthFormat::D24: | ||||
|         // Nothing to do
 | ||||
|         break; | ||||
| 
 | ||||
|     case Pica::FramebufferRegs::DepthFormat::D24S8: | ||||
|         Color::EncodeX24S8(value, dst_pixel); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|         UNIMPLEMENTED(); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { | ||||
|     switch (action) { | ||||
|     case FramebufferRegs::StencilAction::Keep: | ||||
|         return old_stencil; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Zero: | ||||
|         return 0; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Replace: | ||||
|         return ref; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Increment: | ||||
|         // Saturated increment
 | ||||
|         return std::min<u8>(old_stencil, 254) + 1; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Decrement: | ||||
|         // Saturated decrement
 | ||||
|         return std::max<u8>(old_stencil, 1) - 1; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::Invert: | ||||
|         return ~old_stencil; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::IncrementWrap: | ||||
|         return old_stencil + 1; | ||||
| 
 | ||||
|     case FramebufferRegs::StencilAction::DecrementWrap: | ||||
|         return old_stencil - 1; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||
| struct Fix12P4 { | ||||
|     Fix12P4() {} | ||||
|  | @ -539,34 +304,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) | ||||
|                             .ToFloat32(); | ||||
| 
 | ||||
|                 static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode, | ||||
|                                                     int val, unsigned size) { | ||||
|                     switch (mode) { | ||||
|                     case TexturingRegs::TextureConfig::ClampToEdge: | ||||
|                         val = std::max(val, 0); | ||||
|                         val = std::min(val, (int)size - 1); | ||||
|                         return val; | ||||
| 
 | ||||
|                     case TexturingRegs::TextureConfig::ClampToBorder: | ||||
|                         return val; | ||||
| 
 | ||||
|                     case TexturingRegs::TextureConfig::Repeat: | ||||
|                         return (int)((unsigned)val % size); | ||||
| 
 | ||||
|                     case TexturingRegs::TextureConfig::MirroredRepeat: { | ||||
|                         unsigned int coord = ((unsigned)val % (2 * size)); | ||||
|                         if (coord >= size) | ||||
|                             coord = 2 * size - 1 - coord; | ||||
|                         return (int)coord; | ||||
|                     } | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | ||||
|                         UNIMPLEMENTED(); | ||||
|                         return 0; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && | ||||
|                      (s < 0 || static_cast<u32>(s) >= texture.config.width)) || | ||||
|                     (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && | ||||
|  | @ -615,9 +352,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                  ++tev_stage_index) { | ||||
|                 const auto& tev_stage = tev_stages[tev_stage_index]; | ||||
|                 using Source = TexturingRegs::TevStageConfig::Source; | ||||
|                 using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier; | ||||
|                 using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier; | ||||
|                 using Operation = TexturingRegs::TevStageConfig::Operation; | ||||
| 
 | ||||
|                 auto GetSource = [&](Source source) -> Math::Vec4<u8> { | ||||
|                     switch (source) { | ||||
|  | @ -657,187 +391,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto GetColorModifier = [](ColorModifier factor, | ||||
|                                                   const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||||
|                     switch (factor) { | ||||
|                     case ColorModifier::SourceColor: | ||||
|                         return values.rgb(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceColor: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceAlpha: | ||||
|                         return values.aaa(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceAlpha: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceRed: | ||||
|                         return values.rrr(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceRed: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceGreen: | ||||
|                         return values.ggg(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceGreen: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceBlue: | ||||
|                         return values.bbb(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceBlue: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto GetAlphaModifier = [](AlphaModifier factor, | ||||
|                                                   const Math::Vec4<u8>& values) -> u8 { | ||||
|                     switch (factor) { | ||||
|                     case AlphaModifier::SourceAlpha: | ||||
|                         return values.a(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceAlpha: | ||||
|                         return 255 - values.a(); | ||||
| 
 | ||||
|                     case AlphaModifier::SourceRed: | ||||
|                         return values.r(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceRed: | ||||
|                         return 255 - values.r(); | ||||
| 
 | ||||
|                     case AlphaModifier::SourceGreen: | ||||
|                         return values.g(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceGreen: | ||||
|                         return 255 - values.g(); | ||||
| 
 | ||||
|                     case AlphaModifier::SourceBlue: | ||||
|                         return values.b(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceBlue: | ||||
|                         return 255 - values.b(); | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto ColorCombine = [](Operation op, | ||||
|                                               const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||||
|                     switch (op) { | ||||
|                     case Operation::Replace: | ||||
|                         return input[0]; | ||||
| 
 | ||||
|                     case Operation::Modulate: | ||||
|                         return ((input[0] * input[1]) / 255).Cast<u8>(); | ||||
| 
 | ||||
|                     case Operation::Add: { | ||||
|                         auto result = input[0] + input[1]; | ||||
|                         result.r() = std::min(255, result.r()); | ||||
|                         result.g() = std::min(255, result.g()); | ||||
|                         result.b() = std::min(255, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::AddSigned: { | ||||
|                         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||
|                         // (byte) 128 is correct
 | ||||
|                         auto result = input[0].Cast<int>() + input[1].Cast<int>() - | ||||
|                                       Math::MakeVec<int>(128, 128, 128); | ||||
|                         result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||||
|                         result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||||
|                         result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::Lerp: | ||||
|                         return ((input[0] * input[2] + | ||||
|                                  input[1] * | ||||
|                                      (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||||
|                                 255) | ||||
|                             .Cast<u8>(); | ||||
| 
 | ||||
|                     case Operation::Subtract: { | ||||
|                         auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||||
|                         result.r() = std::max(0, result.r()); | ||||
|                         result.g() = std::max(0, result.g()); | ||||
|                         result.b() = std::max(0, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::MultiplyThenAdd: { | ||||
|                         auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||||
|                         result.r() = std::min(255, result.r()); | ||||
|                         result.g() = std::min(255, result.g()); | ||||
|                         result.b() = std::min(255, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::AddThenMultiply: { | ||||
|                         auto result = input[0] + input[1]; | ||||
|                         result.r() = std::min(255, result.r()); | ||||
|                         result.g() = std::min(255, result.g()); | ||||
|                         result.b() = std::min(255, result.b()); | ||||
|                         result = (result * input[2].Cast<int>()) / 255; | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
|                     case Operation::Dot3_RGB: { | ||||
|                         // Not fully accurate.
 | ||||
|                         // Worst case scenario seems to yield a +/-3 error
 | ||||
|                         // Some HW results indicate that the per-component computation can't have a
 | ||||
|                         // higher precision than 1/256,
 | ||||
|                         // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
 | ||||
|                         // (0x80,g0,b0),(0x80,g1,b1) ) give different results
 | ||||
|                         int result = | ||||
|                             ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||||
|                             ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||||
|                             ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||||
|                         result = std::max(0, std::min(255, result)); | ||||
|                         return {(u8)result, (u8)result, (u8)result}; | ||||
|                     } | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | ||||
|                         UNIMPLEMENTED(); | ||||
|                         return {0, 0, 0}; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { | ||||
|                     switch (op) { | ||||
|                     case Operation::Replace: | ||||
|                         return input[0]; | ||||
| 
 | ||||
|                     case Operation::Modulate: | ||||
|                         return input[0] * input[1] / 255; | ||||
| 
 | ||||
|                     case Operation::Add: | ||||
|                         return std::min(255, input[0] + input[1]); | ||||
| 
 | ||||
|                     case Operation::AddSigned: { | ||||
|                         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||
|                         // (byte) 128 is correct
 | ||||
|                         auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||||
|                         return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::Lerp: | ||||
|                         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||||
| 
 | ||||
|                     case Operation::Subtract: | ||||
|                         return std::max(0, (int)input[0] - (int)input[1]); | ||||
| 
 | ||||
|                     case Operation::MultiplyThenAdd: | ||||
|                         return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||||
| 
 | ||||
|                     case Operation::AddThenMultiply: | ||||
|                         return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); | ||||
|                         UNIMPLEMENTED(); | ||||
|                         return 0; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 // color combiner
 | ||||
|                 // NOTE: Not sure if the alpha combiner might use the color output of the previous
 | ||||
|                 //       stage as input. Hence, we currently don't directly write the result to
 | ||||
|  | @ -1152,56 +705,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                     return combiner_output[channel]; | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto EvaluateBlendEquation = []( | ||||
|                     const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||||
|                     const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||||
|                     FramebufferRegs::BlendEquation equation) { | ||||
| 
 | ||||
|                     Math::Vec4<int> result; | ||||
| 
 | ||||
|                     auto src_result = (src * srcfactor).Cast<int>(); | ||||
|                     auto dst_result = (dest * destfactor).Cast<int>(); | ||||
| 
 | ||||
|                     switch (equation) { | ||||
|                     case FramebufferRegs::BlendEquation::Add: | ||||
|                         result = (src_result + dst_result) / 255; | ||||
|                         break; | ||||
| 
 | ||||
|                     case FramebufferRegs::BlendEquation::Subtract: | ||||
|                         result = (src_result - dst_result) / 255; | ||||
|                         break; | ||||
| 
 | ||||
|                     case FramebufferRegs::BlendEquation::ReverseSubtract: | ||||
|                         result = (dst_result - src_result) / 255; | ||||
|                         break; | ||||
| 
 | ||||
|                     // TODO: How do these two actually work?
 | ||||
|                     //       OpenGL doesn't include the blend factors in the min/max computations,
 | ||||
|                     //       but is this what the 3DS actually does?
 | ||||
|                     case FramebufferRegs::BlendEquation::Min: | ||||
|                         result.r() = std::min(src.r(), dest.r()); | ||||
|                         result.g() = std::min(src.g(), dest.g()); | ||||
|                         result.b() = std::min(src.b(), dest.b()); | ||||
|                         result.a() = std::min(src.a(), dest.a()); | ||||
|                         break; | ||||
| 
 | ||||
|                     case FramebufferRegs::BlendEquation::Max: | ||||
|                         result.r() = std::max(src.r(), dest.r()); | ||||
|                         result.g() = std::max(src.g(), dest.g()); | ||||
|                         result.b() = std::max(src.b(), dest.b()); | ||||
|                         result.a() = std::max(src.a(), dest.a()); | ||||
|                         break; | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||||
|                         UNIMPLEMENTED(); | ||||
|                     } | ||||
| 
 | ||||
|                     return Math::Vec4<u8>( | ||||
|                         MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), | ||||
|                         MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); | ||||
|                 }; | ||||
| 
 | ||||
|                 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), | ||||
|                                                LookupFactor(1, params.factor_source_rgb), | ||||
|                                                LookupFactor(2, params.factor_source_rgb), | ||||
|  | @ -1218,58 +721,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                                                          dstfactor, params.blend_equation_a) | ||||
|                                        .a(); | ||||
|             } else { | ||||
|                 static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 { | ||||
|                     switch (op) { | ||||
|                     case FramebufferRegs::LogicOp::Clear: | ||||
|                         return 0; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::And: | ||||
|                         return src & dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::AndReverse: | ||||
|                         return src & ~dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Copy: | ||||
|                         return src; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Set: | ||||
|                         return 255; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::CopyInverted: | ||||
|                         return ~src; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::NoOp: | ||||
|                         return dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Invert: | ||||
|                         return ~dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Nand: | ||||
|                         return ~(src & dest); | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Or: | ||||
|                         return src | dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Nor: | ||||
|                         return ~(src | dest); | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Xor: | ||||
|                         return src ^ dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Equiv: | ||||
|                         return ~(src ^ dest); | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::AndInverted: | ||||
|                         return ~src & dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::OrReverse: | ||||
|                         return src | ~dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::OrInverted: | ||||
|                         return ~src | dest; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 blend_output = | ||||
|                     Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | ||||
|                                   LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | ||||
|  | @ -2,8 +2,8 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "video_core/clipper.h" | ||||
| #include "video_core/swrasterizer.h" | ||||
| #include "video_core/swrasterizer/clipper.h" | ||||
| #include "video_core/swrasterizer/swrasterizer.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| 
 | ||||
							
								
								
									
										228
									
								
								src/video_core/swrasterizer/texturing.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										228
									
								
								src/video_core/swrasterizer/texturing.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,228 @@ | |||
| // Copyright 2017 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/math_util.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "video_core/regs_texturing.h" | ||||
| #include "video_core/swrasterizer/texturing.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| namespace Rasterizer { | ||||
| 
 | ||||
| using TevStageConfig = TexturingRegs::TevStageConfig; | ||||
| 
 | ||||
| int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { | ||||
|     switch (mode) { | ||||
|     case TexturingRegs::TextureConfig::ClampToEdge: | ||||
|         val = std::max(val, 0); | ||||
|         val = std::min(val, (int)size - 1); | ||||
|         return val; | ||||
| 
 | ||||
|     case TexturingRegs::TextureConfig::ClampToBorder: | ||||
|         return val; | ||||
| 
 | ||||
|     case TexturingRegs::TextureConfig::Repeat: | ||||
|         return (int)((unsigned)val % size); | ||||
| 
 | ||||
|     case TexturingRegs::TextureConfig::MirroredRepeat: { | ||||
|         unsigned int coord = ((unsigned)val % (2 * size)); | ||||
|         if (coord >= size) | ||||
|             coord = 2 * size - 1 - coord; | ||||
|         return (int)coord; | ||||
|     } | ||||
| 
 | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor, | ||||
|                                 const Math::Vec4<u8>& values) { | ||||
|     using ColorModifier = TevStageConfig::ColorModifier; | ||||
| 
 | ||||
|     switch (factor) { | ||||
|     case ColorModifier::SourceColor: | ||||
|         return values.rgb(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceColor: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceAlpha: | ||||
|         return values.aaa(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceAlpha: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceRed: | ||||
|         return values.rrr(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceRed: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceGreen: | ||||
|         return values.ggg(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceGreen: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceBlue: | ||||
|         return values.bbb(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceBlue: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) { | ||||
|     using AlphaModifier = TevStageConfig::AlphaModifier; | ||||
| 
 | ||||
|     switch (factor) { | ||||
|     case AlphaModifier::SourceAlpha: | ||||
|         return values.a(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceAlpha: | ||||
|         return 255 - values.a(); | ||||
| 
 | ||||
|     case AlphaModifier::SourceRed: | ||||
|         return values.r(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceRed: | ||||
|         return 255 - values.r(); | ||||
| 
 | ||||
|     case AlphaModifier::SourceGreen: | ||||
|         return values.g(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceGreen: | ||||
|         return 255 - values.g(); | ||||
| 
 | ||||
|     case AlphaModifier::SourceBlue: | ||||
|         return values.b(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceBlue: | ||||
|         return 255 - values.b(); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) { | ||||
|     using Operation = TevStageConfig::Operation; | ||||
| 
 | ||||
|     switch (op) { | ||||
|     case Operation::Replace: | ||||
|         return input[0]; | ||||
| 
 | ||||
|     case Operation::Modulate: | ||||
|         return ((input[0] * input[1]) / 255).Cast<u8>(); | ||||
| 
 | ||||
|     case Operation::Add: { | ||||
|         auto result = input[0] + input[1]; | ||||
|         result.r() = std::min(255, result.r()); | ||||
|         result.g() = std::min(255, result.g()); | ||||
|         result.b() = std::min(255, result.b()); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::AddSigned: { | ||||
|         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||
|         // (byte) 128 is correct
 | ||||
|         auto result = | ||||
|             input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | ||||
|         result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||||
|         result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||||
|         result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::Lerp: | ||||
|         return ((input[0] * input[2] + | ||||
|                  input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||||
|                 255) | ||||
|             .Cast<u8>(); | ||||
| 
 | ||||
|     case Operation::Subtract: { | ||||
|         auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||||
|         result.r() = std::max(0, result.r()); | ||||
|         result.g() = std::max(0, result.g()); | ||||
|         result.b() = std::max(0, result.b()); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::MultiplyThenAdd: { | ||||
|         auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||||
|         result.r() = std::min(255, result.r()); | ||||
|         result.g() = std::min(255, result.g()); | ||||
|         result.b() = std::min(255, result.b()); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::AddThenMultiply: { | ||||
|         auto result = input[0] + input[1]; | ||||
|         result.r() = std::min(255, result.r()); | ||||
|         result.g() = std::min(255, result.g()); | ||||
|         result.b() = std::min(255, result.b()); | ||||
|         result = (result * input[2].Cast<int>()) / 255; | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
|     case Operation::Dot3_RGB: { | ||||
|         // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
 | ||||
|         // indicate that the per-component computation can't have a higher precision than 1/256,
 | ||||
|         // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
 | ||||
|         // different results.
 | ||||
|         int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||||
|                      ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||||
|                      ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||||
|         result = std::max(0, std::min(255, result)); | ||||
|         return {(u8)result, (u8)result, (u8)result}; | ||||
|     } | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | ||||
|         UNIMPLEMENTED(); | ||||
|         return {0, 0, 0}; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) { | ||||
|     switch (op) { | ||||
|         using Operation = TevStageConfig::Operation; | ||||
|     case Operation::Replace: | ||||
|         return input[0]; | ||||
| 
 | ||||
|     case Operation::Modulate: | ||||
|         return input[0] * input[1] / 255; | ||||
| 
 | ||||
|     case Operation::Add: | ||||
|         return std::min(255, input[0] + input[1]); | ||||
| 
 | ||||
|     case Operation::AddSigned: { | ||||
|         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
 | ||||
|         auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||||
|         return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::Lerp: | ||||
|         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||||
| 
 | ||||
|     case Operation::Subtract: | ||||
|         return std::max(0, (int)input[0] - (int)input[1]); | ||||
| 
 | ||||
|     case Operation::MultiplyThenAdd: | ||||
|         return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||||
| 
 | ||||
|     case Operation::AddThenMultiply: | ||||
|         return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| } // namespace Rasterizer
 | ||||
| } // namespace Pica
 | ||||
							
								
								
									
										28
									
								
								src/video_core/swrasterizer/texturing.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								src/video_core/swrasterizer/texturing.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,28 @@ | |||
| // Copyright 2017 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "video_core/regs_texturing.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| namespace Rasterizer { | ||||
| 
 | ||||
| int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size); | ||||
| 
 | ||||
| Math::Vec3<u8> GetColorModifier(TexturingRegs::TevStageConfig::ColorModifier factor, | ||||
|                                 const Math::Vec4<u8>& values); | ||||
| 
 | ||||
| u8 GetAlphaModifier(TexturingRegs::TevStageConfig::AlphaModifier factor, | ||||
|                     const Math::Vec4<u8>& values); | ||||
| 
 | ||||
| Math::Vec3<u8> ColorCombine(TexturingRegs::TevStageConfig::Operation op, | ||||
|                             const Math::Vec3<u8> input[3]); | ||||
| 
 | ||||
| u8 AlphaCombine(TexturingRegs::TevStageConfig::Operation op, const std::array<u8, 3>& input); | ||||
| 
 | ||||
| } // namespace Rasterizer
 | ||||
| } // namespace Pica
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue