mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	SWRasterizer: Convert large no-capture lambdas to standalone functions
This commit is contained in:
		
							parent
							
								
									e1ad7d69b9
								
							
						
					
					
						commit
						f9026e8a7a
					
				
					 1 changed files with 310 additions and 315 deletions
				
			
		|  | @ -28,9 +28,318 @@ | |||
| #include "video_core/utils.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| 
 | ||||
| namespace Rasterizer { | ||||
| 
 | ||||
| using TevStageConfig = TexturingRegs::TevStageConfig; | ||||
| 
 | ||||
| static int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { | ||||
|     switch (mode) { | ||||
|     case TexturingRegs::TextureConfig::ClampToEdge: | ||||
|         val = std::max(val, 0); | ||||
|         val = std::min(val, (int)size - 1); | ||||
|         return val; | ||||
| 
 | ||||
|     case TexturingRegs::TextureConfig::ClampToBorder: | ||||
|         return val; | ||||
| 
 | ||||
|     case TexturingRegs::TextureConfig::Repeat: | ||||
|         return (int)((unsigned)val % size); | ||||
| 
 | ||||
|     case TexturingRegs::TextureConfig::MirroredRepeat: { | ||||
|         unsigned int coord = ((unsigned)val % (2 * size)); | ||||
|         if (coord >= size) | ||||
|             coord = 2 * size - 1 - coord; | ||||
|         return (int)coord; | ||||
|     } | ||||
| 
 | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| static Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor, | ||||
|                                        const Math::Vec4<u8>& values) { | ||||
|     using ColorModifier = TevStageConfig::ColorModifier; | ||||
| 
 | ||||
|     switch (factor) { | ||||
|     case ColorModifier::SourceColor: | ||||
|         return values.rgb(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceColor: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceAlpha: | ||||
|         return values.aaa(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceAlpha: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceRed: | ||||
|         return values.rrr(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceRed: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceGreen: | ||||
|         return values.ggg(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceGreen: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||||
| 
 | ||||
|     case ColorModifier::SourceBlue: | ||||
|         return values.bbb(); | ||||
| 
 | ||||
|     case ColorModifier::OneMinusSourceBlue: | ||||
|         return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| static u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) { | ||||
|     using AlphaModifier = TevStageConfig::AlphaModifier; | ||||
| 
 | ||||
|     switch (factor) { | ||||
|     case AlphaModifier::SourceAlpha: | ||||
|         return values.a(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceAlpha: | ||||
|         return 255 - values.a(); | ||||
| 
 | ||||
|     case AlphaModifier::SourceRed: | ||||
|         return values.r(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceRed: | ||||
|         return 255 - values.r(); | ||||
| 
 | ||||
|     case AlphaModifier::SourceGreen: | ||||
|         return values.g(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceGreen: | ||||
|         return 255 - values.g(); | ||||
| 
 | ||||
|     case AlphaModifier::SourceBlue: | ||||
|         return values.b(); | ||||
| 
 | ||||
|     case AlphaModifier::OneMinusSourceBlue: | ||||
|         return 255 - values.b(); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| static Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) { | ||||
|     using Operation = TevStageConfig::Operation; | ||||
| 
 | ||||
|     switch (op) { | ||||
|     case Operation::Replace: | ||||
|         return input[0]; | ||||
| 
 | ||||
|     case Operation::Modulate: | ||||
|         return ((input[0] * input[1]) / 255).Cast<u8>(); | ||||
| 
 | ||||
|     case Operation::Add: { | ||||
|         auto result = input[0] + input[1]; | ||||
|         result.r() = std::min(255, result.r()); | ||||
|         result.g() = std::min(255, result.g()); | ||||
|         result.b() = std::min(255, result.b()); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::AddSigned: { | ||||
|         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||
|         // (byte) 128 is correct
 | ||||
|         auto result = | ||||
|             input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | ||||
|         result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||||
|         result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||||
|         result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::Lerp: | ||||
|         return ((input[0] * input[2] + | ||||
|                  input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||||
|                 255) | ||||
|             .Cast<u8>(); | ||||
| 
 | ||||
|     case Operation::Subtract: { | ||||
|         auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||||
|         result.r() = std::max(0, result.r()); | ||||
|         result.g() = std::max(0, result.g()); | ||||
|         result.b() = std::max(0, result.b()); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::MultiplyThenAdd: { | ||||
|         auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||||
|         result.r() = std::min(255, result.r()); | ||||
|         result.g() = std::min(255, result.g()); | ||||
|         result.b() = std::min(255, result.b()); | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::AddThenMultiply: { | ||||
|         auto result = input[0] + input[1]; | ||||
|         result.r() = std::min(255, result.r()); | ||||
|         result.g() = std::min(255, result.g()); | ||||
|         result.b() = std::min(255, result.b()); | ||||
|         result = (result * input[2].Cast<int>()) / 255; | ||||
|         return result.Cast<u8>(); | ||||
|     } | ||||
|     case Operation::Dot3_RGB: { | ||||
|         // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
 | ||||
|         // indicate that the per-component computation can't have a higher precision than 1/256,
 | ||||
|         // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
 | ||||
|         // different results.
 | ||||
|         int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||||
|                      ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||||
|                      ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||||
|         result = std::max(0, std::min(255, result)); | ||||
|         return {(u8)result, (u8)result, (u8)result}; | ||||
|     } | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | ||||
|         UNIMPLEMENTED(); | ||||
|         return {0, 0, 0}; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| static u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) { | ||||
|     switch (op) { | ||||
|         using Operation = TevStageConfig::Operation; | ||||
|     case Operation::Replace: | ||||
|         return input[0]; | ||||
| 
 | ||||
|     case Operation::Modulate: | ||||
|         return input[0] * input[1] / 255; | ||||
| 
 | ||||
|     case Operation::Add: | ||||
|         return std::min(255, input[0] + input[1]); | ||||
| 
 | ||||
|     case Operation::AddSigned: { | ||||
|         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
 | ||||
|         auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||||
|         return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||||
|     } | ||||
| 
 | ||||
|     case Operation::Lerp: | ||||
|         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||||
| 
 | ||||
|     case Operation::Subtract: | ||||
|         return std::max(0, (int)input[0] - (int)input[1]); | ||||
| 
 | ||||
|     case Operation::MultiplyThenAdd: | ||||
|         return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||||
| 
 | ||||
|     case Operation::AddThenMultiply: | ||||
|         return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| static Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, | ||||
|                                             const Math::Vec4<u8>& srcfactor, | ||||
|                                             const Math::Vec4<u8>& dest, | ||||
|                                             const Math::Vec4<u8>& destfactor, | ||||
|                                             FramebufferRegs::BlendEquation equation) { | ||||
|     Math::Vec4<int> result; | ||||
| 
 | ||||
|     auto src_result = (src * srcfactor).Cast<int>(); | ||||
|     auto dst_result = (dest * destfactor).Cast<int>(); | ||||
| 
 | ||||
|     switch (equation) { | ||||
|     case FramebufferRegs::BlendEquation::Add: | ||||
|         result = (src_result + dst_result) / 255; | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::BlendEquation::Subtract: | ||||
|         result = (src_result - dst_result) / 255; | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::BlendEquation::ReverseSubtract: | ||||
|         result = (dst_result - src_result) / 255; | ||||
|         break; | ||||
| 
 | ||||
|     // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
 | ||||
|     //       min/max computations, but is this what the 3DS actually does?
 | ||||
|     case FramebufferRegs::BlendEquation::Min: | ||||
|         result.r() = std::min(src.r(), dest.r()); | ||||
|         result.g() = std::min(src.g(), dest.g()); | ||||
|         result.b() = std::min(src.b(), dest.b()); | ||||
|         result.a() = std::min(src.a(), dest.a()); | ||||
|         break; | ||||
| 
 | ||||
|     case FramebufferRegs::BlendEquation::Max: | ||||
|         result.r() = std::max(src.r(), dest.r()); | ||||
|         result.g() = std::max(src.g(), dest.g()); | ||||
|         result.b() = std::max(src.b(), dest.b()); | ||||
|         result.a() = std::max(src.a(), dest.a()); | ||||
|         break; | ||||
| 
 | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| 
 | ||||
|     return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), | ||||
|                           MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); | ||||
| }; | ||||
| 
 | ||||
| static u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { | ||||
|     switch (op) { | ||||
|     case FramebufferRegs::LogicOp::Clear: | ||||
|         return 0; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::And: | ||||
|         return src & dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::AndReverse: | ||||
|         return src & ~dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Copy: | ||||
|         return src; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Set: | ||||
|         return 255; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::CopyInverted: | ||||
|         return ~src; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::NoOp: | ||||
|         return dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Invert: | ||||
|         return ~dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Nand: | ||||
|         return ~(src & dest); | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Or: | ||||
|         return src | dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Nor: | ||||
|         return ~(src | dest); | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Xor: | ||||
|         return src ^ dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::Equiv: | ||||
|         return ~(src ^ dest); | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::AndInverted: | ||||
|         return ~src & dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::OrReverse: | ||||
|         return src | ~dest; | ||||
| 
 | ||||
|     case FramebufferRegs::LogicOp::OrInverted: | ||||
|         return ~src | dest; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||
| struct Fix12P4 { | ||||
|     Fix12P4() {} | ||||
|  | @ -304,34 +613,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) | ||||
|                             .ToFloat32(); | ||||
| 
 | ||||
|                 static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode, | ||||
|                                                     int val, unsigned size) { | ||||
|                     switch (mode) { | ||||
|                     case TexturingRegs::TextureConfig::ClampToEdge: | ||||
|                         val = std::max(val, 0); | ||||
|                         val = std::min(val, (int)size - 1); | ||||
|                         return val; | ||||
| 
 | ||||
|                     case TexturingRegs::TextureConfig::ClampToBorder: | ||||
|                         return val; | ||||
| 
 | ||||
|                     case TexturingRegs::TextureConfig::Repeat: | ||||
|                         return (int)((unsigned)val % size); | ||||
| 
 | ||||
|                     case TexturingRegs::TextureConfig::MirroredRepeat: { | ||||
|                         unsigned int coord = ((unsigned)val % (2 * size)); | ||||
|                         if (coord >= size) | ||||
|                             coord = 2 * size - 1 - coord; | ||||
|                         return (int)coord; | ||||
|                     } | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | ||||
|                         UNIMPLEMENTED(); | ||||
|                         return 0; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && | ||||
|                      (s < 0 || static_cast<u32>(s) >= texture.config.width)) || | ||||
|                     (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && | ||||
|  | @ -380,9 +661,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                  ++tev_stage_index) { | ||||
|                 const auto& tev_stage = tev_stages[tev_stage_index]; | ||||
|                 using Source = TexturingRegs::TevStageConfig::Source; | ||||
|                 using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier; | ||||
|                 using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier; | ||||
|                 using Operation = TexturingRegs::TevStageConfig::Operation; | ||||
| 
 | ||||
|                 auto GetSource = [&](Source source) -> Math::Vec4<u8> { | ||||
|                     switch (source) { | ||||
|  | @ -422,187 +700,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto GetColorModifier = [](ColorModifier factor, | ||||
|                                                   const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||||
|                     switch (factor) { | ||||
|                     case ColorModifier::SourceColor: | ||||
|                         return values.rgb(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceColor: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceAlpha: | ||||
|                         return values.aaa(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceAlpha: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceRed: | ||||
|                         return values.rrr(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceRed: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceGreen: | ||||
|                         return values.ggg(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceGreen: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceBlue: | ||||
|                         return values.bbb(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceBlue: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto GetAlphaModifier = [](AlphaModifier factor, | ||||
|                                                   const Math::Vec4<u8>& values) -> u8 { | ||||
|                     switch (factor) { | ||||
|                     case AlphaModifier::SourceAlpha: | ||||
|                         return values.a(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceAlpha: | ||||
|                         return 255 - values.a(); | ||||
| 
 | ||||
|                     case AlphaModifier::SourceRed: | ||||
|                         return values.r(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceRed: | ||||
|                         return 255 - values.r(); | ||||
| 
 | ||||
|                     case AlphaModifier::SourceGreen: | ||||
|                         return values.g(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceGreen: | ||||
|                         return 255 - values.g(); | ||||
| 
 | ||||
|                     case AlphaModifier::SourceBlue: | ||||
|                         return values.b(); | ||||
| 
 | ||||
|                     case AlphaModifier::OneMinusSourceBlue: | ||||
|                         return 255 - values.b(); | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto ColorCombine = [](Operation op, | ||||
|                                               const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||||
|                     switch (op) { | ||||
|                     case Operation::Replace: | ||||
|                         return input[0]; | ||||
| 
 | ||||
|                     case Operation::Modulate: | ||||
|                         return ((input[0] * input[1]) / 255).Cast<u8>(); | ||||
| 
 | ||||
|                     case Operation::Add: { | ||||
|                         auto result = input[0] + input[1]; | ||||
|                         result.r() = std::min(255, result.r()); | ||||
|                         result.g() = std::min(255, result.g()); | ||||
|                         result.b() = std::min(255, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::AddSigned: { | ||||
|                         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||
|                         // (byte) 128 is correct
 | ||||
|                         auto result = input[0].Cast<int>() + input[1].Cast<int>() - | ||||
|                                       Math::MakeVec<int>(128, 128, 128); | ||||
|                         result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||||
|                         result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||||
|                         result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::Lerp: | ||||
|                         return ((input[0] * input[2] + | ||||
|                                  input[1] * | ||||
|                                      (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||||
|                                 255) | ||||
|                             .Cast<u8>(); | ||||
| 
 | ||||
|                     case Operation::Subtract: { | ||||
|                         auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||||
|                         result.r() = std::max(0, result.r()); | ||||
|                         result.g() = std::max(0, result.g()); | ||||
|                         result.b() = std::max(0, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::MultiplyThenAdd: { | ||||
|                         auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||||
|                         result.r() = std::min(255, result.r()); | ||||
|                         result.g() = std::min(255, result.g()); | ||||
|                         result.b() = std::min(255, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::AddThenMultiply: { | ||||
|                         auto result = input[0] + input[1]; | ||||
|                         result.r() = std::min(255, result.r()); | ||||
|                         result.g() = std::min(255, result.g()); | ||||
|                         result.b() = std::min(255, result.b()); | ||||
|                         result = (result * input[2].Cast<int>()) / 255; | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
|                     case Operation::Dot3_RGB: { | ||||
|                         // Not fully accurate.
 | ||||
|                         // Worst case scenario seems to yield a +/-3 error
 | ||||
|                         // Some HW results indicate that the per-component computation can't have a
 | ||||
|                         // higher precision than 1/256,
 | ||||
|                         // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
 | ||||
|                         // (0x80,g0,b0),(0x80,g1,b1) ) give different results
 | ||||
|                         int result = | ||||
|                             ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||||
|                             ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||||
|                             ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||||
|                         result = std::max(0, std::min(255, result)); | ||||
|                         return {(u8)result, (u8)result, (u8)result}; | ||||
|                     } | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | ||||
|                         UNIMPLEMENTED(); | ||||
|                         return {0, 0, 0}; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { | ||||
|                     switch (op) { | ||||
|                     case Operation::Replace: | ||||
|                         return input[0]; | ||||
| 
 | ||||
|                     case Operation::Modulate: | ||||
|                         return input[0] * input[1] / 255; | ||||
| 
 | ||||
|                     case Operation::Add: | ||||
|                         return std::min(255, input[0] + input[1]); | ||||
| 
 | ||||
|                     case Operation::AddSigned: { | ||||
|                         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||
|                         // (byte) 128 is correct
 | ||||
|                         auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||||
|                         return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||||
|                     } | ||||
| 
 | ||||
|                     case Operation::Lerp: | ||||
|                         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||||
| 
 | ||||
|                     case Operation::Subtract: | ||||
|                         return std::max(0, (int)input[0] - (int)input[1]); | ||||
| 
 | ||||
|                     case Operation::MultiplyThenAdd: | ||||
|                         return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||||
| 
 | ||||
|                     case Operation::AddThenMultiply: | ||||
|                         return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); | ||||
|                         UNIMPLEMENTED(); | ||||
|                         return 0; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 // color combiner
 | ||||
|                 // NOTE: Not sure if the alpha combiner might use the color output of the previous
 | ||||
|                 //       stage as input. Hence, we currently don't directly write the result to
 | ||||
|  | @ -917,56 +1014,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                     return combiner_output[channel]; | ||||
|                 }; | ||||
| 
 | ||||
|                 static auto EvaluateBlendEquation = []( | ||||
|                     const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||||
|                     const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||||
|                     FramebufferRegs::BlendEquation equation) { | ||||
| 
 | ||||
|                     Math::Vec4<int> result; | ||||
| 
 | ||||
|                     auto src_result = (src * srcfactor).Cast<int>(); | ||||
|                     auto dst_result = (dest * destfactor).Cast<int>(); | ||||
| 
 | ||||
|                     switch (equation) { | ||||
|                     case FramebufferRegs::BlendEquation::Add: | ||||
|                         result = (src_result + dst_result) / 255; | ||||
|                         break; | ||||
| 
 | ||||
|                     case FramebufferRegs::BlendEquation::Subtract: | ||||
|                         result = (src_result - dst_result) / 255; | ||||
|                         break; | ||||
| 
 | ||||
|                     case FramebufferRegs::BlendEquation::ReverseSubtract: | ||||
|                         result = (dst_result - src_result) / 255; | ||||
|                         break; | ||||
| 
 | ||||
|                     // TODO: How do these two actually work?
 | ||||
|                     //       OpenGL doesn't include the blend factors in the min/max computations,
 | ||||
|                     //       but is this what the 3DS actually does?
 | ||||
|                     case FramebufferRegs::BlendEquation::Min: | ||||
|                         result.r() = std::min(src.r(), dest.r()); | ||||
|                         result.g() = std::min(src.g(), dest.g()); | ||||
|                         result.b() = std::min(src.b(), dest.b()); | ||||
|                         result.a() = std::min(src.a(), dest.a()); | ||||
|                         break; | ||||
| 
 | ||||
|                     case FramebufferRegs::BlendEquation::Max: | ||||
|                         result.r() = std::max(src.r(), dest.r()); | ||||
|                         result.g() = std::max(src.g(), dest.g()); | ||||
|                         result.b() = std::max(src.b(), dest.b()); | ||||
|                         result.a() = std::max(src.a(), dest.a()); | ||||
|                         break; | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||||
|                         UNIMPLEMENTED(); | ||||
|                     } | ||||
| 
 | ||||
|                     return Math::Vec4<u8>( | ||||
|                         MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), | ||||
|                         MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); | ||||
|                 }; | ||||
| 
 | ||||
|                 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), | ||||
|                                                LookupFactor(1, params.factor_source_rgb), | ||||
|                                                LookupFactor(2, params.factor_source_rgb), | ||||
|  | @ -983,58 +1030,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
|                                                          dstfactor, params.blend_equation_a) | ||||
|                                        .a(); | ||||
|             } else { | ||||
|                 static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 { | ||||
|                     switch (op) { | ||||
|                     case FramebufferRegs::LogicOp::Clear: | ||||
|                         return 0; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::And: | ||||
|                         return src & dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::AndReverse: | ||||
|                         return src & ~dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Copy: | ||||
|                         return src; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Set: | ||||
|                         return 255; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::CopyInverted: | ||||
|                         return ~src; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::NoOp: | ||||
|                         return dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Invert: | ||||
|                         return ~dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Nand: | ||||
|                         return ~(src & dest); | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Or: | ||||
|                         return src | dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Nor: | ||||
|                         return ~(src | dest); | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Xor: | ||||
|                         return src ^ dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::Equiv: | ||||
|                         return ~(src ^ dest); | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::AndInverted: | ||||
|                         return ~src & dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::OrReverse: | ||||
|                         return src | ~dest; | ||||
| 
 | ||||
|                     case FramebufferRegs::LogicOp::OrInverted: | ||||
|                         return ~src | dest; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 blend_output = | ||||
|                     Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | ||||
|                                   LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue