mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	
						commit
						ba0bfe7d82
					
				
					 4 changed files with 118 additions and 20 deletions
				
			
		
							
								
								
									
										2
									
								
								externals/nihstro
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								externals/nihstro
									
										
									
									
										vendored
									
									
								
							|  | @ -1 +1 @@ | ||||||
| Subproject commit 4a78588b308564f7ebae193e0ae00d9a0d5741d5 | Subproject commit 81f1804a43f625e3a1a20752c0db70a413410380 | ||||||
|  | @ -226,7 +226,8 @@ struct Regs { | ||||||
|             Texture1               = 0x4, |             Texture1               = 0x4, | ||||||
|             Texture2               = 0x5, |             Texture2               = 0x5, | ||||||
|             Texture3               = 0x6, |             Texture3               = 0x6, | ||||||
|             // 0x7-0xc = primary color??
 | 
 | ||||||
|  |             PreviousBuffer         = 0xd, | ||||||
|             Constant               = 0xe, |             Constant               = 0xe, | ||||||
|             Previous               = 0xf, |             Previous               = 0xf, | ||||||
|         }; |         }; | ||||||
|  | @ -299,7 +300,18 @@ struct Regs { | ||||||
|             BitField<24, 8, u32> const_a; |             BitField<24, 8, u32> const_a; | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         INSERT_PADDING_WORDS(0x1); |         union { | ||||||
|  |             BitField< 0, 2, u32> color_scale; | ||||||
|  |             BitField<16, 2, u32> alpha_scale; | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         inline unsigned GetColorMultiplier() const { | ||||||
|  |             return (color_scale < 3) ? (1 << color_scale) : 1; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline unsigned GetAlphaMultiplier() const { | ||||||
|  |             return (alpha_scale < 3) ? (1 << alpha_scale) : 1; | ||||||
|  |         } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     TevStageConfig tev_stage0; |     TevStageConfig tev_stage0; | ||||||
|  | @ -309,11 +321,36 @@ struct Regs { | ||||||
|     TevStageConfig tev_stage2; |     TevStageConfig tev_stage2; | ||||||
|     INSERT_PADDING_WORDS(0x3); |     INSERT_PADDING_WORDS(0x3); | ||||||
|     TevStageConfig tev_stage3; |     TevStageConfig tev_stage3; | ||||||
|     INSERT_PADDING_WORDS(0x13); |     INSERT_PADDING_WORDS(0x3); | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
 | ||||||
|  |         // these masks is set
 | ||||||
|  |         BitField< 8, 4, u32> update_mask_rgb; | ||||||
|  |         BitField<12, 4, u32> update_mask_a; | ||||||
|  | 
 | ||||||
|  |         bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { | ||||||
|  |             return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { | ||||||
|  |             return (stage_index < 4) && (update_mask_a & (1 << stage_index)); | ||||||
|  |         } | ||||||
|  |     } tev_combiner_buffer_input; | ||||||
|  |      | ||||||
|  |     INSERT_PADDING_WORDS(0xf); | ||||||
|     TevStageConfig tev_stage4; |     TevStageConfig tev_stage4; | ||||||
|     INSERT_PADDING_WORDS(0x3); |     INSERT_PADDING_WORDS(0x3); | ||||||
|     TevStageConfig tev_stage5; |     TevStageConfig tev_stage5; | ||||||
|     INSERT_PADDING_WORDS(0x3); | 
 | ||||||
|  |     union { | ||||||
|  |         BitField< 0, 8, u32> r; | ||||||
|  |         BitField< 8, 8, u32> g; | ||||||
|  |         BitField<16, 8, u32> b; | ||||||
|  |         BitField<24, 8, u32> a; | ||||||
|  |     } tev_combiner_buffer_color; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x2); | ||||||
| 
 | 
 | ||||||
|     const std::array<Regs::TevStageConfig,6> GetTevStages() const { |     const std::array<Regs::TevStageConfig,6> GetTevStages() const { | ||||||
|         return { tev_stage0, tev_stage1, |         return { tev_stage0, tev_stage1, | ||||||
|  | @ -426,9 +463,7 @@ struct Regs { | ||||||
|         D24S8  = 3 |         D24S8  = 3 | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     /*
 |     // Returns the number of bytes in the specified depth format
 | ||||||
|      * Returns the number of bytes in the specified depth format |  | ||||||
|      */ |  | ||||||
|     static u32 BytesPerDepthPixel(DepthFormat format) { |     static u32 BytesPerDepthPixel(DepthFormat format) { | ||||||
|         switch (format) { |         switch (format) { | ||||||
|         case DepthFormat::D16: |         case DepthFormat::D16: | ||||||
|  | @ -443,6 +478,20 @@ struct Regs { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     // Returns the number of bits per depth component of the specified depth format
 | ||||||
|  |     static u32 DepthBitsPerPixel(DepthFormat format) { | ||||||
|  |         switch (format) { | ||||||
|  |         case DepthFormat::D16: | ||||||
|  |             return 16; | ||||||
|  |         case DepthFormat::D24: | ||||||
|  |         case DepthFormat::D24S8: | ||||||
|  |             return 24; | ||||||
|  |         default: | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); | ||||||
|  |             UNIMPLEMENTED(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     struct { |     struct { | ||||||
|         // Components are laid out in reverse byte order, most significant bits first.
 |         // Components are laid out in reverse byte order, most significant bits first.
 | ||||||
|         enum ColorFormat : u32 { |         enum ColorFormat : u32 { | ||||||
|  | @ -784,8 +833,10 @@ struct Regs { | ||||||
|         ADD_FIELD(tev_stage1); |         ADD_FIELD(tev_stage1); | ||||||
|         ADD_FIELD(tev_stage2); |         ADD_FIELD(tev_stage2); | ||||||
|         ADD_FIELD(tev_stage3); |         ADD_FIELD(tev_stage3); | ||||||
|  |         ADD_FIELD(tev_combiner_buffer_input); | ||||||
|         ADD_FIELD(tev_stage4); |         ADD_FIELD(tev_stage4); | ||||||
|         ADD_FIELD(tev_stage5); |         ADD_FIELD(tev_stage5); | ||||||
|  |         ADD_FIELD(tev_combiner_buffer_color); | ||||||
|         ADD_FIELD(output_merger); |         ADD_FIELD(output_merger); | ||||||
|         ADD_FIELD(framebuffer); |         ADD_FIELD(framebuffer); | ||||||
|         ADD_FIELD(vertex_attributes); |         ADD_FIELD(vertex_attributes); | ||||||
|  | @ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0); | ||||||
| ASSERT_REG_POSITION(tev_stage1, 0xc8); | ASSERT_REG_POSITION(tev_stage1, 0xc8); | ||||||
| ASSERT_REG_POSITION(tev_stage2, 0xd0); | ASSERT_REG_POSITION(tev_stage2, 0xd0); | ||||||
| ASSERT_REG_POSITION(tev_stage3, 0xd8); | ASSERT_REG_POSITION(tev_stage3, 0xd8); | ||||||
|  | ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0); | ||||||
| ASSERT_REG_POSITION(tev_stage4, 0xf0); | ASSERT_REG_POSITION(tev_stage4, 0xf0); | ||||||
| ASSERT_REG_POSITION(tev_stage5, 0xf8); | ASSERT_REG_POSITION(tev_stage5, 0xf8); | ||||||
|  | ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); | ||||||
| ASSERT_REG_POSITION(output_merger, 0x100); | ASSERT_REG_POSITION(output_merger, 0x100); | ||||||
| ASSERT_REG_POSITION(framebuffer, 0x110); | ASSERT_REG_POSITION(framebuffer, 0x110); | ||||||
| ASSERT_REG_POSITION(vertex_attributes, 0x200); | ASSERT_REG_POSITION(vertex_attributes, 0x200); | ||||||
|  |  | ||||||
|  | @ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return {}; |     return {0, 0, 0, 0}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static u32 GetDepth(int x, int y) { | static u32 GetDepth(int x, int y) { | ||||||
|  | @ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|             // with some basic arithmetic. Alpha combiners can be configured separately but work
 |             // with some basic arithmetic. Alpha combiners can be configured separately but work
 | ||||||
|             // analogously.
 |             // analogously.
 | ||||||
|             Math::Vec4<u8> combiner_output; |             Math::Vec4<u8> combiner_output; | ||||||
|             for (const auto& tev_stage : tev_stages) { |             Math::Vec4<u8> combiner_buffer = { | ||||||
|  |                 registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, | ||||||
|  |                 registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||||||
|  |                 const auto& tev_stage = tev_stages[tev_stage_index]; | ||||||
|                 using Source = Regs::TevStageConfig::Source; |                 using Source = Regs::TevStageConfig::Source; | ||||||
|                 using ColorModifier = Regs::TevStageConfig::ColorModifier; |                 using ColorModifier = Regs::TevStageConfig::ColorModifier; | ||||||
|                 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; |                 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; | ||||||
|  | @ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                     case Source::Texture2: |                     case Source::Texture2: | ||||||
|                         return texture_color[2]; |                         return texture_color[2]; | ||||||
| 
 | 
 | ||||||
|  |                     case Source::PreviousBuffer: | ||||||
|  |                         return combiner_buffer; | ||||||
|  | 
 | ||||||
|                     case Source::Constant: |                     case Source::Constant: | ||||||
|                         return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; |                         return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; | ||||||
| 
 | 
 | ||||||
|  | @ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                     default: |                     default: | ||||||
|                         LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); |                         LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); | ||||||
|                         UNIMPLEMENTED(); |                         UNIMPLEMENTED(); | ||||||
|                         return {}; |                         return {0, 0, 0, 0}; | ||||||
|                     } |                     } | ||||||
|                 }; |                 }; | ||||||
| 
 | 
 | ||||||
|  | @ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                         return result.Cast<u8>(); |                         return result.Cast<u8>(); | ||||||
|                     } |                     } | ||||||
| 
 | 
 | ||||||
|  |                     case Operation::AddSigned: | ||||||
|  |                     { | ||||||
|  |                         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
 | ||||||
|  |                         auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | ||||||
|  |                         result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||||||
|  |                         result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||||||
|  |                         result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||||||
|  |                         return result.Cast<u8>(); | ||||||
|  |                     } | ||||||
|  | 
 | ||||||
|                     case Operation::Lerp: |                     case Operation::Lerp: | ||||||
|                         return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); |                         return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | ||||||
| 
 | 
 | ||||||
|  | @ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                     default: |                     default: | ||||||
|                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); |                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | ||||||
|                         UNIMPLEMENTED(); |                         UNIMPLEMENTED(); | ||||||
|                         return {}; |                         return {0, 0, 0}; | ||||||
|                     } |                     } | ||||||
|                 }; |                 }; | ||||||
| 
 | 
 | ||||||
|  | @ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
|                 }; |                 }; | ||||||
|                 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); |                 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | ||||||
| 
 | 
 | ||||||
|                 combiner_output = Math::MakeVec(color_output, alpha_output); |                 combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); | ||||||
|  |                 combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); | ||||||
|  |                 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | ||||||
|  |                 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | ||||||
|  | 
 | ||||||
|  |                 if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | ||||||
|  |                     combiner_buffer.r() = combiner_output.r(); | ||||||
|  |                     combiner_buffer.g() = combiner_output.g(); | ||||||
|  |                     combiner_buffer.b() = combiner_output.b(); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | ||||||
|  |                     combiner_buffer.a() = combiner_output.a(); | ||||||
|  |                 } | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             if (registers.output_merger.alpha_test.enable) { |             if (registers.output_merger.alpha_test.enable) { | ||||||
|  | @ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||||
| 
 | 
 | ||||||
|             // TODO: Does depth indeed only get written if depth testing is enabled?
 |             // TODO: Does depth indeed only get written if depth testing is enabled?
 | ||||||
|             if (registers.output_merger.depth_test_enable) { |             if (registers.output_merger.depth_test_enable) { | ||||||
|                 u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + |                 unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); | ||||||
|  |                 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + | ||||||
|                                v1.screenpos[2].ToFloat32() * w1 + |                                v1.screenpos[2].ToFloat32() * w1 + | ||||||
|                             v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |                                v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); | ||||||
|                 u32 ref_z = GetDepth(x >> 4, y >> 4); |                 u32 ref_z = GetDepth(x >> 4, y >> 4); | ||||||
| 
 | 
 | ||||||
|                 bool pass = false; |                 bool pass = false; | ||||||
|  |  | ||||||
|  | @ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) { | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  |             case OpCode::Id::FLR: | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|             case OpCode::Id::MAX: |             case OpCode::Id::MAX: | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  | @ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) { | ||||||
| 
 | 
 | ||||||
|         case OpCode::Type::MultiplyAdd: |         case OpCode::Type::MultiplyAdd: | ||||||
|         { |         { | ||||||
|             if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { |             if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||  | ||||||
|  |                 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { | ||||||
|                 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; |                 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; | ||||||
| 
 | 
 | ||||||
|                 const float24* src1_ = LookupSourceRegister(instr.mad.src1); |                 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); | ||||||
|                 const float24* src2_ = LookupSourceRegister(instr.mad.src2); | 
 | ||||||
|                 const float24* src3_ = LookupSourceRegister(instr.mad.src3); |                 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); | ||||||
|  |                 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); | ||||||
|  |                 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted)); | ||||||
| 
 | 
 | ||||||
|                 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |                 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | ||||||
|                 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |                 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue