mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Merge pull request #358 from neobrain/pica_progress2
pica_progress followups
This commit is contained in:
		
						commit
						7c8f6ca051
					
				
					 11 changed files with 385 additions and 125 deletions
				
			
		|  | @ -10,6 +10,7 @@ | |||
| #include <QPushButton> | ||||
| #include <QSpinBox> | ||||
| 
 | ||||
| #include "video_core/color.h" | ||||
| #include "video_core/pica.h" | ||||
| 
 | ||||
| #include "graphics_framebuffer.hxx" | ||||
|  | @ -202,7 +203,8 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
|         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); | ||||
|         framebuffer_width = framebuffer.GetWidth(); | ||||
|         framebuffer_height = framebuffer.GetHeight(); | ||||
|         framebuffer_format = static_cast<Format>(framebuffer.color_format); | ||||
|         // TODO: It's unknown how this format is actually specified
 | ||||
|         framebuffer_format = Format::RGBA8; | ||||
| 
 | ||||
|         break; | ||||
|     } | ||||
|  | @ -258,10 +260,10 @@ void GraphicsFramebufferWidget::OnUpdate() | |||
|         for (unsigned y = 0; y < framebuffer_height; ++y) { | ||||
|             for (unsigned x = 0; x < framebuffer_width; ++x) { | ||||
|                 u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); | ||||
|                 u8 r = (value >> 11) & 0x1F; | ||||
|                 u8 g = (value >> 6) & 0x1F; | ||||
|                 u8 b = (value >> 1) & 0x1F; | ||||
|                 u8 a = value & 1; | ||||
|                 u8 r = Color::Convert5To8((value >> 11) & 0x1F); | ||||
|                 u8 g = Color::Convert5To8((value >> 6) & 0x1F); | ||||
|                 u8 b = Color::Convert5To8((value >> 1) & 0x1F); | ||||
|                 u8 a = Color::Convert1To8(value & 1); | ||||
| 
 | ||||
|                 decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/)); | ||||
|             } | ||||
|  |  | |||
|  | @ -94,11 +94,15 @@ inline void Write(u32 addr, const T data) { | |||
|                         int r, g, b, a; | ||||
|                     } source_color = { 0, 0, 0, 0 }; | ||||
| 
 | ||||
|                     // Cheap emulation of horizontal scaling: Just skip each second pixel of the
 | ||||
|                     // input framebuffer. We keep track of this in the pixel_skip variable.
 | ||||
|                     unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; | ||||
| 
 | ||||
|                     switch (config.input_format) { | ||||
|                     case Regs::PixelFormat::RGBA8: | ||||
|                     { | ||||
|                         // TODO: Most likely got the component order messed up.
 | ||||
|                         u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4; | ||||
|                         u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip; | ||||
|                         source_color.r = srcptr[0]; // blue
 | ||||
|                         source_color.g = srcptr[1]; // green
 | ||||
|                         source_color.b = srcptr[2]; // red
 | ||||
|  |  | |||
|  | @ -157,6 +157,9 @@ struct Regs { | |||
|             BitField< 8, 3, PixelFormat> input_format; | ||||
|             BitField<12, 3, PixelFormat> output_format; | ||||
|             BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
 | ||||
| 
 | ||||
|             // TODO: Not really sure if this actually scales, or even resizes at all.
 | ||||
|             BitField<24, 1, u32> scale_horizontally; | ||||
|         }; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
|  |  | |||
							
								
								
									
										32
									
								
								src/video_core/color.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								src/video_core/color.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| // Copyright 2014 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace Color { | ||||
| 
 | ||||
| /// Convert a 1-bit color component to 8 bit
 | ||||
| static inline u8 Convert1To8(u8 value) { | ||||
|     return value * 255; | ||||
| } | ||||
| 
 | ||||
| /// Convert a 4-bit color component to 8 bit
 | ||||
| static inline u8 Convert4To8(u8 value) { | ||||
|     return (value << 4) | value; | ||||
| } | ||||
| 
 | ||||
| /// Convert a 5-bit color component to 8 bit
 | ||||
| static inline u8 Convert5To8(u8 value) { | ||||
|     return (value << 3) | (value >> 2); | ||||
| } | ||||
| 
 | ||||
| /// Convert a 6-bit color component to 8 bit
 | ||||
| static inline u8 Convert6To8(u8 value) { | ||||
|     return (value << 2) | (value >> 4); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| } // namespace
 | ||||
|  | @ -112,6 +112,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|                 // Initialize data for the current vertex
 | ||||
|                 VertexShader::InputVertex input; | ||||
| 
 | ||||
|                 // Load a debugging token to check whether this gets loaded by the running
 | ||||
|                 // application or not.
 | ||||
|                 static const float24 debug_token = float24::FromRawFloat24(0x00abcdef); | ||||
|                 input.attr[0].w = debug_token; | ||||
| 
 | ||||
|                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||||
|                     for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||
|                         const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); | ||||
|  | @ -136,6 +141,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|                     } | ||||
|                 } | ||||
| 
 | ||||
|                 // HACK: Some games do not initialize the vertex position's w component. This leads
 | ||||
|                 //       to critical issues since it messes up perspective division. As a
 | ||||
|                 //       workaround, we force the fourth component to 1.0 if we find this to be the
 | ||||
|                 //       case.
 | ||||
|                 //       To do this, we additionally have to assume that the first input attribute
 | ||||
|                 //       is the vertex position, since there's no information about this other than
 | ||||
|                 //       the empiric observation that this is usually the case.
 | ||||
|                 if (input.attr[0].w == debug_token) | ||||
|                     input.attr[0].w = float24::FromFloat32(1.0); | ||||
| 
 | ||||
|                 if (g_debug_context) | ||||
|                     g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); | ||||
| 
 | ||||
|  | @ -173,6 +188,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 
 | ||||
|             break; | ||||
| 
 | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): | ||||
|         { | ||||
|             int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); | ||||
|             auto values = registers.vs_int_uniforms[index]; | ||||
|             VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w); | ||||
|             LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", | ||||
|                       index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): | ||||
|         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): | ||||
|  |  | |||
|  | @ -19,6 +19,7 @@ | |||
| #include "common/log.h" | ||||
| #include "common/file_util.h" | ||||
| 
 | ||||
| #include "video_core/color.h" | ||||
| #include "video_core/math.h" | ||||
| #include "video_core/pica.h" | ||||
| 
 | ||||
|  | @ -359,29 +360,26 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
|         u8 g = ((source_ptr) >> 6) & 0x1F; | ||||
|         u8 b = (source_ptr >> 1) & 0x1F; | ||||
|         u8 a = source_ptr & 1; | ||||
|         return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255)); | ||||
|         return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g), | ||||
|                                  Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a)); | ||||
|     } | ||||
| 
 | ||||
|     case Regs::TextureFormat::RGB565: | ||||
|     { | ||||
|         const u16 source_ptr = *(const u16*)(source + offset * 2); | ||||
|         u8 r = (source_ptr >> 11) & 0x1F; | ||||
|         u8 g = ((source_ptr) >> 5) & 0x3F; | ||||
|         u8 b = (source_ptr) & 0x1F; | ||||
|         return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255); | ||||
|         u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); | ||||
|         u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); | ||||
|         u8 b = Color::Convert5To8((source_ptr) & 0x1F); | ||||
|         return Math::MakeVec<u8>(r, g, b, 255); | ||||
|     } | ||||
| 
 | ||||
|     case Regs::TextureFormat::RGBA4: | ||||
|     { | ||||
|         const u8* source_ptr = source + offset * 2; | ||||
|         u8 r = source_ptr[1] >> 4; | ||||
|         u8 g = source_ptr[1] & 0xFF; | ||||
|         u8 b = source_ptr[0] >> 4; | ||||
|         u8 a = source_ptr[0] & 0xFF; | ||||
|         r = (r << 4) | r; | ||||
|         g = (g << 4) | g; | ||||
|         b = (b << 4) | b; | ||||
|         a = (a << 4) | a; | ||||
|         u8 r = Color::Convert4To8(source_ptr[1] >> 4); | ||||
|         u8 g = Color::Convert4To8(source_ptr[1] & 0xF); | ||||
|         u8 b = Color::Convert4To8(source_ptr[0] >> 4); | ||||
|         u8 a = Color::Convert4To8(source_ptr[0] & 0xF); | ||||
|         return { r, g, b, disable_alpha ? (u8)255 : a }; | ||||
|     } | ||||
| 
 | ||||
|  | @ -389,13 +387,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
|     { | ||||
|         const u8* source_ptr = source + offset * 2; | ||||
| 
 | ||||
|         // TODO: component order not verified
 | ||||
| 
 | ||||
|         if (disable_alpha) { | ||||
|             // Show intensity as red, alpha as green
 | ||||
|             return { source_ptr[0], source_ptr[1], 0, 255 }; | ||||
|             return { source_ptr[1], source_ptr[0], 0, 255 }; | ||||
|         } else { | ||||
|             return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]}; | ||||
|             return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -418,14 +414,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
| 
 | ||||
|     case Regs::TextureFormat::IA4: | ||||
|     { | ||||
|         const u8* source_ptr = source + offset / 2; | ||||
|         const u8* source_ptr = source + offset; | ||||
| 
 | ||||
|         // TODO: component order not verified
 | ||||
| 
 | ||||
|         u8 i = (*source_ptr) & 0xF; | ||||
|         u8 a = ((*source_ptr) & 0xF0) >> 4; | ||||
|         a |= a << 4; | ||||
|         i |= i << 4; | ||||
|         u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); | ||||
|         u8 a = Color::Convert4To8((*source_ptr) & 0xF); | ||||
| 
 | ||||
|         if (disable_alpha) { | ||||
|             // Show intensity as red, alpha as green
 | ||||
|  | @ -439,15 +431,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture | |||
|     { | ||||
|         const u8* source_ptr = source + offset / 2; | ||||
| 
 | ||||
|         // TODO: component order not verified
 | ||||
| 
 | ||||
|         u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); | ||||
|         a |= a << 4; | ||||
|         a = Color::Convert4To8(a); | ||||
| 
 | ||||
|         if (disable_alpha) { | ||||
|             return { *source_ptr, *source_ptr, *source_ptr, 255 }; | ||||
|             return { a, a, a, 255 }; | ||||
|         } else { | ||||
|             return { 0, 0, 0, *source_ptr }; | ||||
|             return { 0, 0, 0, a }; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -50,7 +50,19 @@ struct Regs { | |||
| 
 | ||||
|     u32 trigger_irq; | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x30); | ||||
|     INSERT_PADDING_WORDS(0x2f); | ||||
| 
 | ||||
|     enum class CullMode : u32 { | ||||
|         // Select which polygons are considered to be "frontfacing".
 | ||||
|         KeepAll              = 0, | ||||
|         KeepClockWise        = 1, | ||||
|         KeepCounterClockWise = 2, | ||||
|         // TODO: What does the third value imply?
 | ||||
|     }; | ||||
| 
 | ||||
|     union { | ||||
|         BitField<0, 2, CullMode> cull_mode; | ||||
|     }; | ||||
| 
 | ||||
|     BitField<0, 24, u32> viewport_size_x; | ||||
| 
 | ||||
|  | @ -289,7 +301,7 @@ struct Regs { | |||
|     TevStageConfig tev_stage4; | ||||
|     INSERT_PADDING_WORDS(0x3); | ||||
|     TevStageConfig tev_stage5; | ||||
|     INSERT_PADDING_WORDS(0x13); | ||||
|     INSERT_PADDING_WORDS(0x3); | ||||
| 
 | ||||
|     const std::array<Regs::TevStageConfig,6> GetTevStages() const { | ||||
|         return { tev_stage0, tev_stage1, | ||||
|  | @ -297,6 +309,60 @@ struct Regs { | |||
|                  tev_stage4, tev_stage5 }; | ||||
|     }; | ||||
| 
 | ||||
|     struct { | ||||
|         enum DepthFunc : u32 { | ||||
|             Always      = 1, | ||||
|             LessThan    = 4, | ||||
|             GreaterThan = 6, | ||||
|         }; | ||||
| 
 | ||||
|         union { | ||||
|             // If false, logic blending is used
 | ||||
|             BitField<8, 1, u32> alphablend_enable; | ||||
|         }; | ||||
| 
 | ||||
|         union { | ||||
|             enum BlendEquation : u32 { | ||||
|                 Add = 0, | ||||
|             }; | ||||
| 
 | ||||
|             enum BlendFactor : u32 { | ||||
|                 Zero = 0, | ||||
|                 One = 1, | ||||
| 
 | ||||
|                 SourceAlpha = 6, | ||||
|                 OneMinusSourceAlpha = 7, | ||||
|             }; | ||||
| 
 | ||||
|             BitField< 0, 8, BlendEquation> blend_equation_rgb; | ||||
|             BitField< 8, 8, BlendEquation> blend_equation_a; | ||||
| 
 | ||||
|             BitField<16, 4, BlendFactor> factor_source_rgb; | ||||
|             BitField<20, 4, BlendFactor> factor_dest_rgb; | ||||
| 
 | ||||
|             BitField<24, 4, BlendFactor> factor_source_a; | ||||
|             BitField<28, 4, BlendFactor> factor_dest_a; | ||||
|         } alpha_blending; | ||||
| 
 | ||||
|         union { | ||||
|             enum Op { | ||||
|                 Set = 4, | ||||
|             }; | ||||
| 
 | ||||
|             BitField<0, 4, Op> op; | ||||
|         } logic_op; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x4); | ||||
| 
 | ||||
|         union { | ||||
|             BitField< 0, 1, u32> depth_test_enable; | ||||
|             BitField< 4, 3, DepthFunc> depth_test_func; | ||||
|             BitField<12, 1, u32> depth_write_enable; | ||||
|         }; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x8); | ||||
|     } output_merger; | ||||
| 
 | ||||
|     struct { | ||||
|         enum ColorFormat : u32 { | ||||
|             RGBA8    = 0, | ||||
|  | @ -495,8 +561,14 @@ struct Regs { | |||
|     INSERT_PADDING_WORDS(0x51); | ||||
| 
 | ||||
|     BitField<0, 16, u32> vs_bool_uniforms; | ||||
|     union { | ||||
|         BitField< 0, 8, u32> x; | ||||
|         BitField< 8, 8, u32> y; | ||||
|         BitField<16, 8, u32> z; | ||||
|         BitField<24, 8, u32> w; | ||||
|     } vs_int_uniforms[4]; | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x9); | ||||
|     INSERT_PADDING_WORDS(0x5); | ||||
| 
 | ||||
|     // Offset to shader program entry point (in words)
 | ||||
|     BitField<0, 16, u32> vs_main_offset; | ||||
|  | @ -599,6 +671,7 @@ struct Regs { | |||
|             } while(false) | ||||
| 
 | ||||
|         ADD_FIELD(trigger_irq); | ||||
|         ADD_FIELD(cull_mode); | ||||
|         ADD_FIELD(viewport_size_x); | ||||
|         ADD_FIELD(viewport_size_y); | ||||
|         ADD_FIELD(viewport_depth_range); | ||||
|  | @ -617,6 +690,7 @@ struct Regs { | |||
|         ADD_FIELD(tev_stage3); | ||||
|         ADD_FIELD(tev_stage4); | ||||
|         ADD_FIELD(tev_stage5); | ||||
|         ADD_FIELD(output_merger); | ||||
|         ADD_FIELD(framebuffer); | ||||
|         ADD_FIELD(vertex_attributes); | ||||
|         ADD_FIELD(index_array); | ||||
|  | @ -625,6 +699,7 @@ struct Regs { | |||
|         ADD_FIELD(trigger_draw_indexed); | ||||
|         ADD_FIELD(triangle_topology); | ||||
|         ADD_FIELD(vs_bool_uniforms); | ||||
|         ADD_FIELD(vs_int_uniforms); | ||||
|         ADD_FIELD(vs_main_offset); | ||||
|         ADD_FIELD(vs_input_register_map); | ||||
|         ADD_FIELD(vs_uniform_setup); | ||||
|  | @ -668,6 +743,7 @@ private: | |||
| #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") | ||||
| 
 | ||||
| ASSERT_REG_POSITION(trigger_irq, 0x10); | ||||
| ASSERT_REG_POSITION(cull_mode, 0x40); | ||||
| ASSERT_REG_POSITION(viewport_size_x, 0x41); | ||||
| ASSERT_REG_POSITION(viewport_size_y, 0x43); | ||||
| ASSERT_REG_POSITION(viewport_depth_range, 0x4d); | ||||
|  | @ -688,6 +764,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0); | |||
| ASSERT_REG_POSITION(tev_stage3, 0xd8); | ||||
| ASSERT_REG_POSITION(tev_stage4, 0xf0); | ||||
| ASSERT_REG_POSITION(tev_stage5, 0xf8); | ||||
| ASSERT_REG_POSITION(output_merger, 0x100); | ||||
| ASSERT_REG_POSITION(framebuffer, 0x110); | ||||
| ASSERT_REG_POSITION(vertex_attributes, 0x200); | ||||
| ASSERT_REG_POSITION(index_array, 0x227); | ||||
|  | @ -696,6 +773,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e); | |||
| ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | ||||
| ASSERT_REG_POSITION(triangle_topology, 0x25e); | ||||
| ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); | ||||
| ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); | ||||
| ASSERT_REG_POSITION(vs_main_offset, 0x2ba); | ||||
| ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); | ||||
| ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); | ||||
|  |  | |||
|  | @ -18,51 +18,82 @@ namespace Pica { | |||
| namespace Rasterizer { | ||||
| 
 | ||||
| static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | ||||
|     u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); | ||||
|     const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | ||||
|     u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||||
|     u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||||
| 
 | ||||
|     // Assuming RGBA8 format until actual framebuffer format handling is implemented
 | ||||
|     *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; | ||||
| } | ||||
| 
 | ||||
| static const Math::Vec4<u8> GetPixel(int x, int y) { | ||||
|     const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | ||||
|     u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||||
| 
 | ||||
|     u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); | ||||
|     Math::Vec4<u8> ret; | ||||
|     ret.a() = value >> 24; | ||||
|     ret.r() = (value >> 16) & 0xFF; | ||||
|     ret.g() = (value >> 8) & 0xFF; | ||||
|     ret.b() = value & 0xFF; | ||||
|     return ret; | ||||
|  } | ||||
| 
 | ||||
| static u32 GetDepth(int x, int y) { | ||||
|     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); | ||||
|     const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||||
| 
 | ||||
|     // Assuming 16-bit depth buffer format until actual format handling is implemented
 | ||||
|     return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | ||||
| } | ||||
| 
 | ||||
| static void SetDepth(int x, int y, u16 value) { | ||||
|     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); | ||||
|     const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | ||||
| 
 | ||||
|     // Assuming 16-bit depth buffer format until actual format handling is implemented
 | ||||
|     *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | ||||
| } | ||||
| 
 | ||||
| // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||
| struct Fix12P4 { | ||||
|     Fix12P4() {} | ||||
|     Fix12P4(u16 val) : val(val) {} | ||||
| 
 | ||||
|     static u16 FracMask() { return 0xF; } | ||||
|     static u16 IntMask() { return (u16)~0xF; } | ||||
| 
 | ||||
|     operator u16() const { | ||||
|         return val; | ||||
|     } | ||||
| 
 | ||||
|     bool operator < (const Fix12P4& oth) const { | ||||
|         return (u16)*this < (u16)oth; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     u16 val; | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * Calculate signed area of the triangle spanned by the three argument vertices. | ||||
|  * The sign denotes an orientation. | ||||
|  * | ||||
|  * @todo define orientation concretely. | ||||
|  */ | ||||
| static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | ||||
|                        const Math::Vec2<Fix12P4>& vtx2, | ||||
|                        const Math::Vec2<Fix12P4>& vtx3) { | ||||
|     const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | ||||
|     const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | ||||
|     // TODO: There is a very small chance this will overflow for sizeof(int) == 4
 | ||||
|     return Math::Cross(vec1, vec2).z; | ||||
| }; | ||||
| 
 | ||||
| void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||||
|                      const VertexShader::OutputVertex& v1, | ||||
|                      const VertexShader::OutputVertex& v2) | ||||
| { | ||||
|     // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||
|     struct Fix12P4 { | ||||
|         Fix12P4() {} | ||||
|         Fix12P4(u16 val) : val(val) {} | ||||
| 
 | ||||
|         static u16 FracMask() { return 0xF; } | ||||
|         static u16 IntMask() { return (u16)~0xF; } | ||||
| 
 | ||||
|         operator u16() const { | ||||
|             return val; | ||||
|         } | ||||
| 
 | ||||
|         bool operator < (const Fix12P4& oth) const { | ||||
|             return (u16)*this < (u16)oth; | ||||
|         } | ||||
| 
 | ||||
|     private: | ||||
|         u16 val; | ||||
|     }; | ||||
| 
 | ||||
|     // vertex positions in rasterizer coordinates
 | ||||
|     auto FloatToFix = [](float24 flt) { | ||||
|                           return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); | ||||
|  | @ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|     auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | ||||
|                                              return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | ||||
|                                          }; | ||||
| 
 | ||||
|     Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | ||||
|                                    ScreenToRasterizerCoordinates(v1.screenpos), | ||||
|                                    ScreenToRasterizerCoordinates(v2.screenpos) }; | ||||
| 
 | ||||
|     if (registers.cull_mode == Regs::CullMode::KeepClockWise) { | ||||
|         // Reverse vertex order and use the CCW code path.
 | ||||
|         std::swap(vtxpos[1], vtxpos[2]); | ||||
|     } | ||||
| 
 | ||||
|     if (registers.cull_mode != Regs::CullMode::KeepAll) { | ||||
|         // Cull away triangles which are wound clockwise.
 | ||||
|         // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
 | ||||
|         if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) | ||||
|             return; | ||||
|     } | ||||
| 
 | ||||
|     // TODO: Proper scissor rect test!
 | ||||
|     u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | ||||
|     u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | ||||
|  | @ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|         for (u16 x = min_x; x < max_x; x += 0x10) { | ||||
| 
 | ||||
|             // Calculate the barycentric coordinates w0, w1 and w2
 | ||||
|             auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, | ||||
|                                const Math::Vec2<Fix12P4>& vtx2, | ||||
|                                const Math::Vec2<Fix12P4>& vtx3) { | ||||
|                 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | ||||
|                 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | ||||
|                 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
 | ||||
|                 return Math::Cross(vec1, vec2).z; | ||||
|             }; | ||||
| 
 | ||||
|             int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||||
|             int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||||
|             int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||||
|             int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||||
|             int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||||
|             int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||||
|             int wsum = w0 + w1 + w2; | ||||
| 
 | ||||
|             // If current pixel is not covered by the current primitive
 | ||||
|  | @ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                             return 0; | ||||
|                     } | ||||
|                 }; | ||||
|                 s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); | ||||
|                 t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); | ||||
|                 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | ||||
|                 t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | ||||
| 
 | ||||
|                 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); | ||||
|                 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | ||||
|  | @ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||||
|                 static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||||
|                     switch (factor) | ||||
|                     { | ||||
|                     case ColorModifier::SourceColor: | ||||
|                         return values.rgb(); | ||||
| 
 | ||||
|                     case ColorModifier::OneMinusSourceColor: | ||||
|                         return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||||
| 
 | ||||
|                     case ColorModifier::SourceAlpha: | ||||
|                         return { values.a(), values.a(), values.a() }; | ||||
| 
 | ||||
|  | @ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { | ||||
|                 static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { | ||||
|                     switch (factor) { | ||||
|                     case AlphaModifier::SourceAlpha: | ||||
|                         return value; | ||||
|  | @ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||||
|                 static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||||
|                     switch (op) { | ||||
|                     case Operation::Replace: | ||||
|                         return input[0]; | ||||
|  | @ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                     case Operation::Lerp: | ||||
|                         return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | ||||
| 
 | ||||
|                     case Operation::Subtract: | ||||
|                     { | ||||
|                         auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||||
|                         result.r() = std::max(0, result.r()); | ||||
|                         result.g() = std::max(0, result.g()); | ||||
|                         result.b() = std::max(0, result.b()); | ||||
|                         return result.Cast<u8>(); | ||||
|                     } | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | ||||
|                         _dbg_assert_(HW_GPU, 0); | ||||
|  | @ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | ||||
|                 static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | ||||
|                     switch (op) { | ||||
|                     case Operation::Replace: | ||||
|                         return input[0]; | ||||
|  | @ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                     case Operation::Lerp: | ||||
|                         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||||
| 
 | ||||
|                     case Operation::Subtract: | ||||
|                         return std::max(0, (int)input[0] - (int)input[1]); | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); | ||||
|                         _dbg_assert_(HW_GPU, 0); | ||||
|  | @ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
|                 combiner_output = Math::MakeVec(color_output, alpha_output); | ||||
|             } | ||||
| 
 | ||||
|             // TODO: Not sure if the multiplication by 65535 has already been taken care
 | ||||
|             // of when transforming to screen coordinates or not.
 | ||||
|             u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + | ||||
|                            (float)v1.screenpos[2].ToFloat32() * w1 + | ||||
|                            (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | ||||
|             SetDepth(x >> 4, y >> 4, z); | ||||
|             // TODO: Does depth indeed only get written if depth testing is enabled?
 | ||||
|             if (registers.output_merger.depth_test_enable) { | ||||
|                 u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + | ||||
|                             v1.screenpos[2].ToFloat32() * w1 + | ||||
|                             v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | ||||
|                 u16 ref_z = GetDepth(x >> 4, y >> 4); | ||||
| 
 | ||||
|                 bool pass = false; | ||||
| 
 | ||||
|                 switch (registers.output_merger.depth_test_func) { | ||||
|                 case registers.output_merger.Always: | ||||
|                     pass = true; | ||||
|                     break; | ||||
| 
 | ||||
|                 case registers.output_merger.LessThan: | ||||
|                     pass = z < ref_z; | ||||
|                     break; | ||||
| 
 | ||||
|                 case registers.output_merger.GreaterThan: | ||||
|                     pass = z > ref_z; | ||||
|                     break; | ||||
| 
 | ||||
|                 default: | ||||
|                     LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value()); | ||||
|                     break; | ||||
|                 } | ||||
| 
 | ||||
|                 if (!pass) | ||||
|                     continue; | ||||
| 
 | ||||
|                 if (registers.output_merger.depth_write_enable) | ||||
|                     SetDepth(x >> 4, y >> 4, z); | ||||
|             } | ||||
| 
 | ||||
|             auto dest = GetPixel(x >> 4, y >> 4); | ||||
| 
 | ||||
|             if (registers.output_merger.alphablend_enable) { | ||||
|                 auto params = registers.output_merger.alpha_blending; | ||||
| 
 | ||||
|                 auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> { | ||||
|                     switch(factor) { | ||||
|                     case params.Zero: | ||||
|                         return Math::Vec3<u8>(0, 0, 0); | ||||
| 
 | ||||
|                     case params.One: | ||||
|                         return Math::Vec3<u8>(255, 255, 255); | ||||
| 
 | ||||
|                     case params.SourceAlpha: | ||||
|                         return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a()); | ||||
| 
 | ||||
|                     case params.OneMinusSourceAlpha: | ||||
|                         return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a()); | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | ||||
|                         exit(0); | ||||
|                         break; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { | ||||
|                     switch(factor) { | ||||
|                     case params.Zero: | ||||
|                         return 0; | ||||
| 
 | ||||
|                     case params.One: | ||||
|                         return 255; | ||||
| 
 | ||||
|                     case params.SourceAlpha: | ||||
|                         return combiner_output.a(); | ||||
| 
 | ||||
|                     case params.OneMinusSourceAlpha: | ||||
|                         return 255 - combiner_output.a(); | ||||
| 
 | ||||
|                     default: | ||||
|                         LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | ||||
|                         exit(0); | ||||
|                         break; | ||||
|                     } | ||||
|                 }; | ||||
| 
 | ||||
|                 auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), | ||||
|                                                LookupFactorA(params.factor_source_a)); | ||||
|                 auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), | ||||
|                                                LookupFactorA(params.factor_dest_a)); | ||||
| 
 | ||||
|                 switch (params.blend_equation_rgb) { | ||||
|                 case params.Add: | ||||
|                 { | ||||
|                     auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; | ||||
|                     result.r() = std::min(255, result.r()); | ||||
|                     result.g() = std::min(255, result.g()); | ||||
|                     result.b() = std::min(255, result.b()); | ||||
|                     combiner_output = result.Cast<u8>(); | ||||
|                     break; | ||||
|                 } | ||||
| 
 | ||||
|                 default: | ||||
|                     LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); | ||||
|                     exit(0); | ||||
|                 } | ||||
|             } else { | ||||
|                 LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); | ||||
|                 exit(0); | ||||
|             } | ||||
| 
 | ||||
|             DrawPixel(x >> 4, y >> 4, combiner_output); | ||||
|         } | ||||
|  |  | |||
|  | @ -8,32 +8,6 @@ | |||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace FormatPrecision { | ||||
| 
 | ||||
| /// Adjust RGBA8 color with RGBA6 precision
 | ||||
| static inline u32 rgba8_with_rgba6(u32 src) { | ||||
|     u32 color = src; | ||||
|     color &= 0xFCFCFCFC; | ||||
|     color |= (color >> 6) & 0x03030303; | ||||
|     return color; | ||||
| } | ||||
| 
 | ||||
| /// Adjust RGBA8 color with RGB565 precision
 | ||||
| static inline u32 rgba8_with_rgb565(u32 src) { | ||||
|     u32 color = (src & 0xF8FCF8); | ||||
|     color |= (color >> 5) & 0x070007; | ||||
|     color |= (color >> 6) & 0x000300; | ||||
|     color |= 0xFF000000; | ||||
|     return color; | ||||
| } | ||||
| 
 | ||||
| /// Adjust Z24 depth value with Z16 precision
 | ||||
| static inline u32 z24_with_z16(u32 src) { | ||||
|     return (src & 0xFFFF00) | (src >> 16); | ||||
| } | ||||
| 
 | ||||
| } // namespace
 | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| 
 | ||||
| /// Structure for the TGA texture format (for dumping)
 | ||||
|  |  | |||
|  | @ -30,6 +30,8 @@ static struct { | |||
|     Math::Vec4<float24> f[96]; | ||||
| 
 | ||||
|     std::array<bool,16> b; | ||||
| 
 | ||||
|     std::array<Math::Vec4<u8>,4> i; | ||||
| } shader_uniforms; | ||||
| 
 | ||||
| // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
 | ||||
|  | @ -37,33 +39,31 @@ static struct { | |||
| static std::array<u32, 1024> shader_memory; | ||||
| static std::array<u32, 1024> swizzle_data; | ||||
| 
 | ||||
| void SubmitShaderMemoryChange(u32 addr, u32 value) | ||||
| { | ||||
| void SubmitShaderMemoryChange(u32 addr, u32 value) { | ||||
|     shader_memory[addr] = value; | ||||
| } | ||||
| 
 | ||||
| void SubmitSwizzleDataChange(u32 addr, u32 value) | ||||
| { | ||||
| void SubmitSwizzleDataChange(u32 addr, u32 value) { | ||||
|     swizzle_data[addr] = value; | ||||
| } | ||||
| 
 | ||||
| Math::Vec4<float24>& GetFloatUniform(u32 index) | ||||
| { | ||||
| Math::Vec4<float24>& GetFloatUniform(u32 index) { | ||||
|     return shader_uniforms.f[index]; | ||||
| } | ||||
| 
 | ||||
| bool& GetBoolUniform(u32 index) | ||||
| { | ||||
| bool& GetBoolUniform(u32 index) { | ||||
|     return shader_uniforms.b[index]; | ||||
| } | ||||
| 
 | ||||
| const std::array<u32, 1024>& GetShaderBinary() | ||||
| { | ||||
| Math::Vec4<u8>& GetIntUniform(u32 index) { | ||||
|     return shader_uniforms.i[index]; | ||||
| } | ||||
| 
 | ||||
| const std::array<u32, 1024>& GetShaderBinary() { | ||||
|     return shader_memory; | ||||
| } | ||||
| 
 | ||||
| const std::array<u32, 1024>& GetSwizzlePatterns() | ||||
| { | ||||
| const std::array<u32, 1024>& GetSwizzlePatterns() { | ||||
|     return swizzle_data; | ||||
| } | ||||
| 
 | ||||
|  | @ -437,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| OutputVertex RunShader(const InputVertex& input, int num_attributes) | ||||
| { | ||||
| OutputVertex RunShader(const InputVertex& input, int num_attributes) { | ||||
|     VertexShaderState state; | ||||
| 
 | ||||
|     const u32* main = &shader_memory[registers.vs_main_offset]; | ||||
|  |  | |||
|  | @ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes); | |||
| 
 | ||||
| Math::Vec4<float24>& GetFloatUniform(u32 index); | ||||
| bool& GetBoolUniform(u32 index); | ||||
| Math::Vec4<u8>& GetIntUniform(u32 index); | ||||
| 
 | ||||
| const std::array<u32, 1024>& GetShaderBinary(); | ||||
| const std::array<u32, 1024>& GetSwizzlePatterns(); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue