mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 22:00:05 +00:00 
			
		
		
		
	
						commit
						dc09a2ecb5
					
				
					 5 changed files with 740 additions and 180 deletions
				
			
		|  | @ -108,6 +108,7 @@ set(SRCS | ||||||
|             hw/gpu.cpp |             hw/gpu.cpp | ||||||
|             hw/hw.cpp |             hw/hw.cpp | ||||||
|             hw/lcd.cpp |             hw/lcd.cpp | ||||||
|  |             hw/y2r.cpp | ||||||
|             loader/3dsx.cpp |             loader/3dsx.cpp | ||||||
|             loader/elf.cpp |             loader/elf.cpp | ||||||
|             loader/loader.cpp |             loader/loader.cpp | ||||||
|  | @ -233,6 +234,7 @@ set(HEADERS | ||||||
|             hw/gpu.h |             hw/gpu.h | ||||||
|             hw/hw.h |             hw/hw.h | ||||||
|             hw/lcd.h |             hw/lcd.h | ||||||
|  |             hw/y2r.h | ||||||
|             loader/3dsx.h |             loader/3dsx.h | ||||||
|             loader/elf.h |             loader/elf.h | ||||||
|             loader/loader.h |             loader/loader.h | ||||||
|  |  | ||||||
|  | @ -9,8 +9,8 @@ | ||||||
| #include "core/hle/hle.h" | #include "core/hle/hle.h" | ||||||
| #include "core/hle/kernel/event.h" | #include "core/hle/kernel/event.h" | ||||||
| #include "core/hle/service/y2r_u.h" | #include "core/hle/service/y2r_u.h" | ||||||
|  | #include "core/hw/y2r.h" | ||||||
| #include "core/mem_map.h" | #include "core/mem_map.h" | ||||||
| #include "core/memory.h" |  | ||||||
| 
 | 
 | ||||||
| #include "video_core/utils.h" | #include "video_core/utils.h" | ||||||
| #include "video_core/video_core.h" | #include "video_core/video_core.h" | ||||||
|  | @ -20,75 +20,73 @@ | ||||||
| 
 | 
 | ||||||
| namespace Y2R_U { | namespace Y2R_U { | ||||||
| 
 | 
 | ||||||
| enum class InputFormat { |  | ||||||
|     /// 8-bit input, with YUV components in separate planes and using 4:2:2 subsampling.
 |  | ||||||
|     YUV422_Indiv8 = 0, |  | ||||||
|     /// 8-bit input, with YUV components in separate planes and using 4:2:0 subsampling.
 |  | ||||||
|     YUV420_Indiv8 = 1, |  | ||||||
| 
 |  | ||||||
|     YUV422_INDIV_16 = 2, |  | ||||||
|     YUV420_INDIV_16 = 3, |  | ||||||
|     YUV422_BATCH = 4, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| enum class OutputFormat { |  | ||||||
|     Rgb32 = 0, |  | ||||||
|     Rgb24 = 1, |  | ||||||
|     Rgb16_555 = 2, |  | ||||||
|     Rgb16_565 = 3, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| enum class Rotation { |  | ||||||
|     None = 0, |  | ||||||
|     Clockwise_90 = 1, |  | ||||||
|     Clockwise_180 = 2, |  | ||||||
|     Clockwise_270 = 3, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| enum class BlockAlignment { |  | ||||||
|     /// Image is output in linear format suitable for use as a framebuffer.
 |  | ||||||
|     Linear = 0, |  | ||||||
|     /// Image is output in tiled PICA format, suitable for use as a texture.
 |  | ||||||
|     Block8x8 = 1, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| enum class StandardCoefficient { |  | ||||||
|     ITU_Rec601 = 0, |  | ||||||
|     ITU_Rec709 = 1, |  | ||||||
|     ITU_Rec601_Scaling = 2, |  | ||||||
|     ITU_Rec709_Scaling = 3, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static Kernel::SharedPtr<Kernel::Event> completion_event; |  | ||||||
| 
 |  | ||||||
| struct ConversionParameters { | struct ConversionParameters { | ||||||
|     InputFormat input_format; |     InputFormat input_format; | ||||||
|     OutputFormat output_format; |     OutputFormat output_format; | ||||||
|     Rotation rotation; |     Rotation rotation; | ||||||
|     BlockAlignment alignment; |     BlockAlignment block_alignment; | ||||||
|     u16 input_line_width; |     u16 input_line_width; | ||||||
|     u16 input_lines; |     u16 input_lines; | ||||||
|  |     StandardCoefficient standard_coefficient; | ||||||
|  |     u8 reserved; | ||||||
|  |     u16 alpha; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); | ||||||
| 
 | 
 | ||||||
|     // Input parameters for the Y (luma) plane
 | static Kernel::SharedPtr<Kernel::Event> completion_event; | ||||||
|     VAddr srcY_address; | static ConversionConfiguration conversion; | ||||||
|     u32 srcY_image_size; |  | ||||||
|     u16 srcY_transfer_unit; |  | ||||||
|     u16 srcY_stride; |  | ||||||
| 
 | 
 | ||||||
|     // Output parameters for the conversion results
 | static const CoefficientSet standard_coefficients[4] = { | ||||||
|     VAddr dst_address; |     {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
 | ||||||
|     u32 dst_image_size; |     {{ 0x100, 0x193, 0x77, 0x2F, 0x1DB, -0x1933,  0xA7C, -0x1D51 }}, // ITU_Rec709
 | ||||||
|     u16 dst_transfer_unit; |     {{ 0x12A, 0x198, 0xD0, 0x64, 0x204, -0x1BDE, 0x10F2, -0x229B }}, // ITU_Rec601_Scaling
 | ||||||
|     u16 dst_stride; |     {{ 0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04,  0x99C, -0x2421 }}, // ITU_Rec709_Scaling
 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static ConversionParameters conversion_params; | ResultCode ConversionConfiguration::SetInputLineWidth(u16 width) { | ||||||
|  |     if (width == 0 || width > 1024 || width % 8 != 0) { | ||||||
|  |         return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM, | ||||||
|  |             ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
 | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Note: The hardware uses the register value 0 to represent a width of 1024, so for a width of
 | ||||||
|  |     // 1024 the `camera` module would set the value 0 here, but we don't need to emulate this
 | ||||||
|  |     // internal detail.
 | ||||||
|  |     this->input_line_width = width; | ||||||
|  |     return RESULT_SUCCESS; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | ResultCode ConversionConfiguration::SetInputLines(u16 lines) { | ||||||
|  |     if (lines == 0 || lines > 1024) { | ||||||
|  |         return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM, | ||||||
|  |             ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
 | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Note: In what appears to be a bug, the `camera` module does not set the hardware register at
 | ||||||
|  |     // all if `lines` is 1024, so the conversion uses the last value that was set. The intention
 | ||||||
|  |     // was probably to set it to 0 like in SetInputLineWidth.
 | ||||||
|  |     if (lines != 1024) { | ||||||
|  |         this->input_lines = lines; | ||||||
|  |     } | ||||||
|  |     return RESULT_SUCCESS; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { | ||||||
|  |     size_t index = static_cast<size_t>(standard_coefficient); | ||||||
|  |     if (index >= 4) { | ||||||
|  |         return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, | ||||||
|  |             ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
 | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::memcpy(coefficients.data(), standard_coefficients[index].data(), sizeof(coefficients)); | ||||||
|  |     return RESULT_SUCCESS; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| static void SetInputFormat(Service::Interface* self) { | static void SetInputFormat(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.input_format = static_cast<InputFormat>(cmd_buff[1]); |     conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); | ||||||
|     LOG_DEBUG(Service_Y2R, "called input_format=%u", conversion_params.input_format); |     LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
|  | @ -96,8 +94,8 @@ static void SetInputFormat(Service::Interface* self) { | ||||||
| static void SetOutputFormat(Service::Interface* self) { | static void SetOutputFormat(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.output_format = static_cast<OutputFormat>(cmd_buff[1]); |     conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); | ||||||
|     LOG_DEBUG(Service_Y2R, "called output_format=%u", conversion_params.output_format); |     LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
|  | @ -105,8 +103,8 @@ static void SetOutputFormat(Service::Interface* self) { | ||||||
| static void SetRotation(Service::Interface* self) { | static void SetRotation(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.rotation = static_cast<Rotation>(cmd_buff[1]); |     conversion.rotation = static_cast<Rotation>(cmd_buff[1]); | ||||||
|     LOG_DEBUG(Service_Y2R, "called rotation=%u", conversion_params.rotation); |     LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
|  | @ -114,18 +112,26 @@ static void SetRotation(Service::Interface* self) { | ||||||
| static void SetBlockAlignment(Service::Interface* self) { | static void SetBlockAlignment(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.alignment = static_cast<BlockAlignment>(cmd_buff[1]); |     conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); | ||||||
|     LOG_DEBUG(Service_Y2R, "called alignment=%u", conversion_params.alignment); |     LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void SetTransferEndInterrupt(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[0] = 0x000D0040; | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "(STUBBED) called"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
| * Y2R_U::GetTransferEndEvent service function |  * Y2R_U::GetTransferEndEvent service function | ||||||
| *  Outputs: |  *  Outputs: | ||||||
| *      1 : Result of function, 0 on success, otherwise error code |  *      1 : Result of function, 0 on success, otherwise error code | ||||||
| *      3 : The handle of the completion event |  *      3 : The handle of the completion event | ||||||
| */ |  */ | ||||||
| static void GetTransferEndEvent(Service::Interface* self) { | static void GetTransferEndEvent(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|  | @ -137,14 +143,59 @@ static void GetTransferEndEvent(Service::Interface* self) { | ||||||
| static void SetSendingY(Service::Interface* self) { | static void SetSendingY(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.srcY_address = cmd_buff[1]; |     conversion.src_Y.address = cmd_buff[1]; | ||||||
|     conversion_params.srcY_image_size = cmd_buff[2]; |     conversion.src_Y.image_size = cmd_buff[2]; | ||||||
|     conversion_params.srcY_transfer_unit = cmd_buff[3]; |     conversion.src_Y.transfer_unit = cmd_buff[3]; | ||||||
|     conversion_params.srcY_stride = cmd_buff[4]; |     conversion.src_Y.gap = cmd_buff[4]; | ||||||
|     u32 src_process_handle = cmd_buff[6]; |     u32 src_process_handle = cmd_buff[6]; | ||||||
|     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " |     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||||||
|         "src_process_handle=0x%08X", conversion_params.srcY_image_size, |         "src_process_handle=0x%08X", conversion.src_Y.image_size, | ||||||
|         conversion_params.srcY_transfer_unit, conversion_params.srcY_stride, src_process_handle); |         conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetSendingU(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     conversion.src_U.address = cmd_buff[1]; | ||||||
|  |     conversion.src_U.image_size = cmd_buff[2]; | ||||||
|  |     conversion.src_U.transfer_unit = cmd_buff[3]; | ||||||
|  |     conversion.src_U.gap = cmd_buff[4]; | ||||||
|  |     u32 src_process_handle = cmd_buff[6]; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||||||
|  |         "src_process_handle=0x%08X", conversion.src_U.image_size, | ||||||
|  |         conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetSendingV(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     conversion.src_V.address = cmd_buff[1]; | ||||||
|  |     conversion.src_V.image_size = cmd_buff[2]; | ||||||
|  |     conversion.src_V.transfer_unit = cmd_buff[3]; | ||||||
|  |     conversion.src_V.gap = cmd_buff[4]; | ||||||
|  |     u32 src_process_handle = cmd_buff[6]; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||||||
|  |         "src_process_handle=0x%08X", conversion.src_V.image_size, | ||||||
|  |         conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetSendingYUYV(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     conversion.src_YUYV.address = cmd_buff[1]; | ||||||
|  |     conversion.src_YUYV.image_size = cmd_buff[2]; | ||||||
|  |     conversion.src_YUYV.transfer_unit = cmd_buff[3]; | ||||||
|  |     conversion.src_YUYV.gap = cmd_buff[4]; | ||||||
|  |     u32 src_process_handle = cmd_buff[6]; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||||||
|  |         "src_process_handle=0x%08X", conversion.src_YUYV.image_size, | ||||||
|  |         conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
|  | @ -152,14 +203,14 @@ static void SetSendingY(Service::Interface* self) { | ||||||
| static void SetReceiving(Service::Interface* self) { | static void SetReceiving(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.dst_address = cmd_buff[1]; |     conversion.dst.address = cmd_buff[1]; | ||||||
|     conversion_params.dst_image_size = cmd_buff[2]; |     conversion.dst.image_size = cmd_buff[2]; | ||||||
|     conversion_params.dst_transfer_unit = cmd_buff[3]; |     conversion.dst.transfer_unit = cmd_buff[3]; | ||||||
|     conversion_params.dst_stride = cmd_buff[4]; |     conversion.dst.gap = cmd_buff[4]; | ||||||
|     u32 dst_process_handle = cmd_buff[6]; |     u32 dst_process_handle = cmd_buff[6]; | ||||||
|     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " |     LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||||||
|         "dst_process_handle=0x%08X", conversion_params.dst_image_size, |         "dst_process_handle=0x%08X", conversion.dst.image_size, | ||||||
|         conversion_params.dst_transfer_unit, conversion_params.dst_stride, |         conversion.dst.transfer_unit, conversion.dst.gap, | ||||||
|         dst_process_handle); |         dst_process_handle); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  | @ -168,17 +219,42 @@ static void SetReceiving(Service::Interface* self) { | ||||||
| static void SetInputLineWidth(Service::Interface* self) { | static void SetInputLineWidth(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.input_line_width = cmd_buff[1]; |     LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); | ||||||
|     LOG_DEBUG(Service_Y2R, "input_line_width=%u", conversion_params.input_line_width); |     cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; | ||||||
| 
 |  | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void SetInputLines(Service::Interface* self) { | static void SetInputLines(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     conversion_params.input_lines = cmd_buff[1]; |     LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); | ||||||
|     LOG_DEBUG(Service_Y2R, "input_line_number=%u", conversion_params.input_lines); |     cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetCoefficient(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); | ||||||
|  |     std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", | ||||||
|  |             coefficients[0], coefficients[1], coefficients[2], coefficients[3], | ||||||
|  |             coefficients[4], coefficients[5], coefficients[6], coefficients[7]); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetStandardCoefficient(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetAlpha(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     conversion.alpha = cmd_buff[1]; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); | ||||||
| 
 | 
 | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
|  | @ -186,89 +262,13 @@ static void SetInputLines(Service::Interface* self) { | ||||||
| static void StartConversion(Service::Interface* self) { | static void StartConversion(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|     const u8* srcY_buffer = Memory::GetPointer(conversion_params.srcY_address); |     HW::Y2R::PerformConversion(conversion); | ||||||
|     u8* dst_buffer = Memory::GetPointer(conversion_params.dst_address); |  | ||||||
| 
 | 
 | ||||||
|     // TODO: support color and other kinds of conversions
 |     // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
 | ||||||
|     ASSERT(conversion_params.input_format == InputFormat::YUV422_Indiv8 |     u32 total_output_size = conversion.input_lines * | ||||||
|         || conversion_params.input_format == InputFormat::YUV420_Indiv8); |         (conversion.dst.transfer_unit + conversion.dst.gap); | ||||||
|     ASSERT(conversion_params.output_format == OutputFormat::Rgb24); |  | ||||||
|     ASSERT(conversion_params.rotation == Rotation::None); |  | ||||||
|     const int bpp = 3; |  | ||||||
| 
 |  | ||||||
|     switch (conversion_params.alignment) { |  | ||||||
|     case BlockAlignment::Linear: |  | ||||||
|     { |  | ||||||
|         const size_t input_lines = conversion_params.input_lines; |  | ||||||
|         const size_t input_line_width = conversion_params.input_line_width; |  | ||||||
|         const size_t srcY_stride = conversion_params.srcY_stride; |  | ||||||
|         const size_t dst_stride = conversion_params.dst_stride; |  | ||||||
| 
 |  | ||||||
|         size_t srcY_offset = 0; |  | ||||||
|         size_t dst_offset = 0; |  | ||||||
| 
 |  | ||||||
|         for (size_t line = 0; line < input_lines; ++line) { |  | ||||||
|             for (size_t i = 0; i < input_line_width; ++i) { |  | ||||||
|                 u8 Y = srcY_buffer[srcY_offset]; |  | ||||||
|                 dst_buffer[dst_offset + 0] = Y; |  | ||||||
|                 dst_buffer[dst_offset + 1] = Y; |  | ||||||
|                 dst_buffer[dst_offset + 2] = Y; |  | ||||||
| 
 |  | ||||||
|                 srcY_offset += 1; |  | ||||||
|                 dst_offset += bpp; |  | ||||||
|             } |  | ||||||
|             srcY_offset += srcY_stride; |  | ||||||
|             dst_offset += dst_stride; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case BlockAlignment::Block8x8: |  | ||||||
|     { |  | ||||||
|         const size_t input_lines = conversion_params.input_lines; |  | ||||||
|         const size_t input_line_width = conversion_params.input_line_width; |  | ||||||
|         const size_t srcY_stride = conversion_params.srcY_stride; |  | ||||||
|         const size_t dst_transfer_unit = conversion_params.dst_transfer_unit; |  | ||||||
|         const size_t dst_stride = conversion_params.dst_stride; |  | ||||||
| 
 |  | ||||||
|         size_t srcY_offset = 0; |  | ||||||
|         size_t dst_tile_line_offs = 0; |  | ||||||
| 
 |  | ||||||
|         const size_t tile_size = 8 * 8 * bpp; |  | ||||||
| 
 |  | ||||||
|         for (size_t line = 0; line < input_lines;) { |  | ||||||
|             size_t max_line = line + 8; |  | ||||||
| 
 |  | ||||||
|             for (; line < max_line; ++line) { |  | ||||||
|                 for (size_t x = 0; x < input_line_width; ++x) { |  | ||||||
|                     size_t tile_x = x / 8; |  | ||||||
| 
 |  | ||||||
|                     size_t dst_tile_offs = dst_tile_line_offs + tile_x * tile_size; |  | ||||||
|                     size_t tile_i = VideoCore::MortonInterleave((u32)x, (u32)line); |  | ||||||
| 
 |  | ||||||
|                     size_t dst_offset = dst_tile_offs + tile_i * bpp; |  | ||||||
| 
 |  | ||||||
|                     u8 Y = srcY_buffer[srcY_offset]; |  | ||||||
|                     dst_buffer[dst_offset + 0] = Y; |  | ||||||
|                     dst_buffer[dst_offset + 1] = Y; |  | ||||||
|                     dst_buffer[dst_offset + 2] = Y; |  | ||||||
| 
 |  | ||||||
|                     srcY_offset += 1; |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 srcY_offset += srcY_stride; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             dst_tile_line_offs += dst_transfer_unit + dst_stride; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // dst_image_size would seem to be perfect for this, but it doesn't include the stride :(
 |  | ||||||
|     u32 total_output_size = conversion_params.input_lines * |  | ||||||
|         (conversion_params.dst_transfer_unit + conversion_params.dst_stride); |  | ||||||
|     VideoCore::g_renderer->hw_rasterizer->NotifyFlush( |     VideoCore::g_renderer->hw_rasterizer->NotifyFlush( | ||||||
|         Memory::VirtualToPhysicalAddress(conversion_params.dst_address), total_output_size); |         Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); | ||||||
| 
 | 
 | ||||||
|     LOG_DEBUG(Service_Y2R, "called"); |     LOG_DEBUG(Service_Y2R, "called"); | ||||||
|     completion_event->Signal(); |     completion_event->Signal(); | ||||||
|  | @ -276,12 +276,20 @@ static void StartConversion(Service::Interface* self) { | ||||||
|     cmd_buff[1] = RESULT_SUCCESS.raw; |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void StopConversion(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[0] = 0x00270040; | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
| * Y2R_U::IsBusyConversion service function |  * Y2R_U::IsBusyConversion service function | ||||||
| *  Outputs: |  *  Outputs: | ||||||
| *      1 : Result of function, 0 on success, otherwise error code |  *      1 : Result of function, 0 on success, otherwise error code | ||||||
| *      2 : 1 if there's a conversion running, otherwise 0. |  *      2 : 1 if there's a conversion running, otherwise 0. | ||||||
| */ |  */ | ||||||
| static void IsBusyConversion(Service::Interface* self) { | static void IsBusyConversion(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|  | @ -290,6 +298,40 @@ static void IsBusyConversion(Service::Interface* self) { | ||||||
|     LOG_DEBUG(Service_Y2R, "called"); |     LOG_DEBUG(Service_Y2R, "called"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /**
 | ||||||
|  |  * Y2R_U::SetConversionParams service function | ||||||
|  |  */ | ||||||
|  | static void SetConversionParams(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); | ||||||
|  |     LOG_DEBUG(Service_Y2R, | ||||||
|  |         "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu " | ||||||
|  |         "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu " | ||||||
|  |         "reserved=%hhu alpha=%hX", | ||||||
|  |         params->input_format, params->output_format, params->rotation, params->block_alignment, | ||||||
|  |         params->input_line_width, params->input_lines, params->standard_coefficient, | ||||||
|  |         params->reserved, params->alpha); | ||||||
|  | 
 | ||||||
|  |     ResultCode result = RESULT_SUCCESS; | ||||||
|  | 
 | ||||||
|  |     conversion.input_format = params->input_format; | ||||||
|  |     conversion.output_format = params->output_format; | ||||||
|  |     conversion.rotation = params->rotation; | ||||||
|  |     conversion.block_alignment = params->block_alignment; | ||||||
|  |     result = conversion.SetInputLineWidth(params->input_line_width); | ||||||
|  |     if (result.IsError()) goto cleanup; | ||||||
|  |     result = conversion.SetInputLines(params->input_lines); | ||||||
|  |     if (result.IsError()) goto cleanup; | ||||||
|  |     result = conversion.SetStandardCoefficient(params->standard_coefficient); | ||||||
|  |     if (result.IsError()) goto cleanup; | ||||||
|  |     conversion.alpha = params->alpha; | ||||||
|  | 
 | ||||||
|  | cleanup: | ||||||
|  |     cmd_buff[0] = 0x00290040; // TODO verify
 | ||||||
|  |     cmd_buff[1] = result.raw; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void PingProcess(Service::Interface* self) { | static void PingProcess(Service::Interface* self) { | ||||||
|     u32* cmd_buff = Kernel::GetCommandBuffer(); |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
| 
 | 
 | ||||||
|  | @ -298,27 +340,63 @@ static void PingProcess(Service::Interface* self) { | ||||||
|     LOG_WARNING(Service_Y2R, "(STUBBED) called"); |     LOG_WARNING(Service_Y2R, "(STUBBED) called"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void DriverInitialize(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     conversion.input_format = InputFormat::YUV422_Indiv8; | ||||||
|  |     conversion.output_format = OutputFormat::RGBA8; | ||||||
|  |     conversion.rotation = Rotation::None; | ||||||
|  |     conversion.block_alignment = BlockAlignment::Linear; | ||||||
|  |     conversion.coefficients.fill(0); | ||||||
|  |     conversion.SetInputLineWidth(1024); | ||||||
|  |     conversion.SetInputLines(1024); | ||||||
|  |     conversion.alpha = 0; | ||||||
|  | 
 | ||||||
|  |     ConversionBuffer zero_buffer = {}; | ||||||
|  |     conversion.src_Y = zero_buffer; | ||||||
|  |     conversion.src_U = zero_buffer; | ||||||
|  |     conversion.src_V = zero_buffer; | ||||||
|  |     conversion.dst = zero_buffer; | ||||||
|  | 
 | ||||||
|  |     completion_event->Clear(); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[0] = 0x002B0040; | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void DriverFinalize(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Kernel::GetCommandBuffer(); | ||||||
|  | 
 | ||||||
|  |     cmd_buff[0] = 0x002C0040; | ||||||
|  |     cmd_buff[1] = RESULT_SUCCESS.raw; | ||||||
|  |     LOG_DEBUG(Service_Y2R, "called"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| const Interface::FunctionInfo FunctionTable[] = { | const Interface::FunctionInfo FunctionTable[] = { | ||||||
|     {0x00010040, SetInputFormat,          "SetInputFormat"}, |     {0x00010040, SetInputFormat,          "SetInputFormat"}, | ||||||
|     {0x00030040, SetOutputFormat,         "SetOutputFormat"}, |     {0x00030040, SetOutputFormat,         "SetOutputFormat"}, | ||||||
|     {0x00050040, SetRotation,             "SetRotation"}, |     {0x00050040, SetRotation,             "SetRotation"}, | ||||||
|     {0x00070040, SetBlockAlignment,       "SetBlockAlignment"}, |     {0x00070040, SetBlockAlignment,       "SetBlockAlignment"}, | ||||||
|     {0x000D0040, nullptr,                 "SetTransferEndInterrupt"}, |     {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, | ||||||
|     {0x000F0000, GetTransferEndEvent,     "GetTransferEndEvent"}, |     {0x000F0000, GetTransferEndEvent,     "GetTransferEndEvent"}, | ||||||
|     {0x00100102, SetSendingY,             "SetSendingY"}, |     {0x00100102, SetSendingY,             "SetSendingY"}, | ||||||
|     {0x00110102, nullptr,                 "SetSendingU"}, |     {0x00110102, SetSendingU,             "SetSendingU"}, | ||||||
|     {0x00120102, nullptr,                 "SetSendingV"}, |     {0x00120102, SetSendingV,             "SetSendingV"}, | ||||||
|  |     {0x00130102, SetSendingYUYV,          "SetSendingYUYV"}, | ||||||
|     {0x00180102, SetReceiving,            "SetReceiving"}, |     {0x00180102, SetReceiving,            "SetReceiving"}, | ||||||
|     {0x001A0040, SetInputLineWidth,       "SetInputLineWidth"}, |     {0x001A0040, SetInputLineWidth,       "SetInputLineWidth"}, | ||||||
|     {0x001C0040, SetInputLines,           "SetInputLines"}, |     {0x001C0040, SetInputLines,           "SetInputLines"}, | ||||||
|     {0x00200040, nullptr,                 "SetStandardCoefficient"}, |     {0x001E0100, SetCoefficient,          "SetCoefficient"}, | ||||||
|     {0x00220040, nullptr,                 "SetAlpha"}, |     {0x00200040, SetStandardCoefficient,  "SetStandardCoefficient"}, | ||||||
|  |     {0x00220040, SetAlpha,                "SetAlpha"}, | ||||||
|     {0x00260000, StartConversion,         "StartConversion"}, |     {0x00260000, StartConversion,         "StartConversion"}, | ||||||
|     {0x00270000, nullptr,                 "StopConversion"}, |     {0x00270000, StopConversion,          "StopConversion"}, | ||||||
|     {0x00280000, IsBusyConversion,        "IsBusyConversion"}, |     {0x00280000, IsBusyConversion,        "IsBusyConversion"}, | ||||||
|  |     {0x002901C0, SetConversionParams,     "SetConversionParams"}, | ||||||
|     {0x002A0000, PingProcess,             "PingProcess"}, |     {0x002A0000, PingProcess,             "PingProcess"}, | ||||||
|     {0x002B0000, nullptr,                 "DriverInitialize"}, |     {0x002B0000, DriverInitialize,        "DriverInitialize"}, | ||||||
|     {0x002C0000, nullptr,                 "DriverFinalize"}, |     {0x002C0000, DriverFinalize,          "DriverFinalize"}, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////
 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | ||||||
|  | @ -326,7 +404,7 @@ const Interface::FunctionInfo FunctionTable[] = { | ||||||
| 
 | 
 | ||||||
| Interface::Interface() { | Interface::Interface() { | ||||||
|     completion_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "Y2R:Completed"); |     completion_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "Y2R:Completed"); | ||||||
|     std::memset(&conversion_params, 0, sizeof(conversion_params)); |     std::memset(&conversion, 0, sizeof(conversion)); | ||||||
| 
 | 
 | ||||||
|     Register(FunctionTable); |     Register(FunctionTable); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -4,6 +4,10 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <array> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
| #include "core/hle/service/service.h" | #include "core/hle/service/service.h" | ||||||
| 
 | 
 | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////
 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | ||||||
|  | @ -11,6 +15,98 @@ | ||||||
| 
 | 
 | ||||||
| namespace Y2R_U { | namespace Y2R_U { | ||||||
| 
 | 
 | ||||||
|  | enum class InputFormat : u8 { | ||||||
|  |     /// 8-bit input, with YUV components in separate planes and 4:2:2 subsampling.
 | ||||||
|  |     YUV422_Indiv8 = 0, | ||||||
|  |     /// 8-bit input, with YUV components in separate planes and 4:2:0 subsampling.
 | ||||||
|  |     YUV420_Indiv8 = 1, | ||||||
|  | 
 | ||||||
|  |     /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:2 subsampling.
 | ||||||
|  |     YUV422_Indiv16 = 2, | ||||||
|  |     /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:0 subsampling.
 | ||||||
|  |     YUV420_Indiv16 = 3, | ||||||
|  | 
 | ||||||
|  |     /// 8-bit input, with a single interleaved stream in YUYV format and 4:2:2 subsampling.
 | ||||||
|  |     YUYV422_Interleaved = 4, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class OutputFormat : u8 { | ||||||
|  |     RGBA8 = 0, | ||||||
|  |     RGB8 = 1, | ||||||
|  |     RGB5A1 = 2, | ||||||
|  |     RGB565 = 3, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class Rotation : u8 { | ||||||
|  |     None = 0, | ||||||
|  |     Clockwise_90 = 1, | ||||||
|  |     Clockwise_180 = 2, | ||||||
|  |     Clockwise_270 = 3, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class BlockAlignment : u8 { | ||||||
|  |     /// Image is output in linear format suitable for use as a framebuffer.
 | ||||||
|  |     Linear = 0, | ||||||
|  |     /// Image is output in tiled PICA format, suitable for use as a texture.
 | ||||||
|  |     Block8x8 = 1, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class StandardCoefficient : u8 { | ||||||
|  |     /// ITU Rec. BT.601 primaries, with PC ranges.
 | ||||||
|  |     ITU_Rec601 = 0, | ||||||
|  |     /// ITU Rec. BT.709 primaries, with PC ranges.
 | ||||||
|  |     ITU_Rec709 = 1, | ||||||
|  |     /// ITU Rec. BT.601 primaries, with TV ranges.
 | ||||||
|  |     ITU_Rec601_Scaling = 2, | ||||||
|  |     /// ITU Rec. BT.709 primaries, with TV ranges.
 | ||||||
|  |     ITU_Rec709_Scaling = 3, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * A set of coefficients configuring the RGB to YUV conversion. Coefficients 0-4 are unsigned 2.8 | ||||||
|  |  * fixed pointer numbers representing entries on the conversion matrix, while coefficient 5-7 are | ||||||
|  |  * signed 11.5 fixed point numbers added as offsets to the RGB result. | ||||||
|  |  * | ||||||
|  |  * The overall conversion process formula is: | ||||||
|  |  * ``` | ||||||
|  |  * R = trunc((c_0 * Y           + c_1 * V) + c_5 + 0.75) | ||||||
|  |  * G = trunc((c_0 * Y - c_3 * U - c_2 * V) + c_6 + 0.75) | ||||||
|  |  * B = trunc((c_0 * Y + c_4 * U          ) + c_7 + 0.75) | ||||||
|  |  * ``` | ||||||
|  |  */ | ||||||
|  | using CoefficientSet = std::array<s16, 8>; | ||||||
|  | 
 | ||||||
|  | struct ConversionBuffer { | ||||||
|  |     /// Current reading/writing address of this buffer.
 | ||||||
|  |     VAddr address; | ||||||
|  |     /// Remaining amount of bytes to be DMAed, does not include the inter-trasfer gap.
 | ||||||
|  |     u32 image_size; | ||||||
|  |     /// Size of a single DMA transfer.
 | ||||||
|  |     u16 transfer_unit; | ||||||
|  |     /// Amount of bytes to be skipped between copying each `transfer_unit` bytes.
 | ||||||
|  |     u16 gap; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct ConversionConfiguration { | ||||||
|  |     InputFormat input_format; | ||||||
|  |     OutputFormat output_format; | ||||||
|  |     Rotation rotation; | ||||||
|  |     BlockAlignment block_alignment; | ||||||
|  |     u16 input_line_width; | ||||||
|  |     u16 input_lines; | ||||||
|  |     CoefficientSet coefficients; | ||||||
|  |     u16 alpha; | ||||||
|  | 
 | ||||||
|  |     /// Input parameters for the Y (luma) plane
 | ||||||
|  |     ConversionBuffer src_Y, src_U, src_V, src_YUYV; | ||||||
|  |     /// Output parameters for the conversion results
 | ||||||
|  |     ConversionBuffer dst; | ||||||
|  | 
 | ||||||
|  |     ResultCode SetInputLineWidth(u16 width); | ||||||
|  |     ResultCode SetInputLines(u16 lines); | ||||||
|  |     ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| class Interface : public Service::Interface { | class Interface : public Service::Interface { | ||||||
| public: | public: | ||||||
|     Interface(); |     Interface(); | ||||||
|  |  | ||||||
							
								
								
									
										369
									
								
								src/core/hw/y2r.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										369
									
								
								src/core/hw/y2r.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,369 @@ | ||||||
|  | // Copyright 2015 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <array> | ||||||
|  | #include <numeric> | ||||||
|  | 
 | ||||||
|  | #include "common/assert.h" | ||||||
|  | #include "common/color.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "common/math_util.h" | ||||||
|  | #include "common/vector_math.h" | ||||||
|  | 
 | ||||||
|  | #include "core/hle/service/y2r_u.h" | ||||||
|  | #include "core/memory.h" | ||||||
|  | 
 | ||||||
|  | namespace HW { | ||||||
|  | namespace Y2R { | ||||||
|  | 
 | ||||||
|  | using namespace Y2R_U; | ||||||
|  | 
 | ||||||
|  | static const size_t MAX_TILES = 1024 / 8; | ||||||
|  | static const size_t TILE_SIZE = 8 * 8; | ||||||
|  | using ImageTile = std::array<u32, TILE_SIZE>; | ||||||
|  | 
 | ||||||
|  | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
 | ||||||
|  | static void ConvertYUVToRGB(InputFormat input_format, | ||||||
|  |         const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], | ||||||
|  |         unsigned int width, unsigned int height, const CoefficientSet& coefficients) { | ||||||
|  | 
 | ||||||
|  |     for (unsigned int y = 0; y < height; ++y) { | ||||||
|  |         for (unsigned int x = 0; x < width; ++x) { | ||||||
|  |             s32 Y, U, V; | ||||||
|  |             switch (input_format) { | ||||||
|  |             case InputFormat::YUV422_Indiv8: | ||||||
|  |             case InputFormat::YUV422_Indiv16: | ||||||
|  |                 Y = input_Y[y * width + x]; | ||||||
|  |                 U = input_U[(y * width + x) / 2]; | ||||||
|  |                 V = input_V[(y * width + x) / 2]; | ||||||
|  |                 break; | ||||||
|  |             case InputFormat::YUV420_Indiv8: | ||||||
|  |             case InputFormat::YUV420_Indiv16: | ||||||
|  |                 Y = input_Y[y * width + x]; | ||||||
|  |                 U = input_U[((y / 2) * width + x) / 2]; | ||||||
|  |                 V = input_V[((y / 2) * width + x) / 2]; | ||||||
|  |                 break; | ||||||
|  |             case InputFormat::YUYV422_Interleaved: | ||||||
|  |                 Y = input_Y[(y * width + x) * 2]; | ||||||
|  |                 U = input_Y[(y * width + (x / 2) * 2) * 2 + 1]; | ||||||
|  |                 V = input_Y[(y * width + (x / 2) * 2) * 2 + 3]; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // This conversion process is bit-exact with hardware, as far as could be tested.
 | ||||||
|  |             auto& c = coefficients; | ||||||
|  |             s32 cY = c[0]*Y; | ||||||
|  | 
 | ||||||
|  |             s32 r = cY          + c[1]*V; | ||||||
|  |             s32 g = cY - c[3]*U - c[2]*V; | ||||||
|  |             s32 b = cY + c[4]*U; | ||||||
|  | 
 | ||||||
|  |             const s32 rounding_offset = 0x18; | ||||||
|  |             r = (r >> 3) + c[5] + rounding_offset; | ||||||
|  |             g = (g >> 3) + c[6] + rounding_offset; | ||||||
|  |             b = (b >> 3) + c[7] + rounding_offset; | ||||||
|  | 
 | ||||||
|  |             unsigned int tile = x / 8; | ||||||
|  |             unsigned int tile_x = x % 8; | ||||||
|  |             u32* out = &output[tile][y * 8 + tile_x]; | ||||||
|  | 
 | ||||||
|  |             using MathUtil::Clamp; | ||||||
|  |             *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | | ||||||
|  |                    ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | | ||||||
|  |                    ((u32)Clamp(b >> 5, 0, 0xFF) << 8); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit.
 | ||||||
|  | template <size_t N> | ||||||
|  | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { | ||||||
|  |     const u8* input = Memory::GetPointer(buf.address); | ||||||
|  | 
 | ||||||
|  |     size_t output_unit = buf.transfer_unit / N; | ||||||
|  |     ASSERT(amount_of_data % output_unit == 0); | ||||||
|  | 
 | ||||||
|  |     while (amount_of_data > 0) { | ||||||
|  |         for (size_t i = 0; i < output_unit; ++i) { | ||||||
|  |             output[i] = input[i * N]; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         output += output_unit; | ||||||
|  |         input += buf.transfer_unit + buf.gap; | ||||||
|  | 
 | ||||||
|  |         buf.address += buf.transfer_unit + buf.gap; | ||||||
|  |         buf.image_size -= buf.transfer_unit; | ||||||
|  |         amount_of_data -= output_unit; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer.
 | ||||||
|  | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, | ||||||
|  |         OutputFormat output_format, u8 alpha) { | ||||||
|  | 
 | ||||||
|  |     u8* output = Memory::GetPointer(buf.address); | ||||||
|  | 
 | ||||||
|  |     while (amount_of_data > 0) { | ||||||
|  |         u8* unit_end = output + buf.transfer_unit; | ||||||
|  |         while (output < unit_end) { | ||||||
|  |             u32 color = *input++; | ||||||
|  |             Math::Vec4<u8> col_vec{ | ||||||
|  |                 (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >>  8) & 0xFF, alpha, | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             switch (output_format) { | ||||||
|  |             case OutputFormat::RGBA8: | ||||||
|  |                 Color::EncodeRGBA8(col_vec, output); | ||||||
|  |                 output += 4; | ||||||
|  |                 break; | ||||||
|  |             case OutputFormat::RGB8: | ||||||
|  |                 Color::EncodeRGB8(col_vec, output); | ||||||
|  |                 output += 3; | ||||||
|  |                 break; | ||||||
|  |             case OutputFormat::RGB5A1: | ||||||
|  |                 Color::EncodeRGB5A1(col_vec, output); | ||||||
|  |                 output += 2; | ||||||
|  |                 break; | ||||||
|  |             case OutputFormat::RGB565: | ||||||
|  |                 Color::EncodeRGB565(col_vec, output); | ||||||
|  |                 output += 2; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             amount_of_data -= 1; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         output += buf.gap; | ||||||
|  |         buf.address += buf.transfer_unit + buf.gap; | ||||||
|  |         buf.image_size -= buf.transfer_unit; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static const u8 linear_lut[64] = { | ||||||
|  |      0,  1,  2,  3,  4,  5,  6,  7, | ||||||
|  |      8,  9, 10, 11, 12, 13, 14, 15, | ||||||
|  |     16, 17, 18, 19, 20, 21, 22, 23, | ||||||
|  |     24, 25, 26, 27, 28, 29, 30, 31, | ||||||
|  |     32, 33, 34, 35, 36, 37, 38, 39, | ||||||
|  |     40, 41, 42, 43, 44, 45, 46, 47, | ||||||
|  |     48, 49, 50, 51, 52, 53, 54, 55, | ||||||
|  |     56, 57, 58, 59, 60, 61, 62, 63, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static const u8 morton_lut[64] = { | ||||||
|  |      0,  1,  4,  5, 16, 17, 20, 21, | ||||||
|  |      2,  3,  6,  7, 18, 19, 22, 23, | ||||||
|  |      8,  9, 12, 13, 24, 25, 28, 29, | ||||||
|  |     10, 11, 14, 15, 26, 27, 30, 31, | ||||||
|  |     32, 33, 36, 37, 48, 49, 52, 53, | ||||||
|  |     34, 35, 38, 39, 50, 51, 54, 55, | ||||||
|  |     40, 41, 44, 45, 56, 57, 60, 61, | ||||||
|  |     42, 43, 46, 47, 58, 59, 62, 63, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||||||
|  |     for (int i = 0; i < height * 8; ++i) { | ||||||
|  |         output[out_map[i]] = input[i]; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||||||
|  |     int out_i = 0; | ||||||
|  |     for (int x = 0; x < 8; ++x) { | ||||||
|  |         for (int y = height - 1; y >= 0; --y) { | ||||||
|  |             output[out_map[out_i++]] = input[y * 8 + x]; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||||||
|  |     int out_i = 0; | ||||||
|  |     for (int i = height * 8 - 1; i >= 0; --i) { | ||||||
|  |         output[out_map[out_i++]] = input[i]; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||||||
|  |     int out_i = 0; | ||||||
|  |     for (int x = 8-1; x >= 0; --x) { | ||||||
|  |         for (int y = 0; y < height; ++y) { | ||||||
|  |             output[out_map[out_i++]] = input[y * 8 + x]; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) { | ||||||
|  |     for (int y = 0; y < height; ++y) { | ||||||
|  |         for (int x = 0; x < 8; ++x) { | ||||||
|  |             output[y * line_stride + x] = tile[y * 8 + x]; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Performs a Y2R colorspace conversion. | ||||||
|  |  * | ||||||
|  |  * The Y2R hardware implements hardware-accelerated YUV to RGB colorspace conversions. It is most | ||||||
|  |  * commonly used for video playback or to display camera input to the screen. | ||||||
|  |  * | ||||||
|  |  * The conversion process is quite configurable, and can be divided in distinct steps. From | ||||||
|  |  * observation, it appears that the hardware buffers a single 8-pixel tall strip of image data | ||||||
|  |  * internally and converts it in one go before writing to the output and loading the next strip. | ||||||
|  |  * | ||||||
|  |  * The steps taken to convert one strip of image data are: | ||||||
|  |  * | ||||||
|  |  * - The hardware receives data via CDMA (http://3dbrew.org/wiki/Corelink_DMA_Engines), which is
 | ||||||
|  |  *   presumably stored in one or more internal buffers. This process can be done in several separate | ||||||
|  |  *   transfers, as long as they don't exceed the size of the internal image buffer. This allows | ||||||
|  |  *   flexibility in input strides. | ||||||
|  |  * - The input data is decoded into a YUV tuple. Several formats are suported, see the `InputFormat` | ||||||
|  |  *   enum. | ||||||
|  |  * - The YUV tuple is converted, using fixed point calculations, to RGB. This step can be configured | ||||||
|  |  *   using a set of coefficients to support different colorspace standards. See `CoefficientSet`. | ||||||
|  |  * - The strip can be optionally rotated 90, 180 or 270 degrees. Since each strip is processed | ||||||
|  |  *   independently, this notably rotates each *strip*, not the entire image. This means that for 90 | ||||||
|  |  *   or 270 degree rotations, the output will be in terms of several 8 x height images, and for any | ||||||
|  |  *   non-zero rotation the strips will have to be re-arranged so that the parts of the image will | ||||||
|  |  *   not be shuffled together. This limitation makes this a feature of somewhat dubious utility. 90 | ||||||
|  |  *   or 270 degree rotations in images with non-even height don't seem to work properly. | ||||||
|  |  * - The data is converted to the output RGB format. See the `OutputFormat` enum. | ||||||
|  |  * - The data can be output either linearly line-by-line or in the swizzled 8x8 tile format used by | ||||||
|  |  *   the PICA. This is decided by the `BlockAlignment` enum. If 8x8 alignment is used, then the | ||||||
|  |  *   image must have a height divisible by 8. The image width must always be divisible by 8. | ||||||
|  |  * - The final data is then CDMAed out to main memory and the next image strip is processed. This | ||||||
|  |  *   offers the same flexibility as the input stage. | ||||||
|  |  * | ||||||
|  |  * In this implementation, to avoid the combinatorial explosion of parameter combinations, common | ||||||
|  |  * intermediate formats are used and where possible tables or parameters are used instead of | ||||||
|  |  * diverging code paths to keep the amount of branches in check. Some steps are also merged to | ||||||
|  |  * increase efficiency. | ||||||
|  |  * | ||||||
|  |  * Output for all valid settings combinations matches hardware, however output in some edge-cases | ||||||
|  |  * differs: | ||||||
|  |  * | ||||||
|  |  * - `Block8x8` alignment with non-mod8 height produces different garbage patterns on the last | ||||||
|  |  *   strip, especially when combined with rotation. | ||||||
|  |  * - Hardware, when using `Linear` alignment with a non-even height and 90 or 270 degree rotation | ||||||
|  |  *   produces misaligned output on the last strip. This implmentation produces output with the | ||||||
|  |  *   correct "expected" alignment. | ||||||
|  |  * | ||||||
|  |  * Hardware behaves strangely (doesn't fire the completion interrupt, for example) in these cases, | ||||||
|  |  * so they are believed to be invalid configurations anyway. | ||||||
|  |  */ | ||||||
|  | void PerformConversion(ConversionConfiguration& cvt) { | ||||||
|  |     ASSERT(cvt.input_line_width % 8 == 0); | ||||||
|  |     ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); | ||||||
|  |     // Tiles per row
 | ||||||
|  |     size_t num_tiles = cvt.input_line_width / 8; | ||||||
|  |     ASSERT(num_tiles < MAX_TILES); | ||||||
|  | 
 | ||||||
|  |     // Buffer used as a CDMA source/target.
 | ||||||
|  |     std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); | ||||||
|  |     // Intermediate storage for decoded 8x8 image tiles. Always stored as RGB32.
 | ||||||
|  |     std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]); | ||||||
|  |     ImageTile tmp_tile; | ||||||
|  | 
 | ||||||
|  |     // LUT used to remap writes to a tile. Used to allow linear or swizzled output without
 | ||||||
|  |     // requiring two different code paths.
 | ||||||
|  |     const u8* tile_remap; | ||||||
|  |     switch (cvt.block_alignment) { | ||||||
|  |     case BlockAlignment::Linear: | ||||||
|  |         tile_remap = linear_lut; break; | ||||||
|  |     case BlockAlignment::Block8x8: | ||||||
|  |         tile_remap = morton_lut; break; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     for (unsigned int y = 0; y < cvt.input_lines; y += 8) { | ||||||
|  |         unsigned int row_height = std::min(cvt.input_lines - y, 8u); | ||||||
|  | 
 | ||||||
|  |         // Total size in pixels of incoming data required for this strip.
 | ||||||
|  |         const size_t row_data_size = row_height * cvt.input_line_width; | ||||||
|  | 
 | ||||||
|  |         u8* input_Y = data_buffer.get(); | ||||||
|  |         u8* input_U = input_Y + 8 * cvt.input_line_width; | ||||||
|  |         u8* input_V = input_U + 8 * cvt.input_line_width / 2; | ||||||
|  | 
 | ||||||
|  |         switch (cvt.input_format) { | ||||||
|  |         case InputFormat::YUV422_Indiv8: | ||||||
|  |             ReceiveData<1>(input_Y, cvt.src_Y, row_data_size); | ||||||
|  |             ReceiveData<1>(input_U, cvt.src_U, row_data_size / 2); | ||||||
|  |             ReceiveData<1>(input_V, cvt.src_V, row_data_size / 2); | ||||||
|  |             break; | ||||||
|  |         case InputFormat::YUV420_Indiv8: | ||||||
|  |             ReceiveData<1>(input_Y, cvt.src_Y, row_data_size); | ||||||
|  |             ReceiveData<1>(input_U, cvt.src_U, row_data_size / 4); | ||||||
|  |             ReceiveData<1>(input_V, cvt.src_V, row_data_size / 4); | ||||||
|  |             break; | ||||||
|  |         case InputFormat::YUV422_Indiv16: | ||||||
|  |             ReceiveData<2>(input_Y, cvt.src_Y, row_data_size); | ||||||
|  |             ReceiveData<2>(input_U, cvt.src_U, row_data_size / 2); | ||||||
|  |             ReceiveData<2>(input_V, cvt.src_V, row_data_size / 2); | ||||||
|  |             break; | ||||||
|  |         case InputFormat::YUV420_Indiv16: | ||||||
|  |             ReceiveData<2>(input_Y, cvt.src_Y, row_data_size); | ||||||
|  |             ReceiveData<2>(input_U, cvt.src_U, row_data_size / 4); | ||||||
|  |             ReceiveData<2>(input_V, cvt.src_V, row_data_size / 4); | ||||||
|  |             break; | ||||||
|  |         case InputFormat::YUYV422_Interleaved: | ||||||
|  |             input_U = nullptr; | ||||||
|  |             input_V = nullptr; | ||||||
|  |             ReceiveData<1>(input_Y, cvt.src_YUYV, row_data_size * 2); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Note(yuriks): If additional optimization is required, input_format can be moved to a
 | ||||||
|  |         // template parameter, so that its dispatch can be moved to outside the inner loop.
 | ||||||
|  |         ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), | ||||||
|  |                 cvt.input_line_width, row_height, cvt.coefficients); | ||||||
|  | 
 | ||||||
|  |         u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); | ||||||
|  | 
 | ||||||
|  |         for (int i = 0; i < num_tiles; ++i) { | ||||||
|  |             int image_strip_width, output_stride; | ||||||
|  | 
 | ||||||
|  |             switch (cvt.rotation) { | ||||||
|  |             case Rotation::None: | ||||||
|  |                 RotateTile0(tiles[i], tmp_tile, row_height, tile_remap); | ||||||
|  |                 image_strip_width = cvt.input_line_width; | ||||||
|  |                 output_stride = 8; | ||||||
|  |                 break; | ||||||
|  |             case Rotation::Clockwise_90: | ||||||
|  |                 RotateTile90(tiles[i], tmp_tile, row_height, tile_remap); | ||||||
|  |                 image_strip_width = 8; | ||||||
|  |                 output_stride = 8 * row_height; | ||||||
|  |                 break; | ||||||
|  |             case Rotation::Clockwise_180: | ||||||
|  |                 // For 180 and 270 degree rotations we also invert the order of tiles in the strip,
 | ||||||
|  |                 // since the rotates are done individually on each tile.
 | ||||||
|  |                 RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap); | ||||||
|  |                 image_strip_width = cvt.input_line_width; | ||||||
|  |                 output_stride = 8; | ||||||
|  |                 break; | ||||||
|  |             case Rotation::Clockwise_270: | ||||||
|  |                 RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap); | ||||||
|  |                 image_strip_width = 8; | ||||||
|  |                 output_stride = 8 * row_height; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             switch (cvt.block_alignment) { | ||||||
|  |             case BlockAlignment::Linear: | ||||||
|  |                 WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width); | ||||||
|  |                 output_buffer += output_stride; | ||||||
|  |                 break; | ||||||
|  |             case BlockAlignment::Block8x8: | ||||||
|  |                 WriteTileToOutput(output_buffer, tmp_tile, 8, 8); | ||||||
|  |                 output_buffer += TILE_SIZE; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Note(yuriks): If additional optimization is required, output_format can be moved to a
 | ||||||
|  |         // template parameter, so that its dispatch can be moved to outside the inner loop.
 | ||||||
|  |         SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | } | ||||||
							
								
								
									
										15
									
								
								src/core/hw/y2r.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								src/core/hw/y2r.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,15 @@ | ||||||
|  | // Copyright 2015 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
#pragma once

namespace Y2R_U {
    struct ConversionConfiguration;
}

namespace HW {
namespace Y2R {

/// Runs the conversion described by `cvt`. The buffer descriptions inside `cvt` are modified as
/// data is transferred.
void PerformConversion(Y2R_U::ConversionConfiguration& cvt);

}
}
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue