mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Refactor software renderer (#6621)
This commit is contained in:
		
							parent
							
								
									7198243319
								
							
						
					
					
						commit
						9b82de6b24
					
				
					 39 changed files with 1815 additions and 1796 deletions
				
			
		|  | @ -344,11 +344,14 @@ int main(int argc, char** argv) { | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     auto& system = Core::System::GetInstance(); | ||||||
|  |     auto& movie = Core::Movie::GetInstance(); | ||||||
|  | 
 | ||||||
|     if (!movie_record.empty()) { |     if (!movie_record.empty()) { | ||||||
|         Core::Movie::GetInstance().PrepareForRecording(); |         movie.PrepareForRecording(); | ||||||
|     } |     } | ||||||
|     if (!movie_play.empty()) { |     if (!movie_play.empty()) { | ||||||
|         Core::Movie::GetInstance().PrepareForPlayback(movie_play); |         movie.PrepareForPlayback(movie_play); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Apply the command line arguments
 |     // Apply the command line arguments
 | ||||||
|  | @ -361,13 +364,13 @@ int main(int argc, char** argv) { | ||||||
| 
 | 
 | ||||||
|     EmuWindow_SDL2::InitializeSDL2(); |     EmuWindow_SDL2::InitializeSDL2(); | ||||||
| 
 | 
 | ||||||
|     const auto create_emu_window = [](bool fullscreen, |     const auto create_emu_window = [&](bool fullscreen, | ||||||
|                                        bool is_secondary) -> std::unique_ptr<EmuWindow_SDL2> { |                                        bool is_secondary) -> std::unique_ptr<EmuWindow_SDL2> { | ||||||
|         switch (Settings::values.graphics_api.GetValue()) { |         switch (Settings::values.graphics_api.GetValue()) { | ||||||
|         case Settings::GraphicsAPI::OpenGL: |         case Settings::GraphicsAPI::OpenGL: | ||||||
|             return std::make_unique<EmuWindow_SDL2_GL>(fullscreen, is_secondary); |             return std::make_unique<EmuWindow_SDL2_GL>(fullscreen, is_secondary); | ||||||
|         case Settings::GraphicsAPI::Software: |         case Settings::GraphicsAPI::Software: | ||||||
|             return std::make_unique<EmuWindow_SDL2_SW>(fullscreen, is_secondary); |             return std::make_unique<EmuWindow_SDL2_SW>(system, fullscreen, is_secondary); | ||||||
|         } |         } | ||||||
|         LOG_ERROR(Frontend, "Invalid Graphics API, using OpenGL"); |         LOG_ERROR(Frontend, "Invalid Graphics API, using OpenGL"); | ||||||
|         return std::make_unique<EmuWindow_SDL2_GL>(fullscreen, is_secondary); |         return std::make_unique<EmuWindow_SDL2_GL>(fullscreen, is_secondary); | ||||||
|  | @ -385,7 +388,6 @@ int main(int argc, char** argv) { | ||||||
|              Common::g_scm_desc); |              Common::g_scm_desc); | ||||||
|     Settings::LogSettings(); |     Settings::LogSettings(); | ||||||
| 
 | 
 | ||||||
|     Core::System& system = Core::System::GetInstance(); |  | ||||||
|     const Core::System::ResultStatus load_result{ |     const Core::System::ResultStatus load_result{ | ||||||
|         system.Load(*emu_window, filepath, secondary_window.get())}; |         system.Load(*emu_window, filepath, secondary_window.get())}; | ||||||
| 
 | 
 | ||||||
|  | @ -437,21 +439,21 @@ int main(int argc, char** argv) { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (!movie_play.empty()) { |     if (!movie_play.empty()) { | ||||||
|         auto metadata = Core::Movie::GetInstance().GetMovieMetadata(movie_play); |         auto metadata = movie.GetMovieMetadata(movie_play); | ||||||
|         LOG_INFO(Movie, "Author: {}", metadata.author); |         LOG_INFO(Movie, "Author: {}", metadata.author); | ||||||
|         LOG_INFO(Movie, "Rerecord count: {}", metadata.rerecord_count); |         LOG_INFO(Movie, "Rerecord count: {}", metadata.rerecord_count); | ||||||
|         LOG_INFO(Movie, "Input count: {}", metadata.input_count); |         LOG_INFO(Movie, "Input count: {}", metadata.input_count); | ||||||
|         Core::Movie::GetInstance().StartPlayback(movie_play); |         movie.StartPlayback(movie_play); | ||||||
|     } |     } | ||||||
|     if (!movie_record.empty()) { |     if (!movie_record.empty()) { | ||||||
|         Core::Movie::GetInstance().StartRecording(movie_record, movie_record_author); |         movie.StartRecording(movie_record, movie_record_author); | ||||||
|     } |     } | ||||||
|     if (!dump_video.empty() && DynamicLibrary::FFmpeg::LoadFFmpeg()) { |     if (!dump_video.empty() && DynamicLibrary::FFmpeg::LoadFFmpeg()) { | ||||||
|         Layout::FramebufferLayout layout{Layout::FrameLayoutFromResolutionScale( |         const auto layout{ | ||||||
|             VideoCore::g_renderer->GetResolutionScaleFactor())}; |             Layout::FrameLayoutFromResolutionScale(system.Renderer().GetResolutionScaleFactor())}; | ||||||
|         auto dumper = std::make_shared<VideoDumper::FFmpegBackend>(); |         auto dumper = std::make_shared<VideoDumper::FFmpegBackend>(); | ||||||
|         if (dumper->StartDumping(dump_video, layout)) { |         if (dumper->StartDumping(dump_video, layout)) { | ||||||
|             Core::System::GetInstance().RegisterVideoDumper(dumper); |             system.RegisterVideoDumper(dumper); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -494,7 +496,7 @@ int main(int argc, char** argv) { | ||||||
|     main_render_thread.join(); |     main_render_thread.join(); | ||||||
|     secondary_render_thread.join(); |     secondary_render_thread.join(); | ||||||
| 
 | 
 | ||||||
|     Core::Movie::GetInstance().Shutdown(); |     movie.Shutdown(); | ||||||
| 
 | 
 | ||||||
|     auto video_dumper = system.GetVideoDumper(); |     auto video_dumper = system.GetVideoDumper(); | ||||||
|     if (video_dumper && video_dumper->IsDumping()) { |     if (video_dumper && video_dumper->IsDumping()) { | ||||||
|  |  | ||||||
|  | @ -9,18 +9,16 @@ | ||||||
| #include <SDL.h> | #include <SDL.h> | ||||||
| #include <SDL_rect.h> | #include <SDL_rect.h> | ||||||
| #include "citra/emu_window/emu_window_sdl2_sw.h" | #include "citra/emu_window/emu_window_sdl2_sw.h" | ||||||
| #include "common/color.h" |  | ||||||
| #include "common/scm_rev.h" | #include "common/scm_rev.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
|  | #include "core/core.h" | ||||||
| #include "core/frontend/emu_window.h" | #include "core/frontend/emu_window.h" | ||||||
| #include "core/hw/gpu.h" | #include "video_core/renderer_software/renderer_software.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/video_core.h" |  | ||||||
| 
 | 
 | ||||||
| class DummyContext : public Frontend::GraphicsContext {}; | class DummyContext : public Frontend::GraphicsContext {}; | ||||||
| 
 | 
 | ||||||
| EmuWindow_SDL2_SW::EmuWindow_SDL2_SW(bool fullscreen, bool is_secondary) | EmuWindow_SDL2_SW::EmuWindow_SDL2_SW(Core::System& system_, bool fullscreen, bool is_secondary) | ||||||
|     : EmuWindow_SDL2{is_secondary} { |     : EmuWindow_SDL2{is_secondary}, system{system_} { | ||||||
|     std::string window_title = fmt::format("Citra {} | {}-{}", Common::g_build_fullname, |     std::string window_title = fmt::format("Citra {} | {}-{}", Common::g_build_fullname, | ||||||
|                                            Common::g_scm_branch, Common::g_scm_desc); |                                            Common::g_scm_branch, Common::g_scm_desc); | ||||||
|     render_window = |     render_window = | ||||||
|  | @ -67,6 +65,8 @@ void EmuWindow_SDL2_SW::Present() { | ||||||
|     const auto layout{Layout::DefaultFrameLayout( |     const auto layout{Layout::DefaultFrameLayout( | ||||||
|         Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight, false, false)}; |         Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight, false, false)}; | ||||||
| 
 | 
 | ||||||
|  |     using VideoCore::ScreenId; | ||||||
|  | 
 | ||||||
|     while (IsOpen()) { |     while (IsOpen()) { | ||||||
|         SDL_SetRenderDrawColor(renderer, |         SDL_SetRenderDrawColor(renderer, | ||||||
|                                static_cast<Uint8>(Settings::values.bg_red.GetValue() * 255), |                                static_cast<Uint8>(Settings::values.bg_red.GetValue() * 255), | ||||||
|  | @ -74,62 +74,34 @@ void EmuWindow_SDL2_SW::Present() { | ||||||
|                                static_cast<Uint8>(Settings::values.bg_blue.GetValue() * 255), 0xFF); |                                static_cast<Uint8>(Settings::values.bg_blue.GetValue() * 255), 0xFF); | ||||||
|         SDL_RenderClear(renderer); |         SDL_RenderClear(renderer); | ||||||
| 
 | 
 | ||||||
|         const auto draw_screen = [&](int fb_id) { |         const auto draw_screen = [&](ScreenId screen_id) { | ||||||
|             const auto dst_rect = fb_id == 0 ? layout.top_screen : layout.bottom_screen; |             const auto dst_rect = | ||||||
|  |                 screen_id == ScreenId::TopLeft ? layout.top_screen : layout.bottom_screen; | ||||||
|             SDL_Rect sdl_rect{static_cast<int>(dst_rect.left), static_cast<int>(dst_rect.top), |             SDL_Rect sdl_rect{static_cast<int>(dst_rect.left), static_cast<int>(dst_rect.top), | ||||||
|                               static_cast<int>(dst_rect.GetWidth()), |                               static_cast<int>(dst_rect.GetWidth()), | ||||||
|                               static_cast<int>(dst_rect.GetHeight())}; |                               static_cast<int>(dst_rect.GetHeight())}; | ||||||
|             SDL_Surface* screen = LoadFramebuffer(fb_id); |             SDL_Surface* screen = LoadFramebuffer(screen_id); | ||||||
|             SDL_BlitSurface(screen, nullptr, window_surface, &sdl_rect); |             SDL_BlitSurface(screen, nullptr, window_surface, &sdl_rect); | ||||||
|             SDL_FreeSurface(screen); |             SDL_FreeSurface(screen); | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         draw_screen(0); |         draw_screen(ScreenId::TopLeft); | ||||||
|         draw_screen(1); |         draw_screen(ScreenId::Bottom); | ||||||
| 
 | 
 | ||||||
|         SDL_RenderPresent(renderer); |         SDL_RenderPresent(renderer); | ||||||
|         SDL_UpdateWindowSurface(render_window); |         SDL_UpdateWindowSurface(render_window); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| SDL_Surface* EmuWindow_SDL2_SW::LoadFramebuffer(int fb_id) { | SDL_Surface* EmuWindow_SDL2_SW::LoadFramebuffer(VideoCore::ScreenId screen_id) { | ||||||
|     const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; |     const auto& renderer = static_cast<SwRenderer::RendererSoftware&>(system.Renderer()); | ||||||
|     const PAddr framebuffer_addr = |     const auto& info = renderer.Screen(screen_id); | ||||||
|         framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2; |     const int width = static_cast<int>(info.width); | ||||||
| 
 |     const int height = static_cast<int>(info.height); | ||||||
|     Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); |  | ||||||
|     const u8* framebuffer_data = VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr); |  | ||||||
| 
 |  | ||||||
|     const int width = framebuffer.height; |  | ||||||
|     const int height = framebuffer.width; |  | ||||||
|     const int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); |  | ||||||
| 
 |  | ||||||
|     SDL_Surface* surface = |     SDL_Surface* surface = | ||||||
|         SDL_CreateRGBSurfaceWithFormat(0, width, height, 0, SDL_PIXELFORMAT_ABGR8888); |         SDL_CreateRGBSurfaceWithFormat(0, width, height, 0, SDL_PIXELFORMAT_ABGR8888); | ||||||
|     SDL_LockSurface(surface); |     SDL_LockSurface(surface); | ||||||
|     for (int y = 0; y < height; y++) { |     std::memcpy(surface->pixels, info.pixels.data(), info.pixels.size()); | ||||||
|         for (int x = 0; x < width; x++) { |  | ||||||
|             const u8* pixel = framebuffer_data + (x * height + height - y) * bpp; |  | ||||||
|             const Common::Vec4 color = [&] { |  | ||||||
|                 switch (framebuffer.color_format) { |  | ||||||
|                 case GPU::Regs::PixelFormat::RGBA8: |  | ||||||
|                     return Common::Color::DecodeRGBA8(pixel); |  | ||||||
|                 case GPU::Regs::PixelFormat::RGB8: |  | ||||||
|                     return Common::Color::DecodeRGB8(pixel); |  | ||||||
|                 case GPU::Regs::PixelFormat::RGB565: |  | ||||||
|                     return Common::Color::DecodeRGB565(pixel); |  | ||||||
|                 case GPU::Regs::PixelFormat::RGB5A1: |  | ||||||
|                     return Common::Color::DecodeRGB5A1(pixel); |  | ||||||
|                 case GPU::Regs::PixelFormat::RGBA4: |  | ||||||
|                     return Common::Color::DecodeRGBA4(pixel); |  | ||||||
|                 } |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|             }(); |  | ||||||
| 
 |  | ||||||
|             u8* dst_pixel = reinterpret_cast<u8*>(surface->pixels) + (y * width + x) * 4; |  | ||||||
|             std::memcpy(dst_pixel, color.AsArray(), sizeof(color)); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     SDL_UnlockSurface(surface); |     SDL_UnlockSurface(surface); | ||||||
|     return surface; |     return surface; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -10,9 +10,17 @@ | ||||||
| struct SDL_Renderer; | struct SDL_Renderer; | ||||||
| struct SDL_Surface; | struct SDL_Surface; | ||||||
| 
 | 
 | ||||||
|  | namespace VideoCore { | ||||||
|  | enum class ScreenId : u32; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | namespace Core { | ||||||
|  | class System; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| class EmuWindow_SDL2_SW : public EmuWindow_SDL2 { | class EmuWindow_SDL2_SW : public EmuWindow_SDL2 { | ||||||
| public: | public: | ||||||
|     explicit EmuWindow_SDL2_SW(bool fullscreen, bool is_secondary); |     explicit EmuWindow_SDL2_SW(Core::System& system, bool fullscreen, bool is_secondary); | ||||||
|     ~EmuWindow_SDL2_SW(); |     ~EmuWindow_SDL2_SW(); | ||||||
| 
 | 
 | ||||||
|     void Present() override; |     void Present() override; | ||||||
|  | @ -22,7 +30,10 @@ public: | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     /// Loads a framebuffer to an SDL surface
 |     /// Loads a framebuffer to an SDL surface
 | ||||||
|     SDL_Surface* LoadFramebuffer(int fb_id); |     SDL_Surface* LoadFramebuffer(VideoCore::ScreenId screen_id); | ||||||
|  | 
 | ||||||
|  |     /// The system class.
 | ||||||
|  |     Core::System& system; | ||||||
| 
 | 
 | ||||||
|     /// The SDL software renderer
 |     /// The SDL software renderer
 | ||||||
|     SDL_Renderer* renderer; |     SDL_Renderer* renderer; | ||||||
|  |  | ||||||
|  | @ -25,6 +25,7 @@ | ||||||
| #include "input_common/motion_emu.h" | #include "input_common/motion_emu.h" | ||||||
| #include "video_core/custom_textures/custom_tex_manager.h" | #include "video_core/custom_textures/custom_tex_manager.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
|  | #include "video_core/renderer_software/renderer_software.h" | ||||||
| #include "video_core/video_core.h" | #include "video_core/video_core.h" | ||||||
| 
 | 
 | ||||||
| #ifdef HAS_OPENGL | #ifdef HAS_OPENGL | ||||||
|  | @ -288,7 +289,8 @@ private: | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| struct SoftwareRenderWidget : public RenderWidget { | struct SoftwareRenderWidget : public RenderWidget { | ||||||
|     explicit SoftwareRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {} |     explicit SoftwareRenderWidget(GRenderWindow* parent, Core::System& system_) | ||||||
|  |         : RenderWidget(parent), system(system_) {} | ||||||
| 
 | 
 | ||||||
|     void Present() override { |     void Present() override { | ||||||
|         if (!isVisible()) { |         if (!isVisible()) { | ||||||
|  | @ -298,61 +300,40 @@ struct SoftwareRenderWidget : public RenderWidget { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         using VideoCore::ScreenId; | ||||||
|  | 
 | ||||||
|         const auto layout{Layout::DefaultFrameLayout(width(), height(), false, false)}; |         const auto layout{Layout::DefaultFrameLayout(width(), height(), false, false)}; | ||||||
|         QPainter painter(this); |         QPainter painter(this); | ||||||
| 
 | 
 | ||||||
|         const auto draw_screen = [&](int fb_id) { |         const auto draw_screen = [&](ScreenId screen_id) { | ||||||
|             const auto rect = fb_id == 0 ? layout.top_screen : layout.bottom_screen; |             const auto rect = | ||||||
|             const QImage screen = LoadFramebuffer(fb_id).scaled(rect.GetWidth(), rect.GetHeight()); |                 screen_id == ScreenId::TopLeft ? layout.top_screen : layout.bottom_screen; | ||||||
|  |             const QImage screen = | ||||||
|  |                 LoadFramebuffer(screen_id).scaled(rect.GetWidth(), rect.GetHeight()); | ||||||
|             painter.drawImage(rect.left, rect.top, screen); |             painter.drawImage(rect.left, rect.top, screen); | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         painter.fillRect(rect(), qRgb(Settings::values.bg_red.GetValue() * 255, |         painter.fillRect(rect(), qRgb(Settings::values.bg_red.GetValue() * 255, | ||||||
|                                       Settings::values.bg_green.GetValue() * 255, |                                       Settings::values.bg_green.GetValue() * 255, | ||||||
|                                       Settings::values.bg_blue.GetValue() * 255)); |                                       Settings::values.bg_blue.GetValue() * 255)); | ||||||
|         draw_screen(0); |         draw_screen(ScreenId::TopLeft); | ||||||
|         draw_screen(1); |         draw_screen(ScreenId::Bottom); | ||||||
| 
 | 
 | ||||||
|         painter.end(); |         painter.end(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     QImage LoadFramebuffer(int fb_id) { |     QImage LoadFramebuffer(VideoCore::ScreenId screen_id) { | ||||||
|         const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; |         const auto& renderer = static_cast<SwRenderer::RendererSoftware&>(system.Renderer()); | ||||||
|         const PAddr framebuffer_addr = |         const auto& info = renderer.Screen(screen_id); | ||||||
|             framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2; |         const int width = static_cast<int>(info.width); | ||||||
| 
 |         const int height = static_cast<int>(info.height); | ||||||
|         Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); |         QImage image{height, width, QImage::Format_RGBA8888}; | ||||||
|         const u8* framebuffer_data = VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr); |         std::memcpy(image.bits(), info.pixels.data(), info.pixels.size()); | ||||||
| 
 |  | ||||||
|         const int width = framebuffer.height; |  | ||||||
|         const int height = framebuffer.width; |  | ||||||
|         const int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); |  | ||||||
| 
 |  | ||||||
|         QImage image{width, height, QImage::Format_RGBA8888}; |  | ||||||
|         for (int y = 0; y < height; y++) { |  | ||||||
|             for (int x = 0; x < width; x++) { |  | ||||||
|                 const u8* pixel = framebuffer_data + (x * height + height - y) * bpp; |  | ||||||
|                 const Common::Vec4 color = [&] { |  | ||||||
|                     switch (framebuffer.color_format) { |  | ||||||
|                     case GPU::Regs::PixelFormat::RGBA8: |  | ||||||
|                         return Common::Color::DecodeRGBA8(pixel); |  | ||||||
|                     case GPU::Regs::PixelFormat::RGB8: |  | ||||||
|                         return Common::Color::DecodeRGB8(pixel); |  | ||||||
|                     case GPU::Regs::PixelFormat::RGB565: |  | ||||||
|                         return Common::Color::DecodeRGB565(pixel); |  | ||||||
|                     case GPU::Regs::PixelFormat::RGB5A1: |  | ||||||
|                         return Common::Color::DecodeRGB5A1(pixel); |  | ||||||
|                     case GPU::Regs::PixelFormat::RGBA4: |  | ||||||
|                         return Common::Color::DecodeRGBA4(pixel); |  | ||||||
|                     } |  | ||||||
|                     UNREACHABLE(); |  | ||||||
|                 }(); |  | ||||||
| 
 |  | ||||||
|                 image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return image; |         return image; | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     Core::System& system; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static Frontend::WindowSystemType GetWindowSystemType() { | static Frontend::WindowSystemType GetWindowSystemType() { | ||||||
|  | @ -401,8 +382,9 @@ static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window | ||||||
| 
 | 
 | ||||||
| std::unique_ptr<Frontend::GraphicsContext> GRenderWindow::main_context; | std::unique_ptr<Frontend::GraphicsContext> GRenderWindow::main_context; | ||||||
| 
 | 
 | ||||||
| GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread, bool is_secondary_) | GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread_, Core::System& system_, | ||||||
|     : QWidget(parent_), EmuWindow(is_secondary_), emu_thread(emu_thread) { |                              bool is_secondary_) | ||||||
|  |     : QWidget(parent_), EmuWindow(is_secondary_), emu_thread(emu_thread_), system{system_} { | ||||||
| 
 | 
 | ||||||
|     setWindowTitle(QStringLiteral("Citra %1 | %2-%3") |     setWindowTitle(QStringLiteral("Citra %1 | %2-%3") | ||||||
|                        .arg(QString::fromUtf8(Common::g_build_name), |                        .arg(QString::fromUtf8(Common::g_build_name), | ||||||
|  | @ -652,12 +634,12 @@ void GRenderWindow::ReleaseRenderTarget() { | ||||||
| 
 | 
 | ||||||
| void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { | void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { | ||||||
|     if (res_scale == 0) { |     if (res_scale == 0) { | ||||||
|         res_scale = VideoCore::g_renderer->GetResolutionScaleFactor(); |         res_scale = system.Renderer().GetResolutionScaleFactor(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const auto layout{Layout::FrameLayoutFromResolutionScale(res_scale, is_secondary)}; |     const auto layout{Layout::FrameLayoutFromResolutionScale(res_scale, is_secondary)}; | ||||||
|     screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); |     screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); | ||||||
|     VideoCore::g_renderer->RequestScreenshot( |     system.Renderer().RequestScreenshot( | ||||||
|         screenshot_image.bits(), |         screenshot_image.bits(), | ||||||
|         [this, screenshot_path] { |         [this, screenshot_path] { | ||||||
|             const std::string std_screenshot_path = screenshot_path.toStdString(); |             const std::string std_screenshot_path = screenshot_path.toStdString(); | ||||||
|  | @ -708,7 +690,7 @@ bool GRenderWindow::InitializeOpenGL() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GRenderWindow::InitializeSoftware() { | void GRenderWindow::InitializeSoftware() { | ||||||
|     child_widget = new SoftwareRenderWidget(this); |     child_widget = new SoftwareRenderWidget(this, system); | ||||||
|     main_context = std::make_unique<DummyContext>(); |     main_context = std::make_unique<DummyContext>(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -112,7 +112,7 @@ class GRenderWindow : public QWidget, public Frontend::EmuWindow { | ||||||
|     Q_OBJECT |     Q_OBJECT | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     GRenderWindow(QWidget* parent, EmuThread* emu_thread, bool is_secondary); |     GRenderWindow(QWidget* parent, EmuThread* emu_thread, Core::System& system, bool is_secondary); | ||||||
|     ~GRenderWindow() override; |     ~GRenderWindow() override; | ||||||
| 
 | 
 | ||||||
|     // EmuWindow implementation.
 |     // EmuWindow implementation.
 | ||||||
|  | @ -188,6 +188,7 @@ private: | ||||||
|     QWidget* child_widget = nullptr; |     QWidget* child_widget = nullptr; | ||||||
| 
 | 
 | ||||||
|     EmuThread* emu_thread; |     EmuThread* emu_thread; | ||||||
|  |     Core::System& system; | ||||||
| 
 | 
 | ||||||
|     /// Main context that will be shared with all other contexts that are requested.
 |     /// Main context that will be shared with all other contexts that are requested.
 | ||||||
|     /// If this is used in a shared context setting, then this should not be used directly, but
 |     /// If this is used in a shared context setting, then this should not be used directly, but
 | ||||||
|  |  | ||||||
|  | @ -550,8 +550,8 @@ void GraphicsVertexShaderWidget::OnResumed() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { | void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { | ||||||
|     float value = input_data[index]->text().toFloat(); |     const f32 value = input_data[index]->text().toFloat(); | ||||||
|     input_vertex.attr[index / 4][index % 4] = Pica::float24::FromFloat32(value); |     input_vertex.attr[index / 4][index % 4] = Pica::f24::FromFloat32(value); | ||||||
|     // Re-execute shader with updated value
 |     // Re-execute shader with updated value
 | ||||||
|     Reload(); |     Reload(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -297,8 +297,8 @@ void GMainWindow::InitializeWidgets() { | ||||||
| #ifdef CITRA_ENABLE_COMPATIBILITY_REPORTING | #ifdef CITRA_ENABLE_COMPATIBILITY_REPORTING | ||||||
|     ui->action_Report_Compatibility->setVisible(true); |     ui->action_Report_Compatibility->setVisible(true); | ||||||
| #endif | #endif | ||||||
|     render_window = new GRenderWindow(this, emu_thread.get(), false); |     render_window = new GRenderWindow(this, emu_thread.get(), system, false); | ||||||
|     secondary_window = new GRenderWindow(this, emu_thread.get(), true); |     secondary_window = new GRenderWindow(this, emu_thread.get(), system, true); | ||||||
|     render_window->hide(); |     render_window->hide(); | ||||||
|     secondary_window->hide(); |     secondary_window->hide(); | ||||||
|     secondary_window->setParent(nullptr); |     secondary_window->setParent(nullptr); | ||||||
|  |  | ||||||
|  | @ -14,7 +14,6 @@ | ||||||
| #include "video_core/shader/shader_interpreter.h" | #include "video_core/shader/shader_interpreter.h" | ||||||
| #include "video_core/shader/shader_jit_x64_compiler.h" | #include "video_core/shader/shader_jit_x64_compiler.h" | ||||||
| 
 | 
 | ||||||
| using float24 = Pica::float24; |  | ||||||
| using JitShader = Pica::Shader::JitShader; | using JitShader = Pica::Shader::JitShader; | ||||||
| using ShaderInterpreter = Pica::Shader::InterpreterEngine; | using ShaderInterpreter = Pica::Shader::InterpreterEngine; | ||||||
| 
 | 
 | ||||||
|  | @ -51,14 +50,14 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void RunJit(Pica::Shader::UnitState& shader_unit, float input) { |     void RunJit(Pica::Shader::UnitState& shader_unit, float input) { | ||||||
|         shader_unit.registers.input[0].x = float24::FromFloat32(input); |         shader_unit.registers.input[0].x = Pica::f24::FromFloat32(input); | ||||||
|         shader_unit.registers.temporary[0].x = float24::FromFloat32(0); |         shader_unit.registers.temporary[0].x = Pica::f24::Zero(); | ||||||
|         shader_jit.Run(*shader_setup, shader_unit, 0); |         shader_jit.Run(*shader_setup, shader_unit, 0); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) { |     void RunInterpreter(Pica::Shader::UnitState& shader_unit, float input) { | ||||||
|         shader_unit.registers.input[0].x = float24::FromFloat32(input); |         shader_unit.registers.input[0].x = Pica::f24::FromFloat32(input); | ||||||
|         shader_unit.registers.temporary[0].x = float24::FromFloat32(0); |         shader_unit.registers.temporary[0].x = Pica::f24::Zero(); | ||||||
|         shader_interpreter.Run(*shader_setup, shader_unit); |         shader_interpreter.Run(*shader_setup, shader_unit); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -83,8 +83,6 @@ add_library(video_core STATIC | ||||||
|     renderer_opengl/post_processing_opengl.h |     renderer_opengl/post_processing_opengl.h | ||||||
|     renderer_opengl/renderer_opengl.cpp |     renderer_opengl/renderer_opengl.cpp | ||||||
|     renderer_opengl/renderer_opengl.h |     renderer_opengl/renderer_opengl.h | ||||||
|     renderer_software/rasterizer.cpp |  | ||||||
|     renderer_software/rasterizer.h |  | ||||||
|     renderer_software/renderer_software.cpp |     renderer_software/renderer_software.cpp | ||||||
|     renderer_software/renderer_software.h |     renderer_software/renderer_software.h | ||||||
|     renderer_software/sw_clipper.cpp |     renderer_software/sw_clipper.cpp | ||||||
|  |  | ||||||
|  | @ -90,16 +90,16 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, | ||||||
|                 for (auto i : {0, 1, 2, 3}) { |                 for (auto i : {0, 1, 2, 3}) { | ||||||
|                     float buffer_value; |                     float buffer_value; | ||||||
|                     std::memcpy(&buffer_value, &uniform_write_buffer[i], sizeof(float)); |                     std::memcpy(&buffer_value, &uniform_write_buffer[i], sizeof(float)); | ||||||
|                     uniform[3 - i] = float24::FromFloat32(buffer_value); |                     uniform[3 - i] = f24::FromFloat32(buffer_value); | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|                 // TODO: Untested
 |                 // TODO: Untested
 | ||||||
|                 uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); |                 uniform.w = f24::FromRaw(uniform_write_buffer[0] >> 8); | ||||||
|                 uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | |                 uniform.z = f24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | | ||||||
|                                          ((uniform_write_buffer[1] >> 16) & 0xFFFF)); |                                          ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||||||
|                 uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | |                 uniform.y = f24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | | ||||||
|                                          ((uniform_write_buffer[2] >> 24) & 0xFF)); |                                          ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||||||
|                 uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); |                 uniform.x = f24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             LOG_TRACE(HW_GPU, "Set {} float uniform {:x} to ({} {} {} {})", |             LOG_TRACE(HW_GPU, "Set {} float uniform {:x} to ({} {} {} {})", | ||||||
|  | @ -182,15 +182,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             Common::Vec4<float24> attribute; |             Common::Vec4<f24> attribute; | ||||||
| 
 | 
 | ||||||
|             // NOTE: The destination component order indeed is "backwards"
 |             // NOTE: The destination component order indeed is "backwards"
 | ||||||
|             attribute.w = float24::FromRaw(g_state.default_attr_write_buffer[0] >> 8); |             attribute.w = f24::FromRaw(g_state.default_attr_write_buffer[0] >> 8); | ||||||
|             attribute.z = float24::FromRaw(((g_state.default_attr_write_buffer[0] & 0xFF) << 16) | |             attribute.z = f24::FromRaw(((g_state.default_attr_write_buffer[0] & 0xFF) << 16) | | ||||||
|                                        ((g_state.default_attr_write_buffer[1] >> 16) & 0xFFFF)); |                                        ((g_state.default_attr_write_buffer[1] >> 16) & 0xFFFF)); | ||||||
|             attribute.y = float24::FromRaw(((g_state.default_attr_write_buffer[1] & 0xFFFF) << 8) | |             attribute.y = f24::FromRaw(((g_state.default_attr_write_buffer[1] & 0xFFFF) << 8) | | ||||||
|                                        ((g_state.default_attr_write_buffer[2] >> 24) & 0xFF)); |                                        ((g_state.default_attr_write_buffer[2] >> 24) & 0xFF)); | ||||||
|             attribute.x = float24::FromRaw(g_state.default_attr_write_buffer[2] & 0xFFFFFF); |             attribute.x = f24::FromRaw(g_state.default_attr_write_buffer[2] & 0xFFFFFF); | ||||||
| 
 | 
 | ||||||
|             LOG_TRACE(HW_GPU, "Set default VS attribute {:x} to ({} {} {} {})", (int)setup.index, |             LOG_TRACE(HW_GPU, "Set default VS attribute {:x} to ({} {} {} {})", (int)setup.index, | ||||||
|                       attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), |                       attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | ||||||
|  |  | ||||||
|  | @ -85,8 +85,8 @@ private: | ||||||
|     const Regs& regs; |     const Regs& regs; | ||||||
|     Shader::GSUnitState& unit; |     Shader::GSUnitState& unit; | ||||||
|     Shader::AttributeBuffer attribute_buffer; |     Shader::AttributeBuffer attribute_buffer; | ||||||
|     Common::Vec4<float24>* buffer_cur; |     Common::Vec4<f24>* buffer_cur; | ||||||
|     Common::Vec4<float24>* buffer_end; |     Common::Vec4<f24>* buffer_end; | ||||||
|     unsigned int vs_output_num; |     unsigned int vs_output_num; | ||||||
| 
 | 
 | ||||||
|     GeometryPipeline_Point() : regs(g_state.regs), unit(g_state.gs_unit) {} |     GeometryPipeline_Point() : regs(g_state.regs), unit(g_state.gs_unit) {} | ||||||
|  | @ -146,7 +146,7 @@ public: | ||||||
|         DEBUG_ASSERT(need_index); |         DEBUG_ASSERT(need_index); | ||||||
| 
 | 
 | ||||||
|         // The number of vertex input is put to the uniform register
 |         // The number of vertex input is put to the uniform register
 | ||||||
|         float24 vertex_num = float24::FromFloat32(static_cast<float>(val)); |         f24 vertex_num = f24::FromFloat32(static_cast<float>(val)); | ||||||
|         setup.uniforms.f[0] = Common::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); |         setup.uniforms.f[0] = Common::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); | ||||||
| 
 | 
 | ||||||
|         // The second uniform register and so on are used for receiving input vertices
 |         // The second uniform register and so on are used for receiving input vertices
 | ||||||
|  | @ -183,7 +183,7 @@ private: | ||||||
|     Shader::ShaderSetup& setup; |     Shader::ShaderSetup& setup; | ||||||
|     unsigned int main_vertex_num; |     unsigned int main_vertex_num; | ||||||
|     unsigned int total_vertex_num; |     unsigned int total_vertex_num; | ||||||
|     Common::Vec4<float24>* buffer_cur; |     Common::Vec4<f24>* buffer_cur; | ||||||
|     unsigned int vs_output_num; |     unsigned int vs_output_num; | ||||||
| 
 | 
 | ||||||
|     GeometryPipeline_VariablePrimitive() : regs(g_state.regs), setup(g_state.gs) {} |     GeometryPipeline_VariablePrimitive() : regs(g_state.regs), setup(g_state.gs) {} | ||||||
|  | @ -257,9 +257,9 @@ public: | ||||||
| private: | private: | ||||||
|     [[maybe_unused]] const Regs& regs; |     [[maybe_unused]] const Regs& regs; | ||||||
|     Shader::ShaderSetup& setup; |     Shader::ShaderSetup& setup; | ||||||
|     Common::Vec4<float24>* buffer_begin; |     Common::Vec4<f24>* buffer_begin; | ||||||
|     Common::Vec4<float24>* buffer_cur; |     Common::Vec4<f24>* buffer_cur; | ||||||
|     Common::Vec4<float24>* buffer_end; |     Common::Vec4<f24>* buffer_end; | ||||||
|     unsigned int vs_output_num; |     unsigned int vs_output_num; | ||||||
| 
 | 
 | ||||||
|     GeometryPipeline_FixedPrimitive() : regs(g_state.regs), setup(g_state.gs) {} |     GeometryPipeline_FixedPrimitive() : regs(g_state.regs), setup(g_state.gs) {} | ||||||
|  |  | ||||||
|  | @ -25,20 +25,20 @@ namespace Pica { | ||||||
| template <unsigned M, unsigned E> | template <unsigned M, unsigned E> | ||||||
| struct Float { | struct Float { | ||||||
| public: | public: | ||||||
|     static Float<M, E> FromFloat32(float val) { |     static constexpr Float<M, E> FromFloat32(float val) { | ||||||
|         Float<M, E> ret; |         Float<M, E> ret; | ||||||
|         ret.value = val; |         ret.value = val; | ||||||
|         return ret; |         return ret; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static Float<M, E> FromRaw(u32 hex) { |     static constexpr Float<M, E> FromRaw(u32 hex) { | ||||||
|         Float<M, E> res; |         Float<M, E> res; | ||||||
| 
 | 
 | ||||||
|         const int width = M + E + 1; |         const s32 width = M + E + 1; | ||||||
|         const int bias = 128 - (1 << (E - 1)); |         const s32 bias = 128 - (1 << (E - 1)); | ||||||
|         int exponent = (hex >> M) & ((1 << E) - 1); |         s32 exponent = (hex >> M) & ((1 << E) - 1); | ||||||
|         const unsigned mantissa = hex & ((1 << M) - 1); |         const u32 mantissa = hex & ((1 << M) - 1); | ||||||
|         const unsigned sign = (hex >> (E + M)) << 31; |         const u32 sign = (hex >> (E + M)) << 31; | ||||||
| 
 | 
 | ||||||
|         if (hex & ((1 << (width - 1)) - 1)) { |         if (hex & ((1 << (width - 1)) - 1)) { | ||||||
|             if (exponent == (1 << E) - 1) |             if (exponent == (1 << E) - 1) | ||||||
|  | @ -55,16 +55,20 @@ public: | ||||||
|         return res; |         return res; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static Float<M, E> Zero() { |     static constexpr Float<M, E> Zero() { | ||||||
|         return FromFloat32(0.f); |         return FromFloat32(0.f); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     static constexpr Float<M, E> One() { | ||||||
|  |         return FromFloat32(1.f); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     // Not recommended for anything but logging
 |     // Not recommended for anything but logging
 | ||||||
|     float ToFloat32() const { |     constexpr float ToFloat32() const { | ||||||
|         return value; |         return value; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E> operator*(const Float<M, E>& flt) const { |     constexpr Float<M, E> operator*(const Float<M, E>& flt) const { | ||||||
|         float result = value * flt.ToFloat32(); |         float result = value * flt.ToFloat32(); | ||||||
|         // PICA gives 0 instead of NaN when multiplying by inf
 |         // PICA gives 0 instead of NaN when multiplying by inf
 | ||||||
|         if (std::isnan(result)) |         if (std::isnan(result)) | ||||||
|  | @ -73,70 +77,70 @@ public: | ||||||
|         return Float<M, E>::FromFloat32(result); |         return Float<M, E>::FromFloat32(result); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E> operator/(const Float<M, E>& flt) const { |     constexpr Float<M, E> operator/(const Float<M, E>& flt) const { | ||||||
|         return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); |         return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E> operator+(const Float<M, E>& flt) const { |     constexpr Float<M, E> operator+(const Float<M, E>& flt) const { | ||||||
|         return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); |         return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E> operator-(const Float<M, E>& flt) const { |     constexpr Float<M, E> operator-(const Float<M, E>& flt) const { | ||||||
|         return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); |         return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E>& operator*=(const Float<M, E>& flt) { |     constexpr Float<M, E>& operator*=(const Float<M, E>& flt) { | ||||||
|         value = operator*(flt).value; |         value = operator*(flt).value; | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E>& operator/=(const Float<M, E>& flt) { |     constexpr Float<M, E>& operator/=(const Float<M, E>& flt) { | ||||||
|         value /= flt.ToFloat32(); |         value /= flt.ToFloat32(); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E>& operator+=(const Float<M, E>& flt) { |     constexpr Float<M, E>& operator+=(const Float<M, E>& flt) { | ||||||
|         value += flt.ToFloat32(); |         value += flt.ToFloat32(); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E>& operator-=(const Float<M, E>& flt) { |     constexpr Float<M, E>& operator-=(const Float<M, E>& flt) { | ||||||
|         value -= flt.ToFloat32(); |         value -= flt.ToFloat32(); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Float<M, E> operator-() const { |     constexpr Float<M, E> operator-() const { | ||||||
|         return Float<M, E>::FromFloat32(-ToFloat32()); |         return Float<M, E>::FromFloat32(-ToFloat32()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool operator<(const Float<M, E>& flt) const { |     constexpr bool operator<(const Float<M, E>& flt) const { | ||||||
|         return ToFloat32() < flt.ToFloat32(); |         return ToFloat32() < flt.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool operator>(const Float<M, E>& flt) const { |     constexpr bool operator>(const Float<M, E>& flt) const { | ||||||
|         return ToFloat32() > flt.ToFloat32(); |         return ToFloat32() > flt.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool operator>=(const Float<M, E>& flt) const { |     constexpr bool operator>=(const Float<M, E>& flt) const { | ||||||
|         return ToFloat32() >= flt.ToFloat32(); |         return ToFloat32() >= flt.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool operator<=(const Float<M, E>& flt) const { |     constexpr bool operator<=(const Float<M, E>& flt) const { | ||||||
|         return ToFloat32() <= flt.ToFloat32(); |         return ToFloat32() <= flt.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool operator==(const Float<M, E>& flt) const { |     constexpr bool operator==(const Float<M, E>& flt) const { | ||||||
|         return ToFloat32() == flt.ToFloat32(); |         return ToFloat32() == flt.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool operator!=(const Float<M, E>& flt) const { |     constexpr bool operator!=(const Float<M, E>& flt) const { | ||||||
|         return ToFloat32() != flt.ToFloat32(); |         return ToFloat32() != flt.ToFloat32(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     static const unsigned MASK = (1 << (M + E + 1)) - 1; |     static constexpr u32 MASK = (1 << (M + E + 1)) - 1; | ||||||
|     static const unsigned MANTISSA_MASK = (1 << M) - 1; |     static constexpr u32 MANTISSA_MASK = (1 << M) - 1; | ||||||
|     static const unsigned EXPONENT_MASK = (1 << E) - 1; |     static constexpr u32 EXPONENT_MASK = (1 << E) - 1; | ||||||
| 
 | 
 | ||||||
|     // Stored as a regular float, merely for convenience
 |     // Stored as a regular float, merely for convenience
 | ||||||
|     // TODO: Perform proper arithmetic on this!
 |     // TODO: Perform proper arithmetic on this!
 | ||||||
|  | @ -149,8 +153,8 @@ private: | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| using float24 = Float<16, 7>; | using f24 = Pica::Float<16, 7>; | ||||||
| using float20 = Float<12, 7>; | using f20 = Pica::Float<12, 7>; | ||||||
| using float16 = Float<10, 5>; | using f16 = Pica::Float<10, 5>; | ||||||
| 
 | 
 | ||||||
| } // namespace Pica
 | } // namespace Pica
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,8 @@ | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| 
 | 
 | ||||||
|  | using Pica::f24; | ||||||
|  | 
 | ||||||
| static Common::Vec4f ColorRGBA8(const u32 color) { | static Common::Vec4f ColorRGBA8(const u32 color) { | ||||||
|     const auto rgba = |     const auto rgba = | ||||||
|         Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF}; |         Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF}; | ||||||
|  | @ -73,7 +75,7 @@ RasterizerAccelerated::RasterizerAccelerated(Memory::MemorySystem& memory_) | ||||||
|  * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around |  * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around | ||||||
|  * these issues, making this basic implementation actually more accurate to the hardware. |  * these issues, making this basic implementation actually more accurate to the hardware. | ||||||
|  */ |  */ | ||||||
| static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) { | static bool AreQuaternionsOpposite(Common::Vec4<f24> qa, Common::Vec4<f24> qb) { | ||||||
|     Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; |     Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; | ||||||
|     Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; |     Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; | ||||||
| 
 | 
 | ||||||
|  | @ -612,7 +614,7 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncDepthScale() { | void RasterizerAccelerated::SyncDepthScale() { | ||||||
|     float depth_scale = Pica::float24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); |     const f32 depth_scale = f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); | ||||||
| 
 | 
 | ||||||
|     if (depth_scale != uniform_block_data.data.depth_scale) { |     if (depth_scale != uniform_block_data.data.depth_scale) { | ||||||
|         uniform_block_data.data.depth_scale = depth_scale; |         uniform_block_data.data.depth_scale = depth_scale; | ||||||
|  | @ -621,8 +623,7 @@ void RasterizerAccelerated::SyncDepthScale() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncDepthOffset() { | void RasterizerAccelerated::SyncDepthOffset() { | ||||||
|     float depth_offset = |     const f32 depth_offset = f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); | ||||||
|         Pica::float24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); |  | ||||||
| 
 | 
 | ||||||
|     if (depth_offset != uniform_block_data.data.depth_offset) { |     if (depth_offset != uniform_block_data.data.depth_offset) { | ||||||
|         uniform_block_data.data.depth_offset = depth_offset; |         uniform_block_data.data.depth_offset = depth_offset; | ||||||
|  | @ -646,16 +647,16 @@ void RasterizerAccelerated::SyncFogColor() { | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncProcTexNoise() { | void RasterizerAccelerated::SyncProcTexNoise() { | ||||||
|     const Common::Vec2f proctex_noise_f = { |     const Common::Vec2f proctex_noise_f = { | ||||||
|         Pica::float16::FromRaw(regs.texturing.proctex_noise_frequency.u).ToFloat32(), |         Pica::f16::FromRaw(regs.texturing.proctex_noise_frequency.u).ToFloat32(), | ||||||
|         Pica::float16::FromRaw(regs.texturing.proctex_noise_frequency.v).ToFloat32(), |         Pica::f16::FromRaw(regs.texturing.proctex_noise_frequency.v).ToFloat32(), | ||||||
|     }; |     }; | ||||||
|     const Common::Vec2f proctex_noise_a = { |     const Common::Vec2f proctex_noise_a = { | ||||||
|         regs.texturing.proctex_noise_u.amplitude / 4095.0f, |         regs.texturing.proctex_noise_u.amplitude / 4095.0f, | ||||||
|         regs.texturing.proctex_noise_v.amplitude / 4095.0f, |         regs.texturing.proctex_noise_v.amplitude / 4095.0f, | ||||||
|     }; |     }; | ||||||
|     const Common::Vec2f proctex_noise_p = { |     const Common::Vec2f proctex_noise_p = { | ||||||
|         Pica::float16::FromRaw(regs.texturing.proctex_noise_u.phase).ToFloat32(), |         Pica::f16::FromRaw(regs.texturing.proctex_noise_u.phase).ToFloat32(), | ||||||
|         Pica::float16::FromRaw(regs.texturing.proctex_noise_v.phase).ToFloat32(), |         Pica::f16::FromRaw(regs.texturing.proctex_noise_v.phase).ToFloat32(), | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     if (proctex_noise_f != uniform_block_data.data.proctex_noise_f || |     if (proctex_noise_f != uniform_block_data.data.proctex_noise_f || | ||||||
|  | @ -669,7 +670,7 @@ void RasterizerAccelerated::SyncProcTexNoise() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncProcTexBias() { | void RasterizerAccelerated::SyncProcTexBias() { | ||||||
|     const auto proctex_bias = Pica::float16::FromRaw(regs.texturing.proctex.bias_low | |     const auto proctex_bias = Pica::f16::FromRaw(regs.texturing.proctex.bias_low | | ||||||
|                                                  (regs.texturing.proctex_lut.bias_high << 8)) |                                                  (regs.texturing.proctex_lut.bias_high << 8)) | ||||||
|                                   .ToFloat32(); |                                   .ToFloat32(); | ||||||
|     if (proctex_bias != uniform_block_data.data.proctex_bias) { |     if (proctex_bias != uniform_block_data.data.proctex_bias) { | ||||||
|  | @ -687,7 +688,7 @@ void RasterizerAccelerated::SyncAlphaTest() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncCombinerColor() { | void RasterizerAccelerated::SyncCombinerColor() { | ||||||
|     auto combiner_color = ColorRGBA8(regs.texturing.tev_combiner_buffer_color.raw); |     const auto combiner_color = ColorRGBA8(regs.texturing.tev_combiner_buffer_color.raw); | ||||||
|     if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { |     if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { | ||||||
|         uniform_block_data.data.tev_combiner_buffer_color = combiner_color; |         uniform_block_data.data.tev_combiner_buffer_color = combiner_color; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -695,7 +696,7 @@ void RasterizerAccelerated::SyncCombinerColor() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncTevConstColor( | void RasterizerAccelerated::SyncTevConstColor( | ||||||
|     std::size_t stage_index, const Pica::TexturingRegs::TevStageConfig& tev_stage) { |     const size_t stage_index, const Pica::TexturingRegs::TevStageConfig& tev_stage) { | ||||||
|     const auto const_color = ColorRGBA8(tev_stage.const_color); |     const auto const_color = ColorRGBA8(tev_stage.const_color); | ||||||
| 
 | 
 | ||||||
|     if (const_color == uniform_block_data.data.const_color[stage_index]) { |     if (const_color == uniform_block_data.data.const_color[stage_index]) { | ||||||
|  | @ -707,7 +708,7 @@ void RasterizerAccelerated::SyncTevConstColor( | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncGlobalAmbient() { | void RasterizerAccelerated::SyncGlobalAmbient() { | ||||||
|     auto color = LightColor(regs.lighting.global_ambient); |     const auto color = LightColor(regs.lighting.global_ambient); | ||||||
|     if (color != uniform_block_data.data.lighting_global_ambient) { |     if (color != uniform_block_data.data.lighting_global_ambient) { | ||||||
|         uniform_block_data.data.lighting_global_ambient = color; |         uniform_block_data.data.lighting_global_ambient = color; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -715,7 +716,7 @@ void RasterizerAccelerated::SyncGlobalAmbient() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightSpecular0(int light_index) { | void RasterizerAccelerated::SyncLightSpecular0(int light_index) { | ||||||
|     auto color = LightColor(regs.lighting.light[light_index].specular_0); |     const auto color = LightColor(regs.lighting.light[light_index].specular_0); | ||||||
|     if (color != uniform_block_data.data.light_src[light_index].specular_0) { |     if (color != uniform_block_data.data.light_src[light_index].specular_0) { | ||||||
|         uniform_block_data.data.light_src[light_index].specular_0 = color; |         uniform_block_data.data.light_src[light_index].specular_0 = color; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -723,7 +724,7 @@ void RasterizerAccelerated::SyncLightSpecular0(int light_index) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightSpecular1(int light_index) { | void RasterizerAccelerated::SyncLightSpecular1(int light_index) { | ||||||
|     auto color = LightColor(regs.lighting.light[light_index].specular_1); |     const auto color = LightColor(regs.lighting.light[light_index].specular_1); | ||||||
|     if (color != uniform_block_data.data.light_src[light_index].specular_1) { |     if (color != uniform_block_data.data.light_src[light_index].specular_1) { | ||||||
|         uniform_block_data.data.light_src[light_index].specular_1 = color; |         uniform_block_data.data.light_src[light_index].specular_1 = color; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -731,7 +732,7 @@ void RasterizerAccelerated::SyncLightSpecular1(int light_index) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightDiffuse(int light_index) { | void RasterizerAccelerated::SyncLightDiffuse(int light_index) { | ||||||
|     auto color = LightColor(regs.lighting.light[light_index].diffuse); |     const auto color = LightColor(regs.lighting.light[light_index].diffuse); | ||||||
|     if (color != uniform_block_data.data.light_src[light_index].diffuse) { |     if (color != uniform_block_data.data.light_src[light_index].diffuse) { | ||||||
|         uniform_block_data.data.light_src[light_index].diffuse = color; |         uniform_block_data.data.light_src[light_index].diffuse = color; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -739,7 +740,7 @@ void RasterizerAccelerated::SyncLightDiffuse(int light_index) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightAmbient(int light_index) { | void RasterizerAccelerated::SyncLightAmbient(int light_index) { | ||||||
|     auto color = LightColor(regs.lighting.light[light_index].ambient); |     const auto color = LightColor(regs.lighting.light[light_index].ambient); | ||||||
|     if (color != uniform_block_data.data.light_src[light_index].ambient) { |     if (color != uniform_block_data.data.light_src[light_index].ambient) { | ||||||
|         uniform_block_data.data.light_src[light_index].ambient = color; |         uniform_block_data.data.light_src[light_index].ambient = color; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -748,9 +749,9 @@ void RasterizerAccelerated::SyncLightAmbient(int light_index) { | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightPosition(int light_index) { | void RasterizerAccelerated::SyncLightPosition(int light_index) { | ||||||
|     const Common::Vec3f position = { |     const Common::Vec3f position = { | ||||||
|         Pica::float16::FromRaw(regs.lighting.light[light_index].x).ToFloat32(), |         Pica::f16::FromRaw(regs.lighting.light[light_index].x).ToFloat32(), | ||||||
|         Pica::float16::FromRaw(regs.lighting.light[light_index].y).ToFloat32(), |         Pica::f16::FromRaw(regs.lighting.light[light_index].y).ToFloat32(), | ||||||
|         Pica::float16::FromRaw(regs.lighting.light[light_index].z).ToFloat32(), |         Pica::f16::FromRaw(regs.lighting.light[light_index].z).ToFloat32(), | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     if (position != uniform_block_data.data.light_src[light_index].position) { |     if (position != uniform_block_data.data.light_src[light_index].position) { | ||||||
|  | @ -771,8 +772,8 @@ void RasterizerAccelerated::SyncLightSpotDirection(int light_index) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) { | void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) { | ||||||
|     float dist_atten_bias = |     const f32 dist_atten_bias = | ||||||
|         Pica::float20::FromRaw(regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); |         Pica::f20::FromRaw(regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); | ||||||
| 
 | 
 | ||||||
|     if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { |     if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { | ||||||
|         uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; |         uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; | ||||||
|  | @ -781,8 +782,8 @@ void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) { | void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) { | ||||||
|     float dist_atten_scale = |     const f32 dist_atten_scale = | ||||||
|         Pica::float20::FromRaw(regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); |         Pica::f20::FromRaw(regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); | ||||||
| 
 | 
 | ||||||
|     if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { |     if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { | ||||||
|         uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; |         uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; | ||||||
|  | @ -792,8 +793,8 @@ void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) { | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncShadowBias() { | void RasterizerAccelerated::SyncShadowBias() { | ||||||
|     const auto& shadow = regs.framebuffer.shadow; |     const auto& shadow = regs.framebuffer.shadow; | ||||||
|     float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); |     const f32 constant = Pica::f16::FromRaw(shadow.constant).ToFloat32(); | ||||||
|     float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); |     const f32 linear = Pica::f16::FromRaw(shadow.linear).ToFloat32(); | ||||||
| 
 | 
 | ||||||
|     if (constant != uniform_block_data.data.shadow_bias_constant || |     if (constant != uniform_block_data.data.shadow_bias_constant || | ||||||
|         linear != uniform_block_data.data.shadow_bias_linear) { |         linear != uniform_block_data.data.shadow_bias_linear) { | ||||||
|  | @ -804,7 +805,7 @@ void RasterizerAccelerated::SyncShadowBias() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncShadowTextureBias() { | void RasterizerAccelerated::SyncShadowTextureBias() { | ||||||
|     int bias = regs.texturing.shadow.bias << 1; |     const s32 bias = regs.texturing.shadow.bias << 1; | ||||||
|     if (bias != uniform_block_data.data.shadow_texture_bias) { |     if (bias != uniform_block_data.data.shadow_texture_bias) { | ||||||
|         uniform_block_data.data.shadow_texture_bias = bias; |         uniform_block_data.data.shadow_texture_bias = bias; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  | @ -813,7 +814,7 @@ void RasterizerAccelerated::SyncShadowTextureBias() { | ||||||
| 
 | 
 | ||||||
| void RasterizerAccelerated::SyncTextureLodBias(int tex_index) { | void RasterizerAccelerated::SyncTextureLodBias(int tex_index) { | ||||||
|     const auto pica_textures = regs.texturing.GetTextures(); |     const auto pica_textures = regs.texturing.GetTextures(); | ||||||
|     const float bias = pica_textures[tex_index].config.lod.bias / 256.0f; |     const f32 bias = pica_textures[tex_index].config.lod.bias / 256.0f; | ||||||
|     if (bias != uniform_block_data.data.tex_lod_bias[tex_index]) { |     if (bias != uniform_block_data.data.tex_lod_bias[tex_index]) { | ||||||
|         uniform_block_data.data.tex_lod_bias[tex_index] = bias; |         uniform_block_data.data.tex_lod_bias[tex_index] = bias; | ||||||
|         uniform_block_data.dirty = true; |         uniform_block_data.dirty = true; | ||||||
|  |  | ||||||
|  | @ -37,9 +37,9 @@ struct RasterizerRegs { | ||||||
|     BitField<0, 1, u32> clip_enable; |     BitField<0, 1, u32> clip_enable; | ||||||
|     BitField<0, 24, u32> clip_coef[4]; // float24
 |     BitField<0, 24, u32> clip_coef[4]; // float24
 | ||||||
| 
 | 
 | ||||||
|     Common::Vec4<float24> GetClipCoef() const { |     Common::Vec4<f24> GetClipCoef() const { | ||||||
|         return {float24::FromRaw(clip_coef[0]), float24::FromRaw(clip_coef[1]), |         return {f24::FromRaw(clip_coef[0]), f24::FromRaw(clip_coef[1]), f24::FromRaw(clip_coef[2]), | ||||||
|                 float24::FromRaw(clip_coef[2]), float24::FromRaw(clip_coef[3])}; |                 f24::FromRaw(clip_coef[3])}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Common::Rectangle<s32> GetViewportRect() const { |     Common::Rectangle<s32> GetViewportRect() const { | ||||||
|  | @ -47,9 +47,9 @@ struct RasterizerRegs { | ||||||
|             // These registers hold half-width and half-height, so must be multiplied by 2
 |             // These registers hold half-width and half-height, so must be multiplied by 2
 | ||||||
|             viewport_corner.x,  // left
 |             viewport_corner.x,  // left
 | ||||||
|             viewport_corner.y + // top
 |             viewport_corner.y + // top
 | ||||||
|                 static_cast<s32>(float24::FromRaw(viewport_size_y).ToFloat32() * 2), |                 static_cast<s32>(f24::FromRaw(viewport_size_y).ToFloat32() * 2), | ||||||
|             viewport_corner.x + // right
 |             viewport_corner.x + // right
 | ||||||
|                 static_cast<s32>(float24::FromRaw(viewport_size_x).ToFloat32() * 2), |                 static_cast<s32>(f24::FromRaw(viewport_size_x).ToFloat32() * 2), | ||||||
|             viewport_corner.y // bottom
 |             viewport_corner.y // bottom
 | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -18,6 +18,12 @@ class System; | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| 
 | 
 | ||||||
|  | enum class ScreenId : u32 { | ||||||
|  |     TopLeft, | ||||||
|  |     TopRight, | ||||||
|  |     Bottom, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct RendererSettings { | struct RendererSettings { | ||||||
|     // Screenshot
 |     // Screenshot
 | ||||||
|     std::atomic_bool screenshot_requested{false}; |     std::atomic_bool screenshot_requested{false}; | ||||||
|  | @ -75,7 +81,7 @@ public: | ||||||
|         return current_fps; |         return current_fps; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     int GetCurrentFrame() const { |     s32 GetCurrentFrame() const { | ||||||
|         return current_frame; |         return current_frame; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -108,7 +114,7 @@ protected: | ||||||
|     Frontend::EmuWindow& render_window;    ///< Reference to the render window handle.
 |     Frontend::EmuWindow& render_window;    ///< Reference to the render window handle.
 | ||||||
|     Frontend::EmuWindow* secondary_window; ///< Reference to the secondary render window handle.
 |     Frontend::EmuWindow* secondary_window; ///< Reference to the secondary render window handle.
 | ||||||
|     f32 current_fps = 0.0f;                ///< Current framerate, should be set by the renderer
 |     f32 current_fps = 0.0f;                ///< Current framerate, should be set by the renderer
 | ||||||
|     int current_frame = 0;                 ///< Current frame, should be set by the renderer
 |     s32 current_frame = 0;                 ///< Current frame, should be set by the renderer
 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCore
 | } // namespace VideoCore
 | ||||||
|  |  | ||||||
|  | @ -1,901 +0,0 @@ | ||||||
| // Copyright 2014 Citra Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <array> |  | ||||||
| #include <cmath> |  | ||||||
| #include <tuple> |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/color.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "common/microprofile.h" |  | ||||||
| #include "common/quaternion.h" |  | ||||||
| #include "common/vector_math.h" |  | ||||||
| #include "core/hw/gpu.h" |  | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/debug_utils/debug_utils.h" |  | ||||||
| #include "video_core/pica_state.h" |  | ||||||
| #include "video_core/pica_types.h" |  | ||||||
| #include "video_core/regs_framebuffer.h" |  | ||||||
| #include "video_core/regs_rasterizer.h" |  | ||||||
| #include "video_core/regs_texturing.h" |  | ||||||
| #include "video_core/renderer_software/rasterizer.h" |  | ||||||
| #include "video_core/renderer_software/sw_framebuffer.h" |  | ||||||
| #include "video_core/renderer_software/sw_lighting.h" |  | ||||||
| #include "video_core/renderer_software/sw_proctex.h" |  | ||||||
| #include "video_core/renderer_software/sw_texturing.h" |  | ||||||
| #include "video_core/shader/shader.h" |  | ||||||
| #include "video_core/texture/texture_decode.h" |  | ||||||
| #include "video_core/utils.h" |  | ||||||
| #include "video_core/video_core.h" |  | ||||||
| 
 |  | ||||||
| namespace Pica::Rasterizer { |  | ||||||
| 
 |  | ||||||
| // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 |  | ||||||
| struct Fix12P4 { |  | ||||||
|     Fix12P4() {} |  | ||||||
|     Fix12P4(u16 val) : val(val) {} |  | ||||||
| 
 |  | ||||||
|     static u16 FracMask() { |  | ||||||
|         return 0xF; |  | ||||||
|     } |  | ||||||
|     static u16 IntMask() { |  | ||||||
|         return (u16)~0xF; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     operator u16() const { |  | ||||||
|         return val; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator<(const Fix12P4& oth) const { |  | ||||||
|         return (u16) * this < (u16)oth; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u16 val; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Calculate signed area of the triangle spanned by the three argument vertices. |  | ||||||
|  * The sign denotes an orientation. |  | ||||||
|  * |  | ||||||
|  * @todo define orientation concretely. |  | ||||||
|  */ |  | ||||||
| static int SignedArea(const Common::Vec2<Fix12P4>& vtx1, const Common::Vec2<Fix12P4>& vtx2, |  | ||||||
|                       const Common::Vec2<Fix12P4>& vtx3) { |  | ||||||
|     const auto vec1 = Common::MakeVec(vtx2 - vtx1, 0); |  | ||||||
|     const auto vec2 = Common::MakeVec(vtx3 - vtx1, 0); |  | ||||||
|     // TODO: There is a very small chance this will overflow for sizeof(int) == 4
 |  | ||||||
|     return Common::Cross(vec1, vec2).z; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Convert a 3D vector for cube map coordinates to 2D texture coordinates along with the face name
 |  | ||||||
| static std::tuple<float24, float24, float24, PAddr> ConvertCubeCoord(float24 u, float24 v, |  | ||||||
|                                                                      float24 w, |  | ||||||
|                                                                      const TexturingRegs& regs) { |  | ||||||
|     const float abs_u = std::abs(u.ToFloat32()); |  | ||||||
|     const float abs_v = std::abs(v.ToFloat32()); |  | ||||||
|     const float abs_w = std::abs(w.ToFloat32()); |  | ||||||
|     float24 x, y, z; |  | ||||||
|     PAddr addr; |  | ||||||
|     if (abs_u > abs_v && abs_u > abs_w) { |  | ||||||
|         if (u > float24::FromFloat32(0)) { |  | ||||||
|             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::PositiveX); |  | ||||||
|             y = -v; |  | ||||||
|         } else { |  | ||||||
|             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::NegativeX); |  | ||||||
|             y = v; |  | ||||||
|         } |  | ||||||
|         x = -w; |  | ||||||
|         z = u; |  | ||||||
|     } else if (abs_v > abs_w) { |  | ||||||
|         if (v > float24::FromFloat32(0)) { |  | ||||||
|             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::PositiveY); |  | ||||||
|             x = u; |  | ||||||
|         } else { |  | ||||||
|             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::NegativeY); |  | ||||||
|             x = -u; |  | ||||||
|         } |  | ||||||
|         y = w; |  | ||||||
|         z = v; |  | ||||||
|     } else { |  | ||||||
|         if (w > float24::FromFloat32(0)) { |  | ||||||
|             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::PositiveZ); |  | ||||||
|             y = -v; |  | ||||||
|         } else { |  | ||||||
|             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::NegativeZ); |  | ||||||
|             y = v; |  | ||||||
|         } |  | ||||||
|         x = u; |  | ||||||
|         z = w; |  | ||||||
|     } |  | ||||||
|     float24 z_abs = float24::FromFloat32(std::abs(z.ToFloat32())); |  | ||||||
|     const float24 half = float24::FromFloat32(0.5f); |  | ||||||
|     return std::make_tuple(x / z * half + half, y / z * half + half, z_abs, addr); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |  | ||||||
|  * culling via recursion. |  | ||||||
|  */ |  | ||||||
| static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2, |  | ||||||
|                                     bool reversed = false) { |  | ||||||
|     const auto& regs = g_state.regs; |  | ||||||
|     MICROPROFILE_SCOPE(GPU_Rasterization); |  | ||||||
| 
 |  | ||||||
|     // vertex positions in rasterizer coordinates
 |  | ||||||
|     static auto FloatToFix = [](float24 flt) { |  | ||||||
|         // TODO: Rounding here is necessary to prevent garbage pixels at
 |  | ||||||
|         //       triangle borders. Is it that the correct solution, though?
 |  | ||||||
|         return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f))); |  | ||||||
|     }; |  | ||||||
|     static auto ScreenToRasterizerCoordinates = [](const Common::Vec3<float24>& vec) { |  | ||||||
|         return Common::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     Common::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos), |  | ||||||
|                                     ScreenToRasterizerCoordinates(v1.screenpos), |  | ||||||
|                                     ScreenToRasterizerCoordinates(v2.screenpos)}; |  | ||||||
| 
 |  | ||||||
|     if (regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepAll) { |  | ||||||
|         // Make sure we always end up with a triangle wound counter-clockwise
 |  | ||||||
|         if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { |  | ||||||
|             ProcessTriangleInternal(v0, v2, v1, true); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         if (!reversed && regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepClockWise) { |  | ||||||
|             // Reverse vertex order and use the CCW code path.
 |  | ||||||
|             ProcessTriangleInternal(v0, v2, v1, true); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Cull away triangles which are wound clockwise.
 |  | ||||||
|         if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) |  | ||||||
|             return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); |  | ||||||
|     u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |  | ||||||
|     u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); |  | ||||||
|     u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |  | ||||||
| 
 |  | ||||||
|     // Convert the scissor box coordinates to 12.4 fixed point
 |  | ||||||
|     u16 scissor_x1 = (u16)(regs.rasterizer.scissor_test.x1 << 4); |  | ||||||
|     u16 scissor_y1 = (u16)(regs.rasterizer.scissor_test.y1 << 4); |  | ||||||
|     // x2,y2 have +1 added to cover the entire sub-pixel area
 |  | ||||||
|     u16 scissor_x2 = (u16)((regs.rasterizer.scissor_test.x2 + 1) << 4); |  | ||||||
|     u16 scissor_y2 = (u16)((regs.rasterizer.scissor_test.y2 + 1) << 4); |  | ||||||
| 
 |  | ||||||
|     if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Include) { |  | ||||||
|         // Calculate the new bounds
 |  | ||||||
|         min_x = std::max(min_x, scissor_x1); |  | ||||||
|         min_y = std::max(min_y, scissor_y1); |  | ||||||
|         max_x = std::min(max_x, scissor_x2); |  | ||||||
|         max_y = std::min(max_y, scissor_y2); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     min_x &= Fix12P4::IntMask(); |  | ||||||
|     min_y &= Fix12P4::IntMask(); |  | ||||||
|     max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); |  | ||||||
|     max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); |  | ||||||
| 
 |  | ||||||
|     // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
 |  | ||||||
|     // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
 |  | ||||||
|     // values which are added to the barycentric coordinates w0, w1 and w2, respectively.
 |  | ||||||
|     // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
 |  | ||||||
|     auto IsRightSideOrFlatBottomEdge = [](const Common::Vec2<Fix12P4>& vtx, |  | ||||||
|                                           const Common::Vec2<Fix12P4>& line1, |  | ||||||
|                                           const Common::Vec2<Fix12P4>& line2) { |  | ||||||
|         if (line1.y == line2.y) { |  | ||||||
|             // just check if vertex is above us => bottom line parallel to x-axis
 |  | ||||||
|             return vtx.y < line1.y; |  | ||||||
|         } else { |  | ||||||
|             // check if vertex is on our left => right side
 |  | ||||||
|             // TODO: Not sure how likely this is to overflow
 |  | ||||||
|             return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * |  | ||||||
|                                                    ((int)vtx.y - (int)line1.y) / |  | ||||||
|                                                    ((int)line2.y - (int)line1.y); |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|     int bias0 = |  | ||||||
|         IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; |  | ||||||
|     int bias1 = |  | ||||||
|         IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; |  | ||||||
|     int bias2 = |  | ||||||
|         IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; |  | ||||||
| 
 |  | ||||||
|     auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |  | ||||||
| 
 |  | ||||||
|     auto textures = regs.texturing.GetTextures(); |  | ||||||
|     auto tev_stages = regs.texturing.GetTevStages(); |  | ||||||
| 
 |  | ||||||
|     bool stencil_action_enable = |  | ||||||
|         g_state.regs.framebuffer.output_merger.stencil_test.enable && |  | ||||||
|         g_state.regs.framebuffer.framebuffer.depth_format == FramebufferRegs::DepthFormat::D24S8; |  | ||||||
|     const auto stencil_test = g_state.regs.framebuffer.output_merger.stencil_test; |  | ||||||
| 
 |  | ||||||
|     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
 |  | ||||||
|     // TODO: Not sure if looping through x first might be faster
 |  | ||||||
|     for (u16 y = min_y + 8; y < max_y; y += 0x10) { |  | ||||||
|         for (u16 x = min_x + 8; x < max_x; x += 0x10) { |  | ||||||
| 
 |  | ||||||
|             // Do not process the pixel if it's inside the scissor box and the scissor mode is set
 |  | ||||||
|             // to Exclude
 |  | ||||||
|             if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) { |  | ||||||
|                 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) |  | ||||||
|                     continue; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // Calculate the barycentric coordinates w0, w1 and w2
 |  | ||||||
|             int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); |  | ||||||
|             int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); |  | ||||||
|             int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); |  | ||||||
|             int wsum = w0 + w1 + w2; |  | ||||||
| 
 |  | ||||||
|             // If current pixel is not covered by the current primitive
 |  | ||||||
|             if (w0 < 0 || w1 < 0 || w2 < 0) |  | ||||||
|                 continue; |  | ||||||
| 
 |  | ||||||
|             auto baricentric_coordinates = |  | ||||||
|                 Common::MakeVec(float24::FromFloat32(static_cast<float>(w0)), |  | ||||||
|                                 float24::FromFloat32(static_cast<float>(w1)), |  | ||||||
|                                 float24::FromFloat32(static_cast<float>(w2))); |  | ||||||
|             float24 interpolated_w_inverse = |  | ||||||
|                 float24::FromFloat32(1.0f) / Common::Dot(w_inverse, baricentric_coordinates); |  | ||||||
| 
 |  | ||||||
|             // interpolated_z = z / w
 |  | ||||||
|             float interpolated_z_over_w = |  | ||||||
|                 (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + |  | ||||||
|                  v2.screenpos[2].ToFloat32() * w2) / |  | ||||||
|                 wsum; |  | ||||||
| 
 |  | ||||||
|             // Not fully accurate. About 3 bits in precision are missing.
 |  | ||||||
|             // Z-Buffer (z / w * scale + offset)
 |  | ||||||
|             float depth_scale = float24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); |  | ||||||
|             float depth_offset = |  | ||||||
|                 float24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); |  | ||||||
|             float depth = interpolated_z_over_w * depth_scale + depth_offset; |  | ||||||
| 
 |  | ||||||
|             // Potentially switch to W-Buffer
 |  | ||||||
|             if (regs.rasterizer.depthmap_enable == |  | ||||||
|                 Pica::RasterizerRegs::DepthBuffering::WBuffering) { |  | ||||||
|                 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
 |  | ||||||
|                 depth *= interpolated_w_inverse.ToFloat32() * wsum; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // Clamp the result
 |  | ||||||
|             depth = std::clamp(depth, 0.0f, 1.0f); |  | ||||||
| 
 |  | ||||||
|             // Perspective correct attribute interpolation:
 |  | ||||||
|             // Attribute values cannot be calculated by simple linear interpolation since
 |  | ||||||
|             // they are not linear in screen space. For example, when interpolating a
 |  | ||||||
|             // texture coordinate across two vertices, something simple like
 |  | ||||||
|             //     u = (u0*w0 + u1*w1)/(w0+w1)
 |  | ||||||
|             // will not work. However, the attribute value divided by the
 |  | ||||||
|             // clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear
 |  | ||||||
|             // in screenspace. Hence, we can linearly interpolate these two independently and
 |  | ||||||
|             // calculate the interpolated attribute by dividing the results.
 |  | ||||||
|             // I.e.
 |  | ||||||
|             //     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
 |  | ||||||
|             //     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
 |  | ||||||
|             //     u = u_over_w / one_over_w
 |  | ||||||
|             //
 |  | ||||||
|             // The generalization to three vertices is straightforward in baricentric coordinates.
 |  | ||||||
|             auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { |  | ||||||
|                 auto attr_over_w = Common::MakeVec(attr0, attr1, attr2); |  | ||||||
|                 float24 interpolated_attr_over_w = |  | ||||||
|                     Common::Dot(attr_over_w, baricentric_coordinates); |  | ||||||
|                 return interpolated_attr_over_w * interpolated_w_inverse; |  | ||||||
|             }; |  | ||||||
| 
 |  | ||||||
|             Common::Vec4<u8> primary_color{ |  | ||||||
|                 static_cast<u8>(round( |  | ||||||
|                     GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * |  | ||||||
|                     255)), |  | ||||||
|                 static_cast<u8>(round( |  | ||||||
|                     GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * |  | ||||||
|                     255)), |  | ||||||
|                 static_cast<u8>(round( |  | ||||||
|                     GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * |  | ||||||
|                     255)), |  | ||||||
|                 static_cast<u8>(round( |  | ||||||
|                     GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * |  | ||||||
|                     255)), |  | ||||||
|             }; |  | ||||||
| 
 |  | ||||||
|             Common::Vec2<float24> uv[3]; |  | ||||||
|             uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); |  | ||||||
|             uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); |  | ||||||
|             uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u()); |  | ||||||
|             uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v()); |  | ||||||
|             uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); |  | ||||||
|             uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); |  | ||||||
| 
 |  | ||||||
|             Common::Vec4<u8> texture_color[4]{}; |  | ||||||
|             for (int i = 0; i < 3; ++i) { |  | ||||||
|                 const auto& texture = textures[i]; |  | ||||||
|                 if (!texture.enabled) |  | ||||||
|                     continue; |  | ||||||
| 
 |  | ||||||
|                 if (texture.config.address == 0) { |  | ||||||
|                     texture_color[i] = {0, 0, 0, 255}; |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 int coordinate_i = |  | ||||||
|                     (i == 2 && regs.texturing.main_config.texture2_use_coord1) ? 1 : i; |  | ||||||
|                 float24 u = uv[coordinate_i].u(); |  | ||||||
|                 float24 v = uv[coordinate_i].v(); |  | ||||||
| 
 |  | ||||||
|                 // Only unit 0 respects the texturing type (according to 3DBrew)
 |  | ||||||
|                 // TODO: Refactor so cubemaps and shadowmaps can be handled
 |  | ||||||
|                 PAddr texture_address = texture.config.GetPhysicalAddress(); |  | ||||||
|                 float24 shadow_z; |  | ||||||
|                 if (i == 0) { |  | ||||||
|                     switch (texture.config.type) { |  | ||||||
|                     case TexturingRegs::TextureConfig::Texture2D: |  | ||||||
|                         break; |  | ||||||
|                     case TexturingRegs::TextureConfig::ShadowCube: |  | ||||||
|                     case TexturingRegs::TextureConfig::TextureCube: { |  | ||||||
|                         auto w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); |  | ||||||
|                         std::tie(u, v, shadow_z, texture_address) = |  | ||||||
|                             ConvertCubeCoord(u, v, w, regs.texturing); |  | ||||||
|                         break; |  | ||||||
|                     } |  | ||||||
|                     case TexturingRegs::TextureConfig::Projection2D: { |  | ||||||
|                         auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); |  | ||||||
|                         u /= tc0_w; |  | ||||||
|                         v /= tc0_w; |  | ||||||
|                         break; |  | ||||||
|                     } |  | ||||||
|                     case TexturingRegs::TextureConfig::Shadow2D: { |  | ||||||
|                         auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); |  | ||||||
|                         if (!regs.texturing.shadow.orthographic) { |  | ||||||
|                             u /= tc0_w; |  | ||||||
|                             v /= tc0_w; |  | ||||||
|                         } |  | ||||||
| 
 |  | ||||||
|                         shadow_z = float24::FromFloat32(std::abs(tc0_w.ToFloat32())); |  | ||||||
|                         break; |  | ||||||
|                     } |  | ||||||
|                     case TexturingRegs::TextureConfig::Disabled: |  | ||||||
|                         continue; // skip this unit and continue to the next unit
 |  | ||||||
|                     default: |  | ||||||
|                         LOG_ERROR(HW_GPU, "Unhandled texture type {:x}", (int)texture.config.type); |  | ||||||
|                         UNIMPLEMENTED(); |  | ||||||
|                         break; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))) |  | ||||||
|                             .ToFloat32(); |  | ||||||
|                 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) |  | ||||||
|                             .ToFloat32(); |  | ||||||
| 
 |  | ||||||
|                 bool use_border_s = false; |  | ||||||
|                 bool use_border_t = false; |  | ||||||
| 
 |  | ||||||
|                 if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) { |  | ||||||
|                     use_border_s = s < 0 || s >= static_cast<int>(texture.config.width); |  | ||||||
|                 } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) { |  | ||||||
|                     use_border_s = s >= static_cast<int>(texture.config.width); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) { |  | ||||||
|                     use_border_t = t < 0 || t >= static_cast<int>(texture.config.height); |  | ||||||
|                 } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) { |  | ||||||
|                     use_border_t = t >= static_cast<int>(texture.config.height); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (use_border_s || use_border_t) { |  | ||||||
|                     auto border_color = texture.config.border_color; |  | ||||||
|                     texture_color[i] = |  | ||||||
|                         Common::MakeVec(border_color.r.Value(), border_color.g.Value(), |  | ||||||
|                                         border_color.b.Value(), border_color.a.Value()) |  | ||||||
|                             .Cast<u8>(); |  | ||||||
|                 } else { |  | ||||||
|                     // Textures are laid out from bottom to top, hence we invert the t coordinate.
 |  | ||||||
|                     // NOTE: This may not be the right place for the inversion.
 |  | ||||||
|                     // TODO: Check if this applies to ETC textures, too.
 |  | ||||||
|                     s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |  | ||||||
|                     t = texture.config.height - 1 - |  | ||||||
|                         GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); |  | ||||||
| 
 |  | ||||||
|                     const u8* texture_data = |  | ||||||
|                         VideoCore::g_memory->GetPhysicalPointer(texture_address); |  | ||||||
|                     auto info = |  | ||||||
|                         Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); |  | ||||||
| 
 |  | ||||||
|                     // TODO: Apply the min and mag filters to the texture
 |  | ||||||
|                     texture_color[i] = Texture::LookupTexture(texture_data, s, t, info); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D || |  | ||||||
|                                texture.config.type == TexturingRegs::TextureConfig::ShadowCube)) { |  | ||||||
| 
 |  | ||||||
|                     s32 z_int = static_cast<s32>(std::min(shadow_z.ToFloat32(), 1.0f) * 0xFFFFFF); |  | ||||||
|                     z_int -= regs.texturing.shadow.bias << 1; |  | ||||||
|                     auto& color = texture_color[i]; |  | ||||||
|                     s32 z_ref = (color.w << 16) | (color.z << 8) | color.y; |  | ||||||
|                     u8 density; |  | ||||||
|                     if (z_ref >= z_int) { |  | ||||||
|                         density = color.x; |  | ||||||
|                     } else { |  | ||||||
|                         density = 0; |  | ||||||
|                     } |  | ||||||
|                     texture_color[i] = {density, density, density, density}; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // sample procedural texture
 |  | ||||||
|             if (regs.texturing.main_config.texture3_enable) { |  | ||||||
|                 const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates]; |  | ||||||
|                 texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(), |  | ||||||
|                                            g_state.regs.texturing, g_state.proctex); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // Texture environment - consists of 6 stages of color and alpha combining.
 |  | ||||||
|             //
 |  | ||||||
|             // Color combiners take three input color values from some source (e.g. interpolated
 |  | ||||||
|             // vertex color, texture color, previous stage, etc), perform some very simple
 |  | ||||||
|             // operations on each of them (e.g. inversion) and then calculate the output color
 |  | ||||||
|             // with some basic arithmetic. Alpha combiners can be configured separately but work
 |  | ||||||
|             // analogously.
 |  | ||||||
|             Common::Vec4<u8> combiner_output; |  | ||||||
|             Common::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; |  | ||||||
|             Common::Vec4<u8> next_combiner_buffer = |  | ||||||
|                 Common::MakeVec(regs.texturing.tev_combiner_buffer_color.r.Value(), |  | ||||||
|                                 regs.texturing.tev_combiner_buffer_color.g.Value(), |  | ||||||
|                                 regs.texturing.tev_combiner_buffer_color.b.Value(), |  | ||||||
|                                 regs.texturing.tev_combiner_buffer_color.a.Value()) |  | ||||||
|                     .Cast<u8>(); |  | ||||||
| 
 |  | ||||||
|             Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0}; |  | ||||||
|             Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0}; |  | ||||||
| 
 |  | ||||||
|             if (!g_state.regs.lighting.disable) { |  | ||||||
|                 Common::Quaternion<float> normquat = |  | ||||||
|                     Common::Quaternion<float>{ |  | ||||||
|                         {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), |  | ||||||
|                          GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), |  | ||||||
|                          GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, |  | ||||||
|                         GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), |  | ||||||
|                     } |  | ||||||
|                         .Normalized(); |  | ||||||
| 
 |  | ||||||
|                 Common::Vec3<float> view{ |  | ||||||
|                     GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), |  | ||||||
|                     GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), |  | ||||||
|                     GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), |  | ||||||
|                 }; |  | ||||||
|                 std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( |  | ||||||
|                     g_state.regs.lighting, g_state.lighting, normquat, view, texture_color); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); |  | ||||||
|                  ++tev_stage_index) { |  | ||||||
|                 const auto& tev_stage = tev_stages[tev_stage_index]; |  | ||||||
|                 using Source = TexturingRegs::TevStageConfig::Source; |  | ||||||
| 
 |  | ||||||
|                 auto GetSource = [&](Source source) -> Common::Vec4<u8> { |  | ||||||
|                     switch (source) { |  | ||||||
|                     case Source::PrimaryColor: |  | ||||||
|                         return primary_color; |  | ||||||
| 
 |  | ||||||
|                     case Source::PrimaryFragmentColor: |  | ||||||
|                         return primary_fragment_color; |  | ||||||
| 
 |  | ||||||
|                     case Source::SecondaryFragmentColor: |  | ||||||
|                         return secondary_fragment_color; |  | ||||||
| 
 |  | ||||||
|                     case Source::Texture0: |  | ||||||
|                         return texture_color[0]; |  | ||||||
| 
 |  | ||||||
|                     case Source::Texture1: |  | ||||||
|                         return texture_color[1]; |  | ||||||
| 
 |  | ||||||
|                     case Source::Texture2: |  | ||||||
|                         return texture_color[2]; |  | ||||||
| 
 |  | ||||||
|                     case Source::Texture3: |  | ||||||
|                         return texture_color[3]; |  | ||||||
| 
 |  | ||||||
|                     case Source::PreviousBuffer: |  | ||||||
|                         return combiner_buffer; |  | ||||||
| 
 |  | ||||||
|                     case Source::Constant: |  | ||||||
|                         return Common::MakeVec(tev_stage.const_r.Value(), tev_stage.const_g.Value(), |  | ||||||
|                                                tev_stage.const_b.Value(), tev_stage.const_a.Value()) |  | ||||||
|                             .Cast<u8>(); |  | ||||||
| 
 |  | ||||||
|                     case Source::Previous: |  | ||||||
|                         return combiner_output; |  | ||||||
| 
 |  | ||||||
|                     default: |  | ||||||
|                         LOG_ERROR(HW_GPU, "Unknown color combiner source {}", (int)source); |  | ||||||
|                         UNIMPLEMENTED(); |  | ||||||
|                         return {0, 0, 0, 0}; |  | ||||||
|                     } |  | ||||||
|                 }; |  | ||||||
| 
 |  | ||||||
|                 // color combiner
 |  | ||||||
|                 // NOTE: Not sure if the alpha combiner might use the color output of the previous
 |  | ||||||
|                 //       stage as input. Hence, we currently don't directly write the result to
 |  | ||||||
|                 //       combiner_output.rgb(), but instead store it in a temporary variable until
 |  | ||||||
|                 //       alpha combining has been done.
 |  | ||||||
|                 Common::Vec3<u8> color_result[3] = { |  | ||||||
|                     GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), |  | ||||||
|                     GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), |  | ||||||
|                     GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)), |  | ||||||
|                 }; |  | ||||||
|                 auto color_output = ColorCombine(tev_stage.color_op, color_result); |  | ||||||
| 
 |  | ||||||
|                 u8 alpha_output; |  | ||||||
|                 if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { |  | ||||||
|                     // result of Dot3_RGBA operation is also placed to the alpha component
 |  | ||||||
|                     alpha_output = color_output.x; |  | ||||||
|                 } else { |  | ||||||
|                     // alpha combiner
 |  | ||||||
|                     std::array<u8, 3> alpha_result = {{ |  | ||||||
|                         GetAlphaModifier(tev_stage.alpha_modifier1, |  | ||||||
|                                          GetSource(tev_stage.alpha_source1)), |  | ||||||
|                         GetAlphaModifier(tev_stage.alpha_modifier2, |  | ||||||
|                                          GetSource(tev_stage.alpha_source2)), |  | ||||||
|                         GetAlphaModifier(tev_stage.alpha_modifier3, |  | ||||||
|                                          GetSource(tev_stage.alpha_source3)), |  | ||||||
|                     }}; |  | ||||||
|                     alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 combiner_output[0] = |  | ||||||
|                     std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); |  | ||||||
|                 combiner_output[1] = |  | ||||||
|                     std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); |  | ||||||
|                 combiner_output[2] = |  | ||||||
|                     std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); |  | ||||||
|                 combiner_output[3] = |  | ||||||
|                     std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); |  | ||||||
| 
 |  | ||||||
|                 combiner_buffer = next_combiner_buffer; |  | ||||||
| 
 |  | ||||||
|                 if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( |  | ||||||
|                         tev_stage_index)) { |  | ||||||
|                     next_combiner_buffer.r() = combiner_output.r(); |  | ||||||
|                     next_combiner_buffer.g() = combiner_output.g(); |  | ||||||
|                     next_combiner_buffer.b() = combiner_output.b(); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( |  | ||||||
|                         tev_stage_index)) { |  | ||||||
|                     next_combiner_buffer.a() = combiner_output.a(); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             const auto& output_merger = regs.framebuffer.output_merger; |  | ||||||
| 
 |  | ||||||
|             if (output_merger.fragment_operation_mode == |  | ||||||
|                 FramebufferRegs::FragmentOperationMode::Shadow) { |  | ||||||
|                 u32 depth_int = static_cast<u32>(depth * 0xFFFFFF); |  | ||||||
|                 // use green color as the shadow intensity
 |  | ||||||
|                 u8 stencil = combiner_output.y; |  | ||||||
|                 DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil); |  | ||||||
|                 // skip the normal output merger pipeline if it is in shadow mode
 |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // TODO: Does alpha testing happen before or after stencil?
 |  | ||||||
|             if (output_merger.alpha_test.enable) { |  | ||||||
|                 bool pass = false; |  | ||||||
| 
 |  | ||||||
|                 switch (output_merger.alpha_test.func) { |  | ||||||
|                 case FramebufferRegs::CompareFunc::Never: |  | ||||||
|                     pass = false; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::Always: |  | ||||||
|                     pass = true; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::Equal: |  | ||||||
|                     pass = combiner_output.a() == output_merger.alpha_test.ref; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::NotEqual: |  | ||||||
|                     pass = combiner_output.a() != output_merger.alpha_test.ref; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::LessThan: |  | ||||||
|                     pass = combiner_output.a() < output_merger.alpha_test.ref; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::LessThanOrEqual: |  | ||||||
|                     pass = combiner_output.a() <= output_merger.alpha_test.ref; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::GreaterThan: |  | ||||||
|                     pass = combiner_output.a() > output_merger.alpha_test.ref; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::GreaterThanOrEqual: |  | ||||||
|                     pass = combiner_output.a() >= output_merger.alpha_test.ref; |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (!pass) |  | ||||||
|                     continue; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // Apply fog combiner
 |  | ||||||
|             // Not fully accurate. We'd have to know what data type is used to
 |  | ||||||
|             // store the depth etc. Using float for now until we know more
 |  | ||||||
|             // about Pica datatypes
 |  | ||||||
|             if (regs.texturing.fog_mode == TexturingRegs::FogMode::Fog) { |  | ||||||
|                 const Common::Vec3<u8> fog_color = |  | ||||||
|                     Common::MakeVec(regs.texturing.fog_color.r.Value(), |  | ||||||
|                                     regs.texturing.fog_color.g.Value(), |  | ||||||
|                                     regs.texturing.fog_color.b.Value()) |  | ||||||
|                         .Cast<u8>(); |  | ||||||
| 
 |  | ||||||
|                 // Get index into fog LUT
 |  | ||||||
|                 float fog_index; |  | ||||||
|                 if (g_state.regs.texturing.fog_flip) { |  | ||||||
|                     fog_index = (1.0f - depth) * 128.0f; |  | ||||||
|                 } else { |  | ||||||
|                     fog_index = depth * 128.0f; |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 // Generate clamped fog factor from LUT for given fog index
 |  | ||||||
|                 float fog_i = std::clamp(floorf(fog_index), 0.0f, 127.0f); |  | ||||||
|                 float fog_f = fog_index - fog_i; |  | ||||||
|                 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; |  | ||||||
|                 float fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f; |  | ||||||
|                 fog_factor = std::clamp(fog_factor, 0.0f, 1.0f); |  | ||||||
| 
 |  | ||||||
|                 // Blend the fog
 |  | ||||||
|                 for (unsigned i = 0; i < 3; i++) { |  | ||||||
|                     combiner_output[i] = static_cast<u8>(fog_factor * combiner_output[i] + |  | ||||||
|                                                          (1.0f - fog_factor) * fog_color[i]); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             u8 old_stencil = 0; |  | ||||||
| 
 |  | ||||||
|             auto UpdateStencil = [stencil_test, x, y, |  | ||||||
|                                   &old_stencil](Pica::FramebufferRegs::StencilAction action) { |  | ||||||
|                 u8 new_stencil = |  | ||||||
|                     PerformStencilAction(action, old_stencil, stencil_test.reference_value); |  | ||||||
|                 if (g_state.regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) |  | ||||||
|                     SetStencil(x >> 4, y >> 4, |  | ||||||
|                                (new_stencil & stencil_test.write_mask) | |  | ||||||
|                                    (old_stencil & ~stencil_test.write_mask)); |  | ||||||
|             }; |  | ||||||
| 
 |  | ||||||
|             if (stencil_action_enable) { |  | ||||||
|                 old_stencil = GetStencil(x >> 4, y >> 4); |  | ||||||
|                 u8 dest = old_stencil & stencil_test.input_mask; |  | ||||||
|                 u8 ref = stencil_test.reference_value & stencil_test.input_mask; |  | ||||||
| 
 |  | ||||||
|                 bool pass = false; |  | ||||||
|                 switch (stencil_test.func) { |  | ||||||
|                 case FramebufferRegs::CompareFunc::Never: |  | ||||||
|                     pass = false; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::Always: |  | ||||||
|                     pass = true; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::Equal: |  | ||||||
|                     pass = (ref == dest); |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::NotEqual: |  | ||||||
|                     pass = (ref != dest); |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::LessThan: |  | ||||||
|                     pass = (ref < dest); |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::LessThanOrEqual: |  | ||||||
|                     pass = (ref <= dest); |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::GreaterThan: |  | ||||||
|                     pass = (ref > dest); |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::GreaterThanOrEqual: |  | ||||||
|                     pass = (ref >= dest); |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (!pass) { |  | ||||||
|                     UpdateStencil(stencil_test.action_stencil_fail); |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // Convert float to integer
 |  | ||||||
|             unsigned num_bits = |  | ||||||
|                 FramebufferRegs::DepthBitsPerPixel(regs.framebuffer.framebuffer.depth_format); |  | ||||||
|             u32 z = (u32)(depth * ((1 << num_bits) - 1)); |  | ||||||
| 
 |  | ||||||
|             if (output_merger.depth_test_enable) { |  | ||||||
|                 u32 ref_z = GetDepth(x >> 4, y >> 4); |  | ||||||
| 
 |  | ||||||
|                 bool pass = false; |  | ||||||
| 
 |  | ||||||
|                 switch (output_merger.depth_test_func) { |  | ||||||
|                 case FramebufferRegs::CompareFunc::Never: |  | ||||||
|                     pass = false; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::Always: |  | ||||||
|                     pass = true; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::Equal: |  | ||||||
|                     pass = z == ref_z; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::NotEqual: |  | ||||||
|                     pass = z != ref_z; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::LessThan: |  | ||||||
|                     pass = z < ref_z; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::LessThanOrEqual: |  | ||||||
|                     pass = z <= ref_z; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::GreaterThan: |  | ||||||
|                     pass = z > ref_z; |  | ||||||
|                     break; |  | ||||||
| 
 |  | ||||||
|                 case FramebufferRegs::CompareFunc::GreaterThanOrEqual: |  | ||||||
|                     pass = z >= ref_z; |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (!pass) { |  | ||||||
|                     if (stencil_action_enable) |  | ||||||
|                         UpdateStencil(stencil_test.action_depth_fail); |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             if (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && |  | ||||||
|                 output_merger.depth_write_enable) { |  | ||||||
| 
 |  | ||||||
|                 SetDepth(x >> 4, y >> 4, z); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             // The stencil depth_pass action is executed even if depth testing is disabled
 |  | ||||||
|             if (stencil_action_enable) |  | ||||||
|                 UpdateStencil(stencil_test.action_depth_pass); |  | ||||||
| 
 |  | ||||||
|             auto dest = GetPixel(x >> 4, y >> 4); |  | ||||||
|             Common::Vec4<u8> blend_output = combiner_output; |  | ||||||
| 
 |  | ||||||
|             if (output_merger.alphablend_enable) { |  | ||||||
|                 auto params = output_merger.alpha_blending; |  | ||||||
| 
 |  | ||||||
|                 auto LookupFactor = [&](unsigned channel, |  | ||||||
|                                         FramebufferRegs::BlendFactor factor) -> u8 { |  | ||||||
|                     DEBUG_ASSERT(channel < 4); |  | ||||||
| 
 |  | ||||||
|                     const Common::Vec4<u8> blend_const = |  | ||||||
|                         Common::MakeVec(output_merger.blend_const.r.Value(), |  | ||||||
|                                         output_merger.blend_const.g.Value(), |  | ||||||
|                                         output_merger.blend_const.b.Value(), |  | ||||||
|                                         output_merger.blend_const.a.Value()) |  | ||||||
|                             .Cast<u8>(); |  | ||||||
| 
 |  | ||||||
|                     switch (factor) { |  | ||||||
|                     case FramebufferRegs::BlendFactor::Zero: |  | ||||||
|                         return 0; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::One: |  | ||||||
|                         return 255; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::SourceColor: |  | ||||||
|                         return combiner_output[channel]; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::OneMinusSourceColor: |  | ||||||
|                         return 255 - combiner_output[channel]; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::DestColor: |  | ||||||
|                         return dest[channel]; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::OneMinusDestColor: |  | ||||||
|                         return 255 - dest[channel]; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::SourceAlpha: |  | ||||||
|                         return combiner_output.a(); |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::OneMinusSourceAlpha: |  | ||||||
|                         return 255 - combiner_output.a(); |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::DestAlpha: |  | ||||||
|                         return dest.a(); |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::OneMinusDestAlpha: |  | ||||||
|                         return 255 - dest.a(); |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::ConstantColor: |  | ||||||
|                         return blend_const[channel]; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::OneMinusConstantColor: |  | ||||||
|                         return 255 - blend_const[channel]; |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::ConstantAlpha: |  | ||||||
|                         return blend_const.a(); |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::OneMinusConstantAlpha: |  | ||||||
|                         return 255 - blend_const.a(); |  | ||||||
| 
 |  | ||||||
|                     case FramebufferRegs::BlendFactor::SourceAlphaSaturate: |  | ||||||
|                         // Returns 1.0 for the alpha channel
 |  | ||||||
|                         if (channel == 3) |  | ||||||
|                             return 255; |  | ||||||
|                         return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a())); |  | ||||||
| 
 |  | ||||||
|                     default: |  | ||||||
|                         LOG_CRITICAL(HW_GPU, "Unknown blend factor {:x}", factor); |  | ||||||
|                         UNIMPLEMENTED(); |  | ||||||
|                         break; |  | ||||||
|                     } |  | ||||||
| 
 |  | ||||||
|                     return combiner_output[channel]; |  | ||||||
|                 }; |  | ||||||
| 
 |  | ||||||
|                 auto srcfactor = Common::MakeVec(LookupFactor(0, params.factor_source_rgb), |  | ||||||
|                                                  LookupFactor(1, params.factor_source_rgb), |  | ||||||
|                                                  LookupFactor(2, params.factor_source_rgb), |  | ||||||
|                                                  LookupFactor(3, params.factor_source_a)); |  | ||||||
| 
 |  | ||||||
|                 auto dstfactor = Common::MakeVec(LookupFactor(0, params.factor_dest_rgb), |  | ||||||
|                                                  LookupFactor(1, params.factor_dest_rgb), |  | ||||||
|                                                  LookupFactor(2, params.factor_dest_rgb), |  | ||||||
|                                                  LookupFactor(3, params.factor_dest_a)); |  | ||||||
| 
 |  | ||||||
|                 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, |  | ||||||
|                                                      params.blend_equation_rgb); |  | ||||||
|                 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, |  | ||||||
|                                                          dstfactor, params.blend_equation_a) |  | ||||||
|                                        .a(); |  | ||||||
|             } else { |  | ||||||
|                 blend_output = |  | ||||||
|                     Common::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), |  | ||||||
|                                     LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), |  | ||||||
|                                     LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), |  | ||||||
|                                     LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             const Common::Vec4<u8> result = { |  | ||||||
|                 output_merger.red_enable ? blend_output.r() : dest.r(), |  | ||||||
|                 output_merger.green_enable ? blend_output.g() : dest.g(), |  | ||||||
|                 output_merger.blue_enable ? blend_output.b() : dest.b(), |  | ||||||
|                 output_merger.alpha_enable ? blend_output.a() : dest.a(), |  | ||||||
|             }; |  | ||||||
| 
 |  | ||||||
|             if (regs.framebuffer.framebuffer.allow_color_write != 0) |  | ||||||
|                 DrawPixel(x >> 4, y >> 4, result); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
// Public entry point of the rasterizer for a single triangle: forwards the three vertices,
// unchanged and in the same order, to ProcessTriangleInternal.
| void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) { |  | ||||||
|     ProcessTriangleInternal(v0, v1, v2); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace Pica::Rasterizer
 |  | ||||||
|  | @ -1,44 +0,0 @@ | ||||||
| // Copyright 2014 Citra Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include "video_core/shader/shader.h" |  | ||||||
| 
 |  | ||||||
| namespace Pica::Rasterizer { |  | ||||||
| 
 |  | ||||||
// Rasterizer vertex: a Shader::OutputVertex extended with a screen-space position and
// helpers that blend the shader output attributes of two vertices.
| struct Vertex : Shader::OutputVertex { |  | ||||||
|     Vertex(const OutputVertex& v) : OutputVertex(v) {} |  | ||||||
| 
 |  | ||||||
|     // Attributes used to store intermediate results
 |  | ||||||
|     // position after perspective divide
 |  | ||||||
|     Common::Vec3<float24> screenpos; |  | ||||||
| 
 |  | ||||||
|     // Linear interpolation, in place: every attribute becomes this * factor + vtx * (1 - factor)
 |  | ||||||
|     // factor: 1=this, 0=vtx
 |  | ||||||
|     // Note: This function cannot be called after perspective divide (screenpos is not interpolated)
 |  | ||||||
|     void Lerp(float24 factor, const Vertex& vtx) { |  | ||||||
|         pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); |  | ||||||
|         quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor); |  | ||||||
|         color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); |  | ||||||
|         tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); |  | ||||||
|         tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); |  | ||||||
|         tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor); |  | ||||||
|         view = view * factor + vtx.view * (float24::FromFloat32(1) - factor); |  | ||||||
|         tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Linear interpolation returning a new vertex: copies v0, then blends the copy towards v1
 |  | ||||||
|     // factor: 1=v0, 0=v1
 |  | ||||||
|     // Note: This function cannot be called after perspective divide (screenpos is copied from v0)
 |  | ||||||
|     static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { |  | ||||||
|         Vertex ret = v0; |  | ||||||
|         ret.Lerp(factor, v1); |  | ||||||
|         return ret; |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2); |  | ||||||
| 
 |  | ||||||
| } // namespace Pica::Rasterizer
 |  | ||||||
|  | @ -2,18 +2,86 @@ | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
|  | #include "common/color.h" | ||||||
|  | #include "core/core.h" | ||||||
|  | #include "core/hw/gpu.h" | ||||||
|  | #include "core/hw/hw.h" | ||||||
|  | #include "core/hw/lcd.h" | ||||||
| #include "video_core/renderer_software/renderer_software.h" | #include "video_core/renderer_software/renderer_software.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
// Constructs the software renderer: forwards system and window (with a nullptr third argument)
// to VideoCore::RendererBase and creates the RasterizerSoftware instance it owns. In the
// right-hand (new) column it also binds `memory` to system.Memory() and hands that same memory
// system to the rasterizer.
| RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& window) | RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& window) | ||||||
|     : VideoCore::RendererBase{system, window, nullptr}, |     : VideoCore::RendererBase{system, window, nullptr}, memory{system.Memory()}, | ||||||
|       rasterizer{std::make_unique<RasterizerSoftware>()} {} |       rasterizer{std::make_unique<RasterizerSoftware>(system.Memory())} {} | ||||||
| 
 | 
 | ||||||
| RendererSoftware::~RendererSoftware() = default; | RendererSoftware::~RendererSoftware() = default; | ||||||
| 
 | 
 | ||||||
// Finishes the current frame by calling EndFrame(). In the right-hand (new) column the
// per-screen pixel buffers are refreshed first through PrepareRenderTarget().
| void RendererSoftware::SwapBuffers() { | void RendererSoftware::SwapBuffers() { | ||||||
|  |     PrepareRenderTarget(); | ||||||
|     EndFrame(); |     EndFrame(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCore
 | void RendererSoftware::PrepareRenderTarget() { | ||||||
|  |     for (int i : {0, 1, 2}) { | ||||||
|  |         const int fb_id = i == 2 ? 1 : 0; | ||||||
|  |         const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; | ||||||
|  |         auto& info = screen_infos[i]; | ||||||
|  | 
 | ||||||
|  |         u32 lcd_color_addr = | ||||||
|  |             (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); | ||||||
|  |         lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; | ||||||
|  |         LCD::Regs::ColorFill color_fill = {0}; | ||||||
|  |         LCD::Read(color_fill.raw, lcd_color_addr); | ||||||
|  | 
 | ||||||
|  |         if (!color_fill.is_enabled) { | ||||||
|  |             const u32 old_width = std::exchange(info.width, framebuffer.width); | ||||||
|  |             const u32 old_height = std::exchange(info.height, framebuffer.height); | ||||||
|  |             if (framebuffer.width != old_width || framebuffer.height != old_height) [[unlikely]] { | ||||||
|  |                 info.pixels.resize(framebuffer.width * framebuffer.height * 4); | ||||||
|  |             } | ||||||
|  |             CopyPixels(i); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void RendererSoftware::CopyPixels(int i) { | ||||||
|  |     const u32 fb_id = i == 2 ? 1 : 0; | ||||||
|  |     const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; | ||||||
|  | 
 | ||||||
|  |     const PAddr framebuffer_addr = | ||||||
|  |         framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2; | ||||||
|  |     const s32 bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); | ||||||
|  |     const u8* framebuffer_data = memory.GetPhysicalPointer(framebuffer_addr); | ||||||
|  | 
 | ||||||
|  |     const s32 stride = framebuffer.stride; | ||||||
|  |     const s32 height = framebuffer.height; | ||||||
|  |     ASSERT(stride * height != 0); | ||||||
|  | 
 | ||||||
|  |     u32 output_offset = 0; | ||||||
|  |     for (u32 y = 0; y < framebuffer.height; y++) { | ||||||
|  |         for (u32 x = 0; x < framebuffer.width; x++) { | ||||||
|  |             const u8* pixel = framebuffer_data + (y * stride + x) * bpp; | ||||||
|  |             const Common::Vec4 color = [&] { | ||||||
|  |                 switch (framebuffer.color_format) { | ||||||
|  |                 case GPU::Regs::PixelFormat::RGBA8: | ||||||
|  |                     return Common::Color::DecodeRGBA8(pixel); | ||||||
|  |                 case GPU::Regs::PixelFormat::RGB8: | ||||||
|  |                     return Common::Color::DecodeRGB8(pixel); | ||||||
|  |                 case GPU::Regs::PixelFormat::RGB565: | ||||||
|  |                     return Common::Color::DecodeRGB565(pixel); | ||||||
|  |                 case GPU::Regs::PixelFormat::RGB5A1: | ||||||
|  |                     return Common::Color::DecodeRGB5A1(pixel); | ||||||
|  |                 case GPU::Regs::PixelFormat::RGBA4: | ||||||
|  |                     return Common::Color::DecodeRGBA4(pixel); | ||||||
|  |                 } | ||||||
|  |                 UNREACHABLE(); | ||||||
|  |             }(); | ||||||
|  |             u8* dest = screen_infos[i].pixels.data() + output_offset; | ||||||
|  |             std::memcpy(dest, color.AsArray(), sizeof(color)); | ||||||
|  |             output_offset += sizeof(color); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -11,7 +11,13 @@ namespace Core { | ||||||
| class System; | class System; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace SwRenderer { | ||||||
|  | 
 | ||||||
|  | struct ScreenInfo { | ||||||
|  |     u32 width; | ||||||
|  |     u32 height; | ||||||
|  |     std::vector<u8> pixels; | ||||||
|  | }; | ||||||
| 
 | 
 | ||||||
| class RendererSoftware : public VideoCore::RendererBase { | class RendererSoftware : public VideoCore::RendererBase { | ||||||
| public: | public: | ||||||
|  | @ -22,12 +28,22 @@ public: | ||||||
|         return rasterizer.get(); |         return rasterizer.get(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] const ScreenInfo& Screen(VideoCore::ScreenId id) const noexcept { | ||||||
|  |         return screen_infos[static_cast<u32>(id)]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void SwapBuffers() override; |     void SwapBuffers() override; | ||||||
|     void TryPresent(int timeout_ms, bool is_secondary) override {} |     void TryPresent(int timeout_ms, bool is_secondary) override {} | ||||||
|     void Sync() override {} |     void Sync() override {} | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  |     void PrepareRenderTarget(); | ||||||
|  |     void CopyPixels(int i); | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     Memory::MemorySystem& memory; | ||||||
|     std::unique_ptr<RasterizerSoftware> rasterizer; |     std::unique_ptr<RasterizerSoftware> rasterizer; | ||||||
|  |     std::array<ScreenInfo, 3> screen_infos{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCore
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -1,196 +1,88 @@ | ||||||
| // Copyright 2014 Citra Emulator Project
 | // Copyright 2023 Citra Emulator Project
 | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include <algorithm> |  | ||||||
| #include <array> | #include <array> | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <boost/container/static_vector.hpp> | #include "video_core/regs_texturing.h" | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "common/vector_math.h" |  | ||||||
| #include "video_core/pica_state.h" |  | ||||||
| #include "video_core/pica_types.h" |  | ||||||
| #include "video_core/renderer_software/rasterizer.h" |  | ||||||
| #include "video_core/renderer_software/sw_clipper.h" | #include "video_core/renderer_software/sw_clipper.h" | ||||||
| #include "video_core/shader/shader.h" |  | ||||||
| 
 | 
 | ||||||
| using Pica::Rasterizer::Vertex; | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| namespace Pica::Clipper { | using Pica::TexturingRegs; | ||||||
| 
 | 
 | ||||||
| struct ClippingEdge { | void FlipQuaternionIfOpposite(Common::Vec4<f24>& a, const Common::Vec4<f24>& b) { | ||||||
| public: |     if (Common::Dot(a, b) < f24::Zero()) { | ||||||
|     ClippingEdge(Common::Vec4<float24> coeffs, |         a *= f24::FromFloat32(-1.0f); | ||||||
|                  Common::Vec4<float24> bias = Common::Vec4<float24>(float24::FromFloat32(0), |  | ||||||
|                                                                     float24::FromFloat32(0), |  | ||||||
|                                                                     float24::FromFloat32(0), |  | ||||||
|                                                                     float24::FromFloat32(0))) |  | ||||||
|         : coeffs(coeffs), bias(bias) {} |  | ||||||
| 
 |  | ||||||
|     bool IsInside(const Vertex& vertex) const { |  | ||||||
|         return Common::Dot(vertex.pos + bias, coeffs) >= float24::FromFloat32(0); |  | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     bool IsOutSide(const Vertex& vertex) const { |  | ||||||
|         return !IsInside(vertex); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const { |  | ||||||
|         float24 dp = Common::Dot(v0.pos + bias, coeffs); |  | ||||||
|         float24 dp_prev = Common::Dot(v1.pos + bias, coeffs); |  | ||||||
|         float24 factor = dp_prev / (dp_prev - dp); |  | ||||||
| 
 |  | ||||||
|         return Vertex::Lerp(factor, v0, v1); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     [[maybe_unused]] float24 pos; |  | ||||||
|     Common::Vec4<float24> coeffs; |  | ||||||
|     Common::Vec4<float24> bias; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static void InitScreenCoordinates(Vertex& vtx) { | int SignedArea(const Common::Vec2<Fix12P4>& vtx1, const Common::Vec2<Fix12P4>& vtx2, | ||||||
|     struct { |                const Common::Vec2<Fix12P4>& vtx3) { | ||||||
|         float24 halfsize_x; |     const auto vec1 = Common::MakeVec(vtx2 - vtx1, 0); | ||||||
|         float24 offset_x; |     const auto vec2 = Common::MakeVec(vtx3 - vtx1, 0); | ||||||
|         float24 halfsize_y; |     // TODO: There is a very small chance this will overflow for sizeof(int) == 4
 | ||||||
|         float24 offset_y; |     return Common::Cross(vec1, vec2).z; | ||||||
|         float24 zscale; | }; | ||||||
|         float24 offset_z; |  | ||||||
|     } viewport; |  | ||||||
| 
 | 
 | ||||||
|     const auto& regs = g_state.regs; | std::tuple<f24, f24, f24, PAddr> ConvertCubeCoord(f24 u, f24 v, f24 w, | ||||||
|     viewport.halfsize_x = float24::FromRaw(regs.rasterizer.viewport_size_x); |                                                   const Pica::TexturingRegs& regs) { | ||||||
|     viewport.halfsize_y = float24::FromRaw(regs.rasterizer.viewport_size_y); |     const float abs_u = std::abs(u.ToFloat32()); | ||||||
|     viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.rasterizer.viewport_corner.x)); |     const float abs_v = std::abs(v.ToFloat32()); | ||||||
|     viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.rasterizer.viewport_corner.y)); |     const float abs_w = std::abs(w.ToFloat32()); | ||||||
| 
 |     f24 x, y, z; | ||||||
|     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; |     PAddr addr; | ||||||
|     vtx.pos.w = inv_w; |     if (abs_u > abs_v && abs_u > abs_w) { | ||||||
|     vtx.quat *= inv_w; |         if (u > f24::Zero()) { | ||||||
|     vtx.color *= inv_w; |             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::PositiveX); | ||||||
|     vtx.tc0 *= inv_w; |             y = -v; | ||||||
|     vtx.tc1 *= inv_w; |         } else { | ||||||
|     vtx.tc0_w *= inv_w; |             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::NegativeX); | ||||||
|     vtx.view *= inv_w; |             y = v; | ||||||
|     vtx.tc2 *= inv_w; |         } | ||||||
| 
 |         x = -w; | ||||||
|     vtx.screenpos[0] = |         z = u; | ||||||
|         (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; |     } else if (abs_v > abs_w) { | ||||||
|     vtx.screenpos[1] = |         if (v > f24::Zero()) { | ||||||
|         (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; |             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::PositiveY); | ||||||
|     vtx.screenpos[2] = vtx.pos.z * inv_w; |             x = u; | ||||||
|  |         } else { | ||||||
|  |             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::NegativeY); | ||||||
|  |             x = -u; | ||||||
|  |         } | ||||||
|  |         y = w; | ||||||
|  |         z = v; | ||||||
|  |     } else { | ||||||
|  |         if (w > f24::Zero()) { | ||||||
|  |             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::PositiveZ); | ||||||
|  |             y = -v; | ||||||
|  |         } else { | ||||||
|  |             addr = regs.GetCubePhysicalAddress(TexturingRegs::CubeFace::NegativeZ); | ||||||
|  |             y = v; | ||||||
|  |         } | ||||||
|  |         x = u; | ||||||
|  |         z = w; | ||||||
|  |     } | ||||||
|  |     const f24 z_abs = f24::FromFloat32(std::abs(z.ToFloat32())); | ||||||
|  |     const f24 half = f24::FromFloat32(0.5f); | ||||||
|  |     return std::make_tuple(x / z * half + half, y / z * half + half, z_abs, addr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { | bool IsRightSideOrFlatBottomEdge(const Common::Vec2<Fix12P4>& vtx, | ||||||
|     using boost::container::static_vector; |                                  const Common::Vec2<Fix12P4>& line1, | ||||||
| 
 |                                  const Common::Vec2<Fix12P4>& line2) { | ||||||
|     // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
 |     if (line1.y == line2.y) { | ||||||
|     // the new edge (or less in degenerate cases). As such, we can say that each clipping plane
 |         // Just check if vertex is above us => bottom line parallel to x-axis
 | ||||||
|     // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
 |         return vtx.y < line1.y; | ||||||
|     // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
 |     } else { | ||||||
|     static const std::size_t MAX_VERTICES = 9; |         // Check if vertex is on our left => right side
 | ||||||
|     static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; |         // TODO: Not sure how likely this is to overflow
 | ||||||
|     static_vector<Vertex, MAX_VERTICES> buffer_b; |         const auto svtx = vtx.Cast<s32>(); | ||||||
| 
 |         const auto sline1 = line1.Cast<s32>(); | ||||||
|     auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) { |         const auto sline2 = line2.Cast<s32>(); | ||||||
|         if (Common::Dot(a, b) < float24::Zero()) |         return svtx.x < | ||||||
|             a = a * float24::FromFloat32(-1.0f); |                sline1.x + (sline2.x - sline1.x) * (svtx.y - sline1.y) / (sline2.y - sline1.y); | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     // Flip the quaternions if they are opposite to prevent interpolating them over the wrong
 |  | ||||||
|     // direction.
 |  | ||||||
|     FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat); |  | ||||||
|     FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat); |  | ||||||
| 
 |  | ||||||
|     auto* output_list = &buffer_a; |  | ||||||
|     auto* input_list = &buffer_b; |  | ||||||
| 
 |  | ||||||
|     // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
 |  | ||||||
|     // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
 |  | ||||||
|     //       epsilon possible within float24 accuracy.
 |  | ||||||
|     static const float24 EPSILON = float24::FromFloat32(0.00001f); |  | ||||||
|     static const float24 f0 = float24::FromFloat32(0.0); |  | ||||||
|     static const float24 f1 = float24::FromFloat32(1.0); |  | ||||||
|     static const std::array<ClippingEdge, 7> clipping_edges = {{ |  | ||||||
|         {Common::MakeVec(-f1, f0, f0, f1)}, // x = +w
 |  | ||||||
|         {Common::MakeVec(f1, f0, f0, f1)},  // x = -w
 |  | ||||||
|         {Common::MakeVec(f0, -f1, f0, f1)}, // y = +w
 |  | ||||||
|         {Common::MakeVec(f0, f1, f0, f1)},  // y = -w
 |  | ||||||
|         {Common::MakeVec(f0, f0, -f1, f0)}, // z =  0
 |  | ||||||
|         {Common::MakeVec(f0, f0, f1, f1)},  // z = -w
 |  | ||||||
|         {Common::MakeVec(f0, f0, f0, f1), |  | ||||||
|          Common::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON
 |  | ||||||
|     }}; |  | ||||||
| 
 |  | ||||||
|     // Simple implementation of the Sutherland-Hodgman clipping algorithm.
 |  | ||||||
|     // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
 |  | ||||||
|     auto Clip = [&](const ClippingEdge& edge) { |  | ||||||
|         std::swap(input_list, output_list); |  | ||||||
|         output_list->clear(); |  | ||||||
| 
 |  | ||||||
|         const Vertex* reference_vertex = &input_list->back(); |  | ||||||
| 
 |  | ||||||
|         for (const auto& vertex : *input_list) { |  | ||||||
|             // NOTE: This algorithm changes vertex order in some cases!
 |  | ||||||
|             if (edge.IsInside(vertex)) { |  | ||||||
|                 if (edge.IsOutSide(*reference_vertex)) { |  | ||||||
|                     output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 output_list->push_back(vertex); |  | ||||||
|             } else if (edge.IsInside(*reference_vertex)) { |  | ||||||
|                 output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); |  | ||||||
|             } |  | ||||||
|             reference_vertex = &vertex; |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     for (auto edge : clipping_edges) { |  | ||||||
|         Clip(edge); |  | ||||||
| 
 |  | ||||||
|         // Need to have at least a full triangle to continue...
 |  | ||||||
|         if (output_list->size() < 3) |  | ||||||
|             return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (g_state.regs.rasterizer.clip_enable) { |  | ||||||
|         ClippingEdge custom_edge{g_state.regs.rasterizer.GetClipCoef()}; |  | ||||||
|         Clip(custom_edge); |  | ||||||
| 
 |  | ||||||
|         if (output_list->size() < 3) |  | ||||||
|             return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     InitScreenCoordinates((*output_list)[0]); |  | ||||||
|     InitScreenCoordinates((*output_list)[1]); |  | ||||||
| 
 |  | ||||||
|     for (std::size_t i = 0; i < output_list->size() - 2; i++) { |  | ||||||
|         Vertex& vtx0 = (*output_list)[0]; |  | ||||||
|         Vertex& vtx1 = (*output_list)[i + 1]; |  | ||||||
|         Vertex& vtx2 = (*output_list)[i + 2]; |  | ||||||
| 
 |  | ||||||
|         InitScreenCoordinates(vtx2); |  | ||||||
| 
 |  | ||||||
|         LOG_TRACE( |  | ||||||
|             Render_Software, |  | ||||||
|             "Triangle {}/{} at position ({:.3}, {:.3}, {:.3}, {:.3f}), " |  | ||||||
|             "({:.3}, {:.3}, {:.3}, {:.3}), ({:.3}, {:.3}, {:.3}, {:.3}) and " |  | ||||||
|             "screen position ({:.2}, {:.2}, {:.2}), ({:.2}, {:.2}, {:.2}), ({:.2}, {:.2}, {:.2})", |  | ||||||
|             i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), |  | ||||||
|             vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(), |  | ||||||
|             vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), |  | ||||||
|             vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), |  | ||||||
|             vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), |  | ||||||
|             vtx0.screenpos.z.ToFloat32(), vtx1.screenpos.x.ToFloat32(), |  | ||||||
|             vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), |  | ||||||
|             vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), |  | ||||||
|             vtx2.screenpos.z.ToFloat32()); |  | ||||||
| 
 |  | ||||||
|         Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Pica::Clipper
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -1,19 +1,87 @@ | ||||||
| // Copyright 2014 Citra Emulator Project
 | // Copyright 2023 Citra Emulator Project
 | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "common/vector_math.h" | ||||||
|  | #include "video_core/pica_types.h" | ||||||
|  | 
 | ||||||
| namespace Pica { | namespace Pica { | ||||||
| namespace Shader { | struct TexturingRegs; | ||||||
| struct OutputVertex; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace Clipper { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| using Shader::OutputVertex; | using Pica::f24; | ||||||
| 
 | 
 | ||||||
| void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2); | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||||
|  | struct Fix12P4 { | ||||||
|  |     Fix12P4() {} | ||||||
|  |     Fix12P4(u16 val) : val(val) {} | ||||||
| 
 | 
 | ||||||
| } // namespace Clipper
 |     static Fix12P4 FromFloat24(f24 flt) { | ||||||
| } // namespace Pica
 |         // TODO: Rounding here is necessary to prevent garbage pixels at
 | ||||||
|  |         //       triangle borders. Is it that the correct solution, though?
 | ||||||
|  |         return Fix12P4(static_cast<u16>(round(flt.ToFloat32() * 16.0f))); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     static u16 FracMask() { | ||||||
|  |         return 0xF; | ||||||
|  |     } | ||||||
|  |     static u16 IntMask() { | ||||||
|  |         return static_cast<u16>(~0xF); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     operator u16() const { | ||||||
|  |         return val; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool operator<(const Fix12P4& oth) const { | ||||||
|  |         return (u16) * this < (u16)oth; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     u16 val; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct Viewport { | ||||||
|  |     f24 halfsize_x; | ||||||
|  |     f24 offset_x; | ||||||
|  |     f24 halfsize_y; | ||||||
|  |     f24 offset_y; | ||||||
|  |     f24 zscale; | ||||||
|  |     f24 offset_z; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Flips the quaternions if they are opposite to prevent | ||||||
|  |  * interpolating them over the wrong direction. | ||||||
|  |  */ | ||||||
|  | void FlipQuaternionIfOpposite(Common::Vec4<f24>& a, const Common::Vec4<f24>& b); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Calculate signed area of the triangle spanned by the three argument vertices. | ||||||
|  |  * The sign denotes an orientation. | ||||||
|  |  **/ | ||||||
|  | int SignedArea(const Common::Vec2<Fix12P4>& vtx1, const Common::Vec2<Fix12P4>& vtx2, | ||||||
|  |                const Common::Vec2<Fix12P4>& vtx3); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Convert a 3D vector for cube map coordinates to 2D texture coordinates along with the face name. | ||||||
|  |  **/ | ||||||
|  | std::tuple<f24, f24, f24, PAddr> ConvertCubeCoord(f24 u, f24 v, f24 w, | ||||||
|  |                                                   const Pica::TexturingRegs& regs); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not | ||||||
|  |  * drawn. Pixels on any other triangle border are drawn. This is implemented with three bias | ||||||
|  |  * values which are added to the barycentric coordinates w0, w1 and w2, respectively. | ||||||
|  |  * NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... | ||||||
|  |  **/ | ||||||
|  | bool IsRightSideOrFlatBottomEdge(const Common::Vec2<Fix12P4>& vtx, | ||||||
|  |                                  const Common::Vec2<Fix12P4>& line1, | ||||||
|  |                                  const Common::Vec2<Fix12P4>& line2); | ||||||
|  | 
 | ||||||
|  | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -3,23 +3,46 @@ | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/color.h" | #include "common/color.h" | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/vector_math.h" |  | ||||||
| #include "core/hw/gpu.h" | #include "core/hw/gpu.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/pica_state.h" | #include "video_core/pica_types.h" | ||||||
| #include "video_core/regs_framebuffer.h" | #include "video_core/regs_framebuffer.h" | ||||||
| #include "video_core/renderer_software/sw_framebuffer.h" | #include "video_core/renderer_software/sw_framebuffer.h" | ||||||
| #include "video_core/utils.h" | #include "video_core/utils.h" | ||||||
| #include "video_core/video_core.h" |  | ||||||
| 
 | 
 | ||||||
| namespace Pica::Rasterizer { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| void DrawPixel(int x, int y, const Common::Vec4<u8>& color) { | using Pica::f16; | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; | using Pica::FramebufferRegs; | ||||||
|  | 
 | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | /// Decode/Encode for shadow map format. It is similar to D24S8 format,
 | ||||||
|  | /// but the depth field is in big-endian.
 | ||||||
|  | const Common::Vec2<u32> DecodeD24S8Shadow(const u8* bytes) { | ||||||
|  |     return {static_cast<u32>((bytes[0] << 16) | (bytes[1] << 8) | bytes[2]), bytes[3]}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void EncodeD24X8Shadow(u32 depth, u8* bytes) { | ||||||
|  |     bytes[2] = depth & 0xFF; | ||||||
|  |     bytes[1] = (depth >> 8) & 0xFF; | ||||||
|  |     bytes[0] = (depth >> 16) & 0xFF; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void EncodeX24S8Shadow(u8 stencil, u8* bytes) { | ||||||
|  |     bytes[3] = stencil; | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferRegs& regs_) | ||||||
|  |     : memory{memory_}, regs{regs_} {} | ||||||
|  | 
 | ||||||
|  | Framebuffer::~Framebuffer() = default; | ||||||
|  | 
 | ||||||
|  | void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const { | ||||||
|  |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
| 
 | 
 | ||||||
|     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 |     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 | ||||||
|  | @ -27,33 +50,29 @@ void DrawPixel(int x, int y, const Common::Vec4<u8>& color) { | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = |     const u32 bytes_per_pixel = | ||||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||||
|                            coarse_y * framebuffer.width * bytes_per_pixel; |                            coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|     u8* dst_pixel = VideoCore::g_memory->GetPhysicalPointer(addr) + dst_offset; |     u8* depth_buffer = memory.GetPhysicalPointer(addr); | ||||||
|  |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.color_format) { |     switch (framebuffer.color_format) { | ||||||
|     case FramebufferRegs::ColorFormat::RGBA8: |     case FramebufferRegs::ColorFormat::RGBA8: | ||||||
|         Common::Color::EncodeRGBA8(color, dst_pixel); |         Common::Color::EncodeRGBA8(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGB8: |     case FramebufferRegs::ColorFormat::RGB8: | ||||||
|         Common::Color::EncodeRGB8(color, dst_pixel); |         Common::Color::EncodeRGB8(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGB5A1: |     case FramebufferRegs::ColorFormat::RGB5A1: | ||||||
|         Common::Color::EncodeRGB5A1(color, dst_pixel); |         Common::Color::EncodeRGB5A1(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGB565: |     case FramebufferRegs::ColorFormat::RGB565: | ||||||
|         Common::Color::EncodeRGB565(color, dst_pixel); |         Common::Color::EncodeRGB565(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGBA4: |     case FramebufferRegs::ColorFormat::RGBA4: | ||||||
|         Common::Color::EncodeRGBA4(color, dst_pixel); |         Common::Color::EncodeRGBA4(color, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}", |         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}", | ||||||
|                      static_cast<u32>(framebuffer.color_format.Value())); |                      static_cast<u32>(framebuffer.color_format.Value())); | ||||||
|  | @ -61,35 +80,31 @@ void DrawPixel(int x, int y, const Common::Vec4<u8>& color) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const Common::Vec4<u8> GetPixel(int x, int y) { | const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const { | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
| 
 | 
 | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = |     const u32 bytes_per_pixel = | ||||||
|         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |         GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | ||||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||||
|                            coarse_y * framebuffer.width * bytes_per_pixel; |                            coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|     u8* src_pixel = VideoCore::g_memory->GetPhysicalPointer(addr) + src_offset; |     const u8* color_buffer = memory.GetPhysicalPointer(addr); | ||||||
|  |     const u8* src_pixel = color_buffer + src_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.color_format) { |     switch (framebuffer.color_format) { | ||||||
|     case FramebufferRegs::ColorFormat::RGBA8: |     case FramebufferRegs::ColorFormat::RGBA8: | ||||||
|         return Common::Color::DecodeRGBA8(src_pixel); |         return Common::Color::DecodeRGBA8(src_pixel); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGB8: |     case FramebufferRegs::ColorFormat::RGB8: | ||||||
|         return Common::Color::DecodeRGB8(src_pixel); |         return Common::Color::DecodeRGB8(src_pixel); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGB5A1: |     case FramebufferRegs::ColorFormat::RGB5A1: | ||||||
|         return Common::Color::DecodeRGB5A1(src_pixel); |         return Common::Color::DecodeRGB5A1(src_pixel); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGB565: |     case FramebufferRegs::ColorFormat::RGB565: | ||||||
|         return Common::Color::DecodeRGB565(src_pixel); |         return Common::Color::DecodeRGB565(src_pixel); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::ColorFormat::RGBA4: |     case FramebufferRegs::ColorFormat::RGBA4: | ||||||
|         return Common::Color::DecodeRGBA4(src_pixel); |         return Common::Color::DecodeRGBA4(src_pixel); | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}", |         LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}", | ||||||
|                      static_cast<u32>(framebuffer.color_format.Value())); |                      static_cast<u32>(framebuffer.color_format.Value())); | ||||||
|  | @ -99,19 +114,19 @@ const Common::Vec4<u8> GetPixel(int x, int y) { | ||||||
|     return {0, 0, 0, 0}; |     return {0, 0, 0, 0}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 GetDepth(int x, int y) { | u32 Framebuffer::GetDepth(int x, int y) const { | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u8* depth_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); |  | ||||||
| 
 | 
 | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); |     const u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|     u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     u8* src_pixel = depth_buffer + src_offset; |     const u8* depth_buffer = memory.GetPhysicalPointer(addr); | ||||||
|  |     const u8* src_pixel = depth_buffer + src_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|     case FramebufferRegs::DepthFormat::D16: |     case FramebufferRegs::DepthFormat::D16: | ||||||
|  | @ -128,24 +143,23 @@ u32 GetDepth(int x, int y) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u8 GetStencil(int x, int y) { | u8 Framebuffer::GetStencil(int x, int y) const { | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u8* depth_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); |  | ||||||
| 
 | 
 | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); |     const u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|     u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|     u8* src_pixel = depth_buffer + src_offset; |     const u8* depth_buffer = memory.GetPhysicalPointer(addr); | ||||||
|  |     const u8* src_pixel = depth_buffer + src_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|     case FramebufferRegs::DepthFormat::D24S8: |     case FramebufferRegs::DepthFormat::D24S8: | ||||||
|         return Common::Color::DecodeD24S8(src_pixel).y; |         return Common::Color::DecodeD24S8(src_pixel).y; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_WARNING( |         LOG_WARNING( | ||||||
|             HW_GPU, |             HW_GPU, | ||||||
|  | @ -155,33 +169,30 @@ u8 GetStencil(int x, int y) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void SetDepth(int x, int y, u32 value) { | void Framebuffer::SetDepth(int x, int y, u32 value) const { | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u8* depth_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); |  | ||||||
| 
 | 
 | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); |     const u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|     u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|  |     u8* depth_buffer = memory.GetPhysicalPointer(addr); | ||||||
|     u8* dst_pixel = depth_buffer + dst_offset; |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|     case FramebufferRegs::DepthFormat::D16: |     case FramebufferRegs::DepthFormat::D16: | ||||||
|         Common::Color::EncodeD16(value, dst_pixel); |         Common::Color::EncodeD16(value, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::DepthFormat::D24: |     case FramebufferRegs::DepthFormat::D24: | ||||||
|         Common::Color::EncodeD24(value, dst_pixel); |         Common::Color::EncodeD24(value, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::DepthFormat::D24S8: |     case FramebufferRegs::DepthFormat::D24S8: | ||||||
|         Common::Color::EncodeD24X8(value, dst_pixel); |         Common::Color::EncodeD24X8(value, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", |         LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", | ||||||
|                      static_cast<u32>(framebuffer.depth_format.Value())); |                      static_cast<u32>(framebuffer.depth_format.Value())); | ||||||
|  | @ -190,18 +201,18 @@ void SetDepth(int x, int y, u32 value) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void SetStencil(int x, int y, u8 value) { | void Framebuffer::SetStencil(int x, int y, u8 value) const { | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; |     const auto& framebuffer = regs.framebuffer; | ||||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); |     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||||
|     u8* depth_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); |  | ||||||
| 
 | 
 | ||||||
|     y = framebuffer.height - y; |     y = framebuffer.height - y; | ||||||
| 
 | 
 | ||||||
|     const u32 coarse_y = y & ~7; |     const u32 coarse_y = y & ~7; | ||||||
|     u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); |     const u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); | ||||||
|     u32 stride = framebuffer.width * bytes_per_pixel; |     const u32 stride = framebuffer.width * bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |     const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||||
|  |     u8* depth_buffer = memory.GetPhysicalPointer(addr); | ||||||
|     u8* dst_pixel = depth_buffer + dst_offset; |     u8* dst_pixel = depth_buffer + dst_offset; | ||||||
| 
 | 
 | ||||||
|     switch (framebuffer.depth_format) { |     switch (framebuffer.depth_format) { | ||||||
|  | @ -209,11 +220,9 @@ void SetStencil(int x, int y, u8 value) { | ||||||
|     case Pica::FramebufferRegs::DepthFormat::D24: |     case Pica::FramebufferRegs::DepthFormat::D24: | ||||||
|         // Nothing to do
 |         // Nothing to do
 | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case Pica::FramebufferRegs::DepthFormat::D24S8: |     case Pica::FramebufferRegs::DepthFormat::D24S8: | ||||||
|         Common::Color::EncodeX24S8(value, dst_pixel); |         Common::Color::EncodeX24S8(value, dst_pixel); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", |         LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", | ||||||
|                      static_cast<u32>(framebuffer.depth_format.Value())); |                      static_cast<u32>(framebuffer.depth_format.Value())); | ||||||
|  | @ -222,36 +231,65 @@ void SetStencil(int x, int y, u8 value) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void Framebuffer::DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const { | ||||||
|  |     const auto& framebuffer = regs.framebuffer; | ||||||
|  |     const auto& shadow = regs.shadow; | ||||||
|  |     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||||||
|  | 
 | ||||||
|  |     y = framebuffer.height - y; | ||||||
|  | 
 | ||||||
|  |     const u32 coarse_y = y & ~7; | ||||||
|  |     u32 bytes_per_pixel = 4; | ||||||
|  |     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||||
|  |                      coarse_y * framebuffer.width * bytes_per_pixel; | ||||||
|  |     u8* shadow_buffer = memory.GetPhysicalPointer(addr); | ||||||
|  |     u8* dst_pixel = shadow_buffer + dst_offset; | ||||||
|  | 
 | ||||||
|  |     const auto ref = DecodeD24S8Shadow(dst_pixel); | ||||||
|  |     const u32 ref_z = ref.x; | ||||||
|  |     const u32 ref_s = ref.y; | ||||||
|  | 
 | ||||||
|  |     if (depth >= ref_z) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (stencil == 0) { | ||||||
|  |         EncodeD24X8Shadow(depth, dst_pixel); | ||||||
|  |     } else { | ||||||
|  |         const f16 constant = f16::FromRaw(shadow.constant); | ||||||
|  |         const f16 linear = f16::FromRaw(shadow.linear); | ||||||
|  |         const f16 x_ = f16::FromFloat32(static_cast<float>(depth) / ref_z); | ||||||
|  |         const f16 stencil_new = f16::FromFloat32(stencil) / (constant + linear * x_); | ||||||
|  |         stencil = static_cast<u8>(std::clamp(stencil_new.ToFloat32(), 0.0f, 255.0f)); | ||||||
|  | 
 | ||||||
|  |         if (stencil < ref_s) { | ||||||
|  |             EncodeX24S8Shadow(stencil, dst_pixel); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { | u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { | ||||||
|     switch (action) { |     switch (action) { | ||||||
|     case FramebufferRegs::StencilAction::Keep: |     case FramebufferRegs::StencilAction::Keep: | ||||||
|         return old_stencil; |         return old_stencil; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::Zero: |     case FramebufferRegs::StencilAction::Zero: | ||||||
|         return 0; |         return 0; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::Replace: |     case FramebufferRegs::StencilAction::Replace: | ||||||
|         return ref; |         return ref; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::Increment: |     case FramebufferRegs::StencilAction::Increment: | ||||||
|         // Saturated increment
 |         // Saturated increment
 | ||||||
|         return std::min<u8>(old_stencil, 254) + 1; |         return std::min<u8>(old_stencil, 254) + 1; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::Decrement: |     case FramebufferRegs::StencilAction::Decrement: | ||||||
|         // Saturated decrement
 |         // Saturated decrement
 | ||||||
|         return std::max<u8>(old_stencil, 1) - 1; |         return std::max<u8>(old_stencil, 1) - 1; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::Invert: |     case FramebufferRegs::StencilAction::Invert: | ||||||
|         return ~old_stencil; |         return ~old_stencil; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::IncrementWrap: |     case FramebufferRegs::StencilAction::IncrementWrap: | ||||||
|         return old_stencil + 1; |         return old_stencil + 1; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::StencilAction::DecrementWrap: |     case FramebufferRegs::StencilAction::DecrementWrap: | ||||||
|         return old_stencil - 1; |         return old_stencil - 1; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(HW_GPU, "Unknown stencil action {:x}", (int)action); |         LOG_CRITICAL(HW_GPU, "Unknown stencil action {:x}", static_cast<int>(action)); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|         return 0; |         return 0; | ||||||
|     } |     } | ||||||
|  | @ -262,24 +300,21 @@ Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src, | ||||||
|                                        const Common::Vec4<u8>& dest, |                                        const Common::Vec4<u8>& dest, | ||||||
|                                        const Common::Vec4<u8>& destfactor, |                                        const Common::Vec4<u8>& destfactor, | ||||||
|                                        FramebufferRegs::BlendEquation equation) { |                                        FramebufferRegs::BlendEquation equation) { | ||||||
|     Common::Vec4<int> result; |     Common::Vec4i result; | ||||||
| 
 | 
 | ||||||
|     auto src_result = (src * srcfactor).Cast<int>(); |     const auto src_result = (src * srcfactor).Cast<s32>(); | ||||||
|     auto dst_result = (dest * destfactor).Cast<int>(); |     const auto dst_result = (dest * destfactor).Cast<s32>(); | ||||||
| 
 | 
 | ||||||
|     switch (equation) { |     switch (equation) { | ||||||
|     case FramebufferRegs::BlendEquation::Add: |     case FramebufferRegs::BlendEquation::Add: | ||||||
|         result = (src_result + dst_result) / 255; |         result = (src_result + dst_result) / 255; | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::BlendEquation::Subtract: |     case FramebufferRegs::BlendEquation::Subtract: | ||||||
|         result = (src_result - dst_result) / 255; |         result = (src_result - dst_result) / 255; | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::BlendEquation::ReverseSubtract: |     case FramebufferRegs::BlendEquation::ReverseSubtract: | ||||||
|         result = (dst_result - src_result) / 255; |         result = (dst_result - src_result) / 255; | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
 |     // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
 | ||||||
|     //       min/max computations, but is this what the 3DS actually does?
 |     //       min/max computations, but is this what the 3DS actually does?
 | ||||||
|     case FramebufferRegs::BlendEquation::Min: |     case FramebufferRegs::BlendEquation::Min: | ||||||
|  | @ -288,14 +323,12 @@ Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src, | ||||||
|         result.b() = std::min(src.b(), dest.b()); |         result.b() = std::min(src.b(), dest.b()); | ||||||
|         result.a() = std::min(src.a(), dest.a()); |         result.a() = std::min(src.a(), dest.a()); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::BlendEquation::Max: |     case FramebufferRegs::BlendEquation::Max: | ||||||
|         result.r() = std::max(src.r(), dest.r()); |         result.r() = std::max(src.r(), dest.r()); | ||||||
|         result.g() = std::max(src.g(), dest.g()); |         result.g() = std::max(src.g(), dest.g()); | ||||||
|         result.b() = std::max(src.b(), dest.b()); |         result.b() = std::max(src.b(), dest.b()); | ||||||
|         result.a() = std::max(src.a(), dest.a()); |         result.a() = std::max(src.a(), dest.a()); | ||||||
|         break; |         break; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation 0x{:x}", equation); |         LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation 0x{:x}", equation); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|  | @ -309,103 +342,38 @@ u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { | ||||||
|     switch (op) { |     switch (op) { | ||||||
|     case FramebufferRegs::LogicOp::Clear: |     case FramebufferRegs::LogicOp::Clear: | ||||||
|         return 0; |         return 0; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::And: |     case FramebufferRegs::LogicOp::And: | ||||||
|         return src & dest; |         return src & dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::AndReverse: |     case FramebufferRegs::LogicOp::AndReverse: | ||||||
|         return src & ~dest; |         return src & ~dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Copy: |     case FramebufferRegs::LogicOp::Copy: | ||||||
|         return src; |         return src; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Set: |     case FramebufferRegs::LogicOp::Set: | ||||||
|         return 255; |         return 255; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::CopyInverted: |     case FramebufferRegs::LogicOp::CopyInverted: | ||||||
|         return ~src; |         return ~src; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::NoOp: |     case FramebufferRegs::LogicOp::NoOp: | ||||||
|         return dest; |         return dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Invert: |     case FramebufferRegs::LogicOp::Invert: | ||||||
|         return ~dest; |         return ~dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Nand: |     case FramebufferRegs::LogicOp::Nand: | ||||||
|         return ~(src & dest); |         return ~(src & dest); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Or: |     case FramebufferRegs::LogicOp::Or: | ||||||
|         return src | dest; |         return src | dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Nor: |     case FramebufferRegs::LogicOp::Nor: | ||||||
|         return ~(src | dest); |         return ~(src | dest); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Xor: |     case FramebufferRegs::LogicOp::Xor: | ||||||
|         return src ^ dest; |         return src ^ dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::Equiv: |     case FramebufferRegs::LogicOp::Equiv: | ||||||
|         return ~(src ^ dest); |         return ~(src ^ dest); | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::AndInverted: |     case FramebufferRegs::LogicOp::AndInverted: | ||||||
|         return ~src & dest; |         return ~src & dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::OrReverse: |     case FramebufferRegs::LogicOp::OrReverse: | ||||||
|         return src | ~dest; |         return src | ~dest; | ||||||
| 
 |  | ||||||
|     case FramebufferRegs::LogicOp::OrInverted: |     case FramebufferRegs::LogicOp::OrInverted: | ||||||
|         return ~src | dest; |         return ~src | dest; | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     UNREACHABLE(); |     UNREACHABLE(); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| // Decode/Encode for shadow map format. It is similar to D24S8 format, but the depth field is in
 | } // namespace SwRenderer
 | ||||||
| // big-endian
 |  | ||||||
| static const Common::Vec2<u32> DecodeD24S8Shadow(const u8* bytes) { |  | ||||||
|     return {static_cast<u32>((bytes[0] << 16) | (bytes[1] << 8) | bytes[2]), bytes[3]}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void EncodeD24X8Shadow(u32 depth, u8* bytes) { |  | ||||||
|     bytes[2] = depth & 0xFF; |  | ||||||
|     bytes[1] = (depth >> 8) & 0xFF; |  | ||||||
|     bytes[0] = (depth >> 16) & 0xFF; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void EncodeX24S8Shadow(u8 stencil, u8* bytes) { |  | ||||||
|     bytes[3] = stencil; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) { |  | ||||||
|     const auto& framebuffer = g_state.regs.framebuffer.framebuffer; |  | ||||||
|     const auto& shadow = g_state.regs.framebuffer.shadow; |  | ||||||
|     const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); |  | ||||||
| 
 |  | ||||||
|     y = framebuffer.height - y; |  | ||||||
| 
 |  | ||||||
|     const u32 coarse_y = y & ~7; |  | ||||||
|     u32 bytes_per_pixel = 4; |  | ||||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |  | ||||||
|                      coarse_y * framebuffer.width * bytes_per_pixel; |  | ||||||
|     u8* dst_pixel = VideoCore::g_memory->GetPhysicalPointer(addr) + dst_offset; |  | ||||||
| 
 |  | ||||||
|     auto ref = DecodeD24S8Shadow(dst_pixel); |  | ||||||
|     u32 ref_z = ref.x; |  | ||||||
|     u32 ref_s = ref.y; |  | ||||||
| 
 |  | ||||||
|     if (depth < ref_z) { |  | ||||||
|         if (stencil == 0) { |  | ||||||
|             EncodeD24X8Shadow(depth, dst_pixel); |  | ||||||
|         } else { |  | ||||||
|             float16 constant = float16::FromRaw(shadow.constant); |  | ||||||
|             float16 linear = float16::FromRaw(shadow.linear); |  | ||||||
|             float16 x_ = float16::FromFloat32(static_cast<float>(depth) / ref_z); |  | ||||||
|             float16 stencil_new = float16::FromFloat32(stencil) / (constant + linear * x_); |  | ||||||
|             stencil = static_cast<u8>(std::clamp(stencil_new.ToFloat32(), 0.0f, 255.0f)); |  | ||||||
| 
 |  | ||||||
|             if (stencil < ref_s) |  | ||||||
|                 EncodeX24S8Shadow(stencil, dst_pixel); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace Pica::Rasterizer
 |  | ||||||
|  |  | ||||||
|  | @ -8,24 +8,55 @@ | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "video_core/regs_framebuffer.h" | #include "video_core/regs_framebuffer.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica::Rasterizer { | namespace Memory { | ||||||
|  | class MemorySystem; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| void DrawPixel(int x, int y, const Common::Vec4<u8>& color); | namespace Pica { | ||||||
| const Common::Vec4<u8> GetPixel(int x, int y); | struct FramebufferRegs; | ||||||
| u32 GetDepth(int x, int y); | } | ||||||
| u8 GetStencil(int x, int y); | 
 | ||||||
| void SetDepth(int x, int y, u32 value); | namespace SwRenderer { | ||||||
| void SetStencil(int x, int y, u8 value); | 
 | ||||||
| u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); | class Framebuffer { | ||||||
|  | public: | ||||||
|  |     explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer); | ||||||
|  |     ~Framebuffer(); | ||||||
|  | 
 | ||||||
|  |     /// Draws a pixel at the specified coordinates.
 | ||||||
|  |     void DrawPixel(int x, int y, const Common::Vec4<u8>& color) const; | ||||||
|  | 
 | ||||||
|  |     /// Returns the current color at the specified coordinates.
 | ||||||
|  |     [[nodiscard]] const Common::Vec4<u8> GetPixel(int x, int y) const; | ||||||
|  | 
 | ||||||
|  |     /// Returns the depth value at the specified coordinates.
 | ||||||
|  |     [[nodiscard]] u32 GetDepth(int x, int y) const; | ||||||
|  | 
 | ||||||
|  |     /// Returns the stencil value at the specified coordinates.
 | ||||||
|  |     [[nodiscard]] u8 GetStencil(int x, int y) const; | ||||||
|  | 
 | ||||||
|  |     /// Stores the provided depth value at the specified coordinates.
 | ||||||
|  |     void SetDepth(int x, int y, u32 value) const; | ||||||
|  | 
 | ||||||
|  |     /// Stores the provided stencil value at the specified coordinates.
 | ||||||
|  |     void SetStencil(int x, int y, u8 value) const; | ||||||
|  | 
 | ||||||
|  |     /// Draws a pixel to the shadow buffer.
 | ||||||
|  |     void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     Memory::MemorySystem& memory; | ||||||
|  |     const Pica::FramebufferRegs& regs; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref); | ||||||
| 
 | 
 | ||||||
| Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src, | Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src, | ||||||
|                                        const Common::Vec4<u8>& srcfactor, |                                        const Common::Vec4<u8>& srcfactor, | ||||||
|                                        const Common::Vec4<u8>& dest, |                                        const Common::Vec4<u8>& dest, | ||||||
|                                        const Common::Vec4<u8>& destfactor, |                                        const Common::Vec4<u8>& destfactor, | ||||||
|                                        FramebufferRegs::BlendEquation equation); |                                        Pica::FramebufferRegs::BlendEquation equation); | ||||||
| 
 | 
 | ||||||
| u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op); | u8 LogicOp(u8 src, u8 dest, Pica::FramebufferRegs::LogicOp op); | ||||||
| 
 | 
 | ||||||
| void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil); | } // namespace SwRenderer
 | ||||||
| 
 |  | ||||||
| } // namespace Pica::Rasterizer
 |  | ||||||
|  |  | ||||||
|  | @ -5,7 +5,10 @@ | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
| #include "video_core/renderer_software/sw_lighting.h" | #include "video_core/renderer_software/sw_lighting.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica { | namespace SwRenderer { | ||||||
|  | 
 | ||||||
|  | using Pica::f16; | ||||||
|  | using Pica::LightingRegs; | ||||||
| 
 | 
 | ||||||
| static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_t lut_index, | static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_t lut_index, | ||||||
|                                u8 index, float delta) { |                                u8 index, float delta) { | ||||||
|  | @ -14,18 +17,18 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_ | ||||||
| 
 | 
 | ||||||
|     const auto& lut = lighting.luts[lut_index][index]; |     const auto& lut = lighting.luts[lut_index][index]; | ||||||
| 
 | 
 | ||||||
|     float lut_value = lut.ToFloat(); |     const float lut_value = lut.ToFloat(); | ||||||
|     float lut_diff = lut.DiffToFloat(); |     const float lut_diff = lut.DiffToFloat(); | ||||||
| 
 | 
 | ||||||
|     return lut_value + lut_diff * delta; |     return lut_value + lut_diff * delta; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | std::pair<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|     const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, |     const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, | ||||||
|     const Common::Quaternion<float>& normquat, const Common::Vec3<float>& view, |     const Common::Quaternion<f32>& normquat, const Common::Vec3f& view, | ||||||
|     const Common::Vec4<u8> (&texture_color)[4]) { |     std::span<const Common::Vec4<u8>, 4> texture_color) { | ||||||
| 
 | 
 | ||||||
|     Common::Vec4<float> shadow; |     Common::Vec4f shadow; | ||||||
|     if (lighting.config0.enable_shadow) { |     if (lighting.config0.enable_shadow) { | ||||||
|         shadow = texture_color[lighting.config0.shadow_selector].Cast<float>() / 255.0f; |         shadow = texture_color[lighting.config0.shadow_selector].Cast<float>() / 255.0f; | ||||||
|         if (lighting.config0.shadow_invert) { |         if (lighting.config0.shadow_invert) { | ||||||
|  | @ -35,16 +38,16 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|         shadow = Common::MakeVec(1.0f, 1.0f, 1.0f, 1.0f); |         shadow = Common::MakeVec(1.0f, 1.0f, 1.0f, 1.0f); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Common::Vec3<float> surface_normal{}; |     Common::Vec3f surface_normal{}; | ||||||
|     Common::Vec3<float> surface_tangent{}; |     Common::Vec3f surface_tangent{}; | ||||||
| 
 | 
 | ||||||
|     if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { |     if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { | ||||||
|         Common::Vec3<float> perturbation = |         Common::Vec3f perturbation = | ||||||
|             texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f - |             texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f - | ||||||
|             Common::MakeVec(1.0f, 1.0f, 1.0f); |             Common::MakeVec(1.0f, 1.0f, 1.0f); | ||||||
|         if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { |         if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { | ||||||
|             if (!lighting.config0.disable_bump_renorm) { |             if (!lighting.config0.disable_bump_renorm) { | ||||||
|                 const float z_square = 1 - perturbation.xy().Length2(); |                 const f32 z_square = 1 - perturbation.xy().Length2(); | ||||||
|                 perturbation.z = std::sqrt(std::max(z_square, 0.0f)); |                 perturbation.z = std::sqrt(std::max(z_square, 0.0f)); | ||||||
|             } |             } | ||||||
|             surface_normal = perturbation; |             surface_normal = perturbation; | ||||||
|  | @ -65,66 +68,64 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|     auto normal = Common::QuaternionRotate(normquat, surface_normal); |     auto normal = Common::QuaternionRotate(normquat, surface_normal); | ||||||
|     auto tangent = Common::QuaternionRotate(normquat, surface_tangent); |     auto tangent = Common::QuaternionRotate(normquat, surface_tangent); | ||||||
| 
 | 
 | ||||||
|     Common::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; |     Common::Vec4f diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; | ||||||
|     Common::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; |     Common::Vec4f specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; | ||||||
| 
 | 
 | ||||||
|     for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { |     for (u32 light_index = 0; light_index <= lighting.max_light_index; ++light_index) { | ||||||
|         unsigned num = lighting.light_enable.GetNum(light_index); |         u32 num = lighting.light_enable.GetNum(light_index); | ||||||
|         const auto& light_config = lighting.light[num]; |         const auto& light_config = lighting.light[num]; | ||||||
| 
 | 
 | ||||||
|         Common::Vec3<float> refl_value = {}; |         const Common::Vec3f position = {f16::FromRaw(light_config.x).ToFloat32(), | ||||||
|         Common::Vec3<float> position = {float16::FromRaw(light_config.x).ToFloat32(), |                                         f16::FromRaw(light_config.y).ToFloat32(), | ||||||
|                                         float16::FromRaw(light_config.y).ToFloat32(), |                                         f16::FromRaw(light_config.z).ToFloat32()}; | ||||||
|                                         float16::FromRaw(light_config.z).ToFloat32()}; |         Common::Vec3f refl_value{}; | ||||||
|         Common::Vec3<float> light_vector; |         Common::Vec3f light_vector{}; | ||||||
| 
 | 
 | ||||||
|         if (light_config.config.directional) |         if (light_config.config.directional) { | ||||||
|             light_vector = position; |             light_vector = position; | ||||||
|         else |         } else { | ||||||
|             light_vector = position + view; |             light_vector = position + view; | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         [[maybe_unused]] float length = light_vector.Normalize(); |         [[maybe_unused]] const f32 length = light_vector.Normalize(); | ||||||
| 
 | 
 | ||||||
|         Common::Vec3<float> norm_view = view.Normalized(); |         Common::Vec3f norm_view = view.Normalized(); | ||||||
|         Common::Vec3<float> half_vector = norm_view + light_vector; |         Common::Vec3f half_vector = norm_view + light_vector; | ||||||
| 
 | 
 | ||||||
|         float dist_atten = 1.0f; |         f32 dist_atten = 1.0f; | ||||||
|         if (!lighting.IsDistAttenDisabled(num)) { |         if (!lighting.IsDistAttenDisabled(num)) { | ||||||
|             float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); |             const f32 scale = Pica::f20::FromRaw(light_config.dist_atten_scale).ToFloat32(); | ||||||
|             float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); |             const f32 bias = Pica::f20::FromRaw(light_config.dist_atten_bias).ToFloat32(); | ||||||
|             std::size_t lut = |             const std::size_t lut = | ||||||
|                 static_cast<std::size_t>(LightingRegs::LightingSampler::DistanceAttenuation) + num; |                 static_cast<std::size_t>(LightingRegs::LightingSampler::DistanceAttenuation) + num; | ||||||
| 
 | 
 | ||||||
|             float sample_loc = std::clamp(scale * length + bias, 0.0f, 1.0f); |             const f32 sample_loc = std::clamp(scale * length + bias, 0.0f, 1.0f); | ||||||
| 
 | 
 | ||||||
|             u8 lutindex = |             const u8 lutindex = | ||||||
|                 static_cast<u8>(std::clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); |                 static_cast<u8>(std::clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); | ||||||
|             float delta = sample_loc * 256 - lutindex; |             const f32 delta = sample_loc * 256 - lutindex; | ||||||
|  | 
 | ||||||
|             dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); |             dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, |         auto get_lut_value = [&](LightingRegs::LightingLutInput input, bool abs, | ||||||
|                                  LightingRegs::LightingScale scale_enum, |                                  LightingRegs::LightingScale scale_enum, | ||||||
|                                  LightingRegs::LightingSampler sampler) { |                                  LightingRegs::LightingSampler sampler) { | ||||||
|             float result = 0.0f; |             f32 result = 0.0f; | ||||||
| 
 | 
 | ||||||
|             switch (input) { |             switch (input) { | ||||||
|             case LightingRegs::LightingLutInput::NH: |             case LightingRegs::LightingLutInput::NH: | ||||||
|                 result = Common::Dot(normal, half_vector.Normalized()); |                 result = Common::Dot(normal, half_vector.Normalized()); | ||||||
|                 break; |                 break; | ||||||
| 
 |  | ||||||
|             case LightingRegs::LightingLutInput::VH: |             case LightingRegs::LightingLutInput::VH: | ||||||
|                 result = Common::Dot(norm_view, half_vector.Normalized()); |                 result = Common::Dot(norm_view, half_vector.Normalized()); | ||||||
|                 break; |                 break; | ||||||
| 
 |  | ||||||
|             case LightingRegs::LightingLutInput::NV: |             case LightingRegs::LightingLutInput::NV: | ||||||
|                 result = Common::Dot(normal, norm_view); |                 result = Common::Dot(normal, norm_view); | ||||||
|                 break; |                 break; | ||||||
| 
 |  | ||||||
|             case LightingRegs::LightingLutInput::LN: |             case LightingRegs::LightingLutInput::LN: | ||||||
|                 result = Common::Dot(light_vector, normal); |                 result = Common::Dot(light_vector, normal); | ||||||
|                 break; |                 break; | ||||||
| 
 |  | ||||||
|             case LightingRegs::LightingLutInput::SP: { |             case LightingRegs::LightingLutInput::SP: { | ||||||
|                 Common::Vec3<s32> spot_dir{light_config.spot_x.Value(), light_config.spot_y.Value(), |                 Common::Vec3<s32> spot_dir{light_config.spot_x.Value(), light_config.spot_y.Value(), | ||||||
|                                            light_config.spot_z.Value()}; |                                            light_config.spot_z.Value()}; | ||||||
|  | @ -133,8 +134,8 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|             } |             } | ||||||
|             case LightingRegs::LightingLutInput::CP: |             case LightingRegs::LightingLutInput::CP: | ||||||
|                 if (lighting.config0.config == LightingRegs::LightingConfig::Config7) { |                 if (lighting.config0.config == LightingRegs::LightingConfig::Config7) { | ||||||
|                     const Common::Vec3<float> norm_half_vector = half_vector.Normalized(); |                     const Common::Vec3f norm_half_vector = half_vector.Normalized(); | ||||||
|                     const Common::Vec3<float> half_vector_proj = |                     const Common::Vec3f half_vector_proj = | ||||||
|                         norm_half_vector - normal * Common::Dot(normal, norm_half_vector); |                         norm_half_vector - normal * Common::Dot(normal, norm_half_vector); | ||||||
|                     result = Common::Dot(half_vector_proj, tangent); |                     result = Common::Dot(half_vector_proj, tangent); | ||||||
|                 } else { |                 } else { | ||||||
|  | @ -148,57 +149,59 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             u8 index; |             u8 index; | ||||||
|             float delta; |             f32 delta; | ||||||
| 
 | 
 | ||||||
|             if (abs) { |             if (abs) { | ||||||
|                 if (light_config.config.two_sided_diffuse) |                 if (light_config.config.two_sided_diffuse) { | ||||||
|                     result = std::abs(result); |                     result = std::abs(result); | ||||||
|                 else |                 } else { | ||||||
|                     result = std::max(result, 0.0f); |                     result = std::max(result, 0.0f); | ||||||
|  |                 } | ||||||
| 
 | 
 | ||||||
|                 float flr = std::floor(result * 256.0f); |                 const f32 flr = std::floor(result * 256.0f); | ||||||
|                 index = static_cast<u8>(std::clamp(flr, 0.0f, 255.0f)); |                 index = static_cast<u8>(std::clamp(flr, 0.0f, 255.0f)); | ||||||
|                 delta = result * 256 - index; |                 delta = result * 256 - index; | ||||||
|             } else { |             } else { | ||||||
|                 float flr = std::floor(result * 128.0f); |                 const f32 flr = std::floor(result * 128.0f); | ||||||
|                 s8 signed_index = static_cast<s8>(std::clamp(flr, -128.0f, 127.0f)); |                 const s8 signed_index = static_cast<s8>(std::clamp(flr, -128.0f, 127.0f)); | ||||||
|                 delta = result * 128.0f - signed_index; |                 delta = result * 128.0f - signed_index; | ||||||
|                 index = static_cast<u8>(signed_index); |                 index = static_cast<u8>(signed_index); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             float scale = lighting.lut_scale.GetScale(scale_enum); |             const f32 scale = lighting.lut_scale.GetScale(scale_enum); | ||||||
|             return scale * LookupLightingLut(lighting_state, static_cast<std::size_t>(sampler), |             return scale * LookupLightingLut(lighting_state, static_cast<std::size_t>(sampler), | ||||||
|                                              index, delta); |                                              index, delta); | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         // If enabled, compute spot light attenuation value
 |         // If enabled, compute spot light attenuation value
 | ||||||
|         float spot_atten = 1.0f; |         f32 spot_atten = 1.0f; | ||||||
|         if (!lighting.IsSpotAttenDisabled(num) && |         if (!lighting.IsSpotAttenDisabled(num) && | ||||||
|             LightingRegs::IsLightingSamplerSupported( |             LightingRegs::IsLightingSamplerSupported( | ||||||
|                 lighting.config0.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { |                 lighting.config0.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { | ||||||
|             auto lut = LightingRegs::SpotlightAttenuationSampler(num); |             auto lut = LightingRegs::SpotlightAttenuationSampler(num); | ||||||
|             spot_atten = GetLutValue(lighting.lut_input.sp, lighting.abs_lut_input.disable_sp == 0, |             spot_atten = | ||||||
|  |                 get_lut_value(lighting.lut_input.sp, lighting.abs_lut_input.disable_sp == 0, | ||||||
|                               lighting.lut_scale.sp, lut); |                               lighting.lut_scale.sp, lut); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Specular 0 component
 |         // Specular 0 component
 | ||||||
|         float d0_lut_value = 1.0f; |         f32 d0_lut_value = 1.0f; | ||||||
|         if (lighting.config1.disable_lut_d0 == 0 && |         if (lighting.config1.disable_lut_d0 == 0 && | ||||||
|             LightingRegs::IsLightingSamplerSupported( |             LightingRegs::IsLightingSamplerSupported( | ||||||
|                 lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { |                 lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { | ||||||
|             d0_lut_value = |             d0_lut_value = | ||||||
|                 GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, |                 get_lut_value(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, | ||||||
|                               lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); |                               lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         Common::Vec3<float> specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); |         Common::Vec3f specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); | ||||||
| 
 | 
 | ||||||
|         // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
 |         // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
 | ||||||
|         if (lighting.config1.disable_lut_rr == 0 && |         if (lighting.config1.disable_lut_rr == 0 && | ||||||
|             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, |             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||||||
|                                                      LightingRegs::LightingSampler::ReflectRed)) { |                                                      LightingRegs::LightingSampler::ReflectRed)) { | ||||||
|             refl_value.x = |             refl_value.x = | ||||||
|                 GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, |                 get_lut_value(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, | ||||||
|                               lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); |                               lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); | ||||||
|         } else { |         } else { | ||||||
|             refl_value.x = 1.0f; |             refl_value.x = 1.0f; | ||||||
|  | @ -209,7 +212,7 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, |             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||||||
|                                                      LightingRegs::LightingSampler::ReflectGreen)) { |                                                      LightingRegs::LightingSampler::ReflectGreen)) { | ||||||
|             refl_value.y = |             refl_value.y = | ||||||
|                 GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, |                 get_lut_value(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, | ||||||
|                               lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); |                               lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); | ||||||
|         } else { |         } else { | ||||||
|             refl_value.y = refl_value.x; |             refl_value.y = refl_value.x; | ||||||
|  | @ -220,24 +223,23 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, |             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||||||
|                                                      LightingRegs::LightingSampler::ReflectBlue)) { |                                                      LightingRegs::LightingSampler::ReflectBlue)) { | ||||||
|             refl_value.z = |             refl_value.z = | ||||||
|                 GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, |                 get_lut_value(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, | ||||||
|                               lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); |                               lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); | ||||||
|         } else { |         } else { | ||||||
|             refl_value.z = refl_value.x; |             refl_value.z = refl_value.x; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Specular 1 component
 |         // Specular 1 component
 | ||||||
|         float d1_lut_value = 1.0f; |         f32 d1_lut_value = 1.0f; | ||||||
|         if (lighting.config1.disable_lut_d1 == 0 && |         if (lighting.config1.disable_lut_d1 == 0 && | ||||||
|             LightingRegs::IsLightingSamplerSupported( |             LightingRegs::IsLightingSamplerSupported( | ||||||
|                 lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { |                 lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { | ||||||
|             d1_lut_value = |             d1_lut_value = | ||||||
|                 GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, |                 get_lut_value(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, | ||||||
|                               lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); |                               lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         Common::Vec3<float> specular_1 = |         Common::Vec3f specular_1 = d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); | ||||||
|             d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); |  | ||||||
| 
 | 
 | ||||||
|         // Fresnel
 |         // Fresnel
 | ||||||
|         // Note: only the last entry in the light slots applies the Fresnel factor
 |         // Note: only the last entry in the light slots applies the Fresnel factor
 | ||||||
|  | @ -245,8 +247,8 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, |             LightingRegs::IsLightingSamplerSupported(lighting.config0.config, | ||||||
|                                                      LightingRegs::LightingSampler::Fresnel)) { |                                                      LightingRegs::LightingSampler::Fresnel)) { | ||||||
| 
 | 
 | ||||||
|             float lut_value = |             const f32 lut_value = | ||||||
|                 GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, |                 get_lut_value(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, | ||||||
|                               lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); |                               lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); | ||||||
| 
 | 
 | ||||||
|             // Enabled for diffuse lighting alpha component
 |             // Enabled for diffuse lighting alpha component
 | ||||||
|  | @ -261,18 +263,19 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         auto dot_product = Common::Dot(light_vector, normal); |         auto dot_product = Common::Dot(light_vector, normal); | ||||||
|         if (light_config.config.two_sided_diffuse) |         if (light_config.config.two_sided_diffuse) { | ||||||
|             dot_product = std::abs(dot_product); |             dot_product = std::abs(dot_product); | ||||||
|         else |         } else { | ||||||
|             dot_product = std::max(dot_product, 0.0f); |             dot_product = std::max(dot_product, 0.0f); | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         float clamp_highlights = 1.0f; |         f32 clamp_highlights = 1.0f; | ||||||
|         if (lighting.config0.clamp_highlights) { |         if (lighting.config0.clamp_highlights) { | ||||||
|             clamp_highlights = dot_product == 0.0f ? 0.0f : 1.0f; |             clamp_highlights = dot_product == 0.0f ? 0.0f : 1.0f; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         if (light_config.config.geometric_factor_0 || light_config.config.geometric_factor_1) { |         if (light_config.config.geometric_factor_0 || light_config.config.geometric_factor_1) { | ||||||
|             float geo_factor = half_vector.Length2(); |             f32 geo_factor = half_vector.Length2(); | ||||||
|             geo_factor = geo_factor == 0.0f ? 0.0f : std::min(dot_product / geo_factor, 1.0f); |             geo_factor = geo_factor == 0.0f ? 0.0f : std::min(dot_product / geo_factor, 1.0f); | ||||||
|             if (light_config.config.geometric_factor_0) { |             if (light_config.config.geometric_factor_0) { | ||||||
|                 specular_0 *= geo_factor; |                 specular_0 *= geo_factor; | ||||||
|  | @ -315,17 +318,17 @@ std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
| 
 | 
 | ||||||
|     diffuse_sum += Common::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); |     diffuse_sum += Common::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); | ||||||
| 
 | 
 | ||||||
|     auto diffuse = Common::MakeVec<float>(std::clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, |     const auto diffuse = Common::MakeVec(std::clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, | ||||||
|                                          std::clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, |                                          std::clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, | ||||||
|                                          std::clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, |                                          std::clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, | ||||||
|                                          std::clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) |                                          std::clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) | ||||||
|                              .Cast<u8>(); |                              .Cast<u8>(); | ||||||
|     auto specular = Common::MakeVec<float>(std::clamp(specular_sum.x, 0.0f, 1.0f) * 255, |     const auto specular = Common::MakeVec(std::clamp(specular_sum.x, 0.0f, 1.0f) * 255, | ||||||
|                                           std::clamp(specular_sum.y, 0.0f, 1.0f) * 255, |                                           std::clamp(specular_sum.y, 0.0f, 1.0f) * 255, | ||||||
|                                           std::clamp(specular_sum.z, 0.0f, 1.0f) * 255, |                                           std::clamp(specular_sum.z, 0.0f, 1.0f) * 255, | ||||||
|                                           std::clamp(specular_sum.w, 0.0f, 1.0f) * 255) |                                           std::clamp(specular_sum.w, 0.0f, 1.0f) * 255) | ||||||
|                               .Cast<u8>(); |                               .Cast<u8>(); | ||||||
|     return std::make_tuple(diffuse, specular); |     return std::make_pair(diffuse, specular); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Pica
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -4,16 +4,18 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include <tuple> | #include <span> | ||||||
|  | #include <utility> | ||||||
|  | 
 | ||||||
| #include "common/quaternion.h" | #include "common/quaternion.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "video_core/pica_state.h" | #include "video_core/pica_state.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| std::tuple<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | std::pair<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors( | ||||||
|     const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, |     const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, | ||||||
|     const Common::Quaternion<float>& normquat, const Common::Vec3<float>& view, |     const Common::Quaternion<f32>& normquat, const Common::Vec3f& view, | ||||||
|     const Common::Vec4<u8> (&texture_color)[4]); |     std::span<const Common::Vec4<u8>, 4> texture_color); | ||||||
| 
 | 
 | ||||||
| } // namespace Pica
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -4,17 +4,18 @@ | ||||||
| 
 | 
 | ||||||
| #include <array> | #include <array> | ||||||
| #include <cmath> | #include <cmath> | ||||||
| #include "common/math_util.h" |  | ||||||
| #include "video_core/renderer_software/sw_proctex.h" | #include "video_core/renderer_software/sw_proctex.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica::Rasterizer { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| using ProcTexClamp = TexturingRegs::ProcTexClamp; | namespace { | ||||||
| using ProcTexShift = TexturingRegs::ProcTexShift; | using ProcTexClamp = Pica::TexturingRegs::ProcTexClamp; | ||||||
| using ProcTexCombiner = TexturingRegs::ProcTexCombiner; | using ProcTexShift = Pica::TexturingRegs::ProcTexShift; | ||||||
| using ProcTexFilter = TexturingRegs::ProcTexFilter; | using ProcTexCombiner = Pica::TexturingRegs::ProcTexCombiner; | ||||||
|  | using ProcTexFilter = Pica::TexturingRegs::ProcTexFilter; | ||||||
|  | using Pica::f16; | ||||||
| 
 | 
 | ||||||
| static float LookupLUT(const std::array<State::ProcTex::ValueEntry, 128>& lut, float coord) { | float LookupLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, float coord) { | ||||||
|     // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
 |     // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
 | ||||||
|     // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
 |     // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
 | ||||||
|     // value entries and difference entries.
 |     // value entries and difference entries.
 | ||||||
|  | @ -26,13 +27,13 @@ static float LookupLUT(const std::array<State::ProcTex::ValueEntry, 128>& lut, f | ||||||
| 
 | 
 | ||||||
| // These function are used to generate random noise for procedural texture. Their results are
 | // These function are used to generate random noise for procedural texture. Their results are
 | ||||||
| // verified against real hardware, but it's not known if the algorithm is the same as hardware.
 | // verified against real hardware, but it's not known if the algorithm is the same as hardware.
 | ||||||
| static unsigned int NoiseRand1D(unsigned int v) { | unsigned int NoiseRand1D(unsigned int v) { | ||||||
|     static constexpr std::array<unsigned int, 16> table{ |     static constexpr std::array<unsigned int, 16> table{ | ||||||
|         {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}}; |         {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}}; | ||||||
|     return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; |     return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static float NoiseRand2D(unsigned int x, unsigned int y) { | float NoiseRand2D(unsigned int x, unsigned int y) { | ||||||
|     static constexpr std::array<unsigned int, 16> table{ |     static constexpr std::array<unsigned int, 16> table{ | ||||||
|         {10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}}; |         {10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}}; | ||||||
|     unsigned int u2 = NoiseRand1D(x); |     unsigned int u2 = NoiseRand1D(x); | ||||||
|  | @ -45,11 +46,12 @@ static float NoiseRand2D(unsigned int x, unsigned int y) { | ||||||
|     return -1.0f + v2 * 2.0f / 15.0f; |     return -1.0f + v2 * 2.0f / 15.0f; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static float NoiseCoef(float u, float v, const TexturingRegs& regs, const State::ProcTex& state) { | float NoiseCoef(float u, float v, const Pica::TexturingRegs& regs, | ||||||
|     const float freq_u = float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(); |                 const Pica::State::ProcTex& state) { | ||||||
|     const float freq_v = float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(); |     const float freq_u = f16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(); | ||||||
|     const float phase_u = float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(); |     const float freq_v = f16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(); | ||||||
|     const float phase_v = float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(); |     const float phase_u = f16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(); | ||||||
|  |     const float phase_v = f16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(); | ||||||
|     const float x = 9 * freq_u * std::abs(u + phase_u); |     const float x = 9 * freq_u * std::abs(u + phase_u); | ||||||
|     const float y = 9 * freq_v * std::abs(v + phase_v); |     const float y = 9 * freq_v * std::abs(v + phase_v); | ||||||
|     const int x_int = static_cast<int>(x); |     const int x_int = static_cast<int>(x); | ||||||
|  | @ -66,7 +68,7 @@ static float NoiseCoef(float u, float v, const TexturingRegs& regs, const State: | ||||||
|     return Common::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise); |     return Common::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) { | float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) { | ||||||
|     const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f; |     const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f; | ||||||
|     switch (mode) { |     switch (mode) { | ||||||
|     case ProcTexShift::None: |     case ProcTexShift::None: | ||||||
|  | @ -81,7 +83,7 @@ static float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static void ClampCoord(float& coord, ProcTexClamp mode) { | void ClampCoord(float& coord, ProcTexClamp mode) { | ||||||
|     switch (mode) { |     switch (mode) { | ||||||
|     case ProcTexClamp::ToZero: |     case ProcTexClamp::ToZero: | ||||||
|         if (coord > 1.0f) |         if (coord > 1.0f) | ||||||
|  | @ -112,8 +114,8 @@ static void ClampCoord(float& coord, ProcTexClamp mode) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static float CombineAndMap(float u, float v, ProcTexCombiner combiner, | float CombineAndMap(float u, float v, ProcTexCombiner combiner, | ||||||
|                            const std::array<State::ProcTex::ValueEntry, 128>& map_table) { |                     const std::array<Pica::State::ProcTex::ValueEntry, 128>& map_table) { | ||||||
|     float f; |     float f; | ||||||
|     switch (combiner) { |     switch (combiner) { | ||||||
|     case ProcTexCombiner::U: |     case ProcTexCombiner::U: | ||||||
|  | @ -122,28 +124,28 @@ static float CombineAndMap(float u, float v, ProcTexCombiner combiner, | ||||||
|     case ProcTexCombiner::U2: |     case ProcTexCombiner::U2: | ||||||
|         f = u * u; |         f = u * u; | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::V: |     case ProcTexCombiner::V: | ||||||
|         f = v; |         f = v; | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::V2: |     case ProcTexCombiner::V2: | ||||||
|         f = v * v; |         f = v * v; | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::Add: |     case ProcTexCombiner::Add: | ||||||
|         f = (u + v) * 0.5f; |         f = (u + v) * 0.5f; | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::Add2: |     case ProcTexCombiner::Add2: | ||||||
|         f = (u * u + v * v) * 0.5f; |         f = (u * u + v * v) * 0.5f; | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::SqrtAdd2: |     case ProcTexCombiner::SqrtAdd2: | ||||||
|         f = std::min(std::sqrt(u * u + v * v), 1.0f); |         f = std::min(std::sqrt(u * u + v * v), 1.0f); | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::Min: |     case ProcTexCombiner::Min: | ||||||
|         f = std::min(u, v); |         f = std::min(u, v); | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::Max: |     case ProcTexCombiner::Max: | ||||||
|         f = std::max(u, v); |         f = std::max(u, v); | ||||||
|         break; |         break; | ||||||
|     case TexturingRegs::ProcTexCombiner::RMax: |     case ProcTexCombiner::RMax: | ||||||
|         f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f); |         f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|  | @ -153,8 +155,10 @@ static float CombineAndMap(float u, float v, ProcTexCombiner combiner, | ||||||
|     } |     } | ||||||
|     return LookupLUT(map_table, f); |     return LookupLUT(map_table, f); | ||||||
| } | } | ||||||
|  | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| Common::Vec4<u8> ProcTex(float u, float v, const TexturingRegs& regs, const State::ProcTex& state) { | Common::Vec4<u8> ProcTex(float u, float v, const Pica::TexturingRegs& regs, | ||||||
|  |                          const Pica::State::ProcTex& state) { | ||||||
|     u = std::abs(u); |     u = std::abs(u); | ||||||
|     v = std::abs(v); |     v = std::abs(v); | ||||||
| 
 | 
 | ||||||
|  | @ -218,4 +222,4 @@ Common::Vec4<u8> ProcTex(float u, float v, const TexturingRegs& regs, const Stat | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Pica::Rasterizer
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -8,9 +8,10 @@ | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "video_core/pica_state.h" | #include "video_core/pica_state.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica::Rasterizer { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| /// Generates procedural texture color for the given coordinates
 | /// Generates procedural texture color for the given coordinates
 | ||||||
| Common::Vec4<u8> ProcTex(float u, float v, const TexturingRegs& regs, const State::ProcTex& state); | Common::Vec4<u8> ProcTex(float u, float v, const Pica::TexturingRegs& regs, | ||||||
|  |                          const Pica::State::ProcTex& state); | ||||||
| 
 | 
 | ||||||
| } // namespace Pica::Rasterizer
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -2,15 +2,937 @@ | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include "video_core/renderer_software/sw_clipper.h" | #include <boost/container/static_vector.hpp> | ||||||
|  | #include "common/logging/log.h" | ||||||
|  | #include "common/microprofile.h" | ||||||
|  | #include "common/quaternion.h" | ||||||
|  | #include "common/vector_math.h" | ||||||
|  | #include "core/memory.h" | ||||||
|  | #include "video_core/pica_state.h" | ||||||
|  | #include "video_core/pica_types.h" | ||||||
|  | #include "video_core/renderer_software/sw_framebuffer.h" | ||||||
|  | #include "video_core/renderer_software/sw_lighting.h" | ||||||
|  | #include "video_core/renderer_software/sw_proctex.h" | ||||||
| #include "video_core/renderer_software/sw_rasterizer.h" | #include "video_core/renderer_software/sw_rasterizer.h" | ||||||
|  | #include "video_core/renderer_software/sw_texturing.h" | ||||||
|  | #include "video_core/shader/shader.h" | ||||||
|  | #include "video_core/texture/texture_decode.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace SwRenderer { | ||||||
|  | 
 | ||||||
|  | using Pica::f24; | ||||||
|  | using Pica::FramebufferRegs; | ||||||
|  | using Pica::RasterizerRegs; | ||||||
|  | using Pica::TexturingRegs; | ||||||
|  | using Pica::Texture::LookupTexture; | ||||||
|  | using Pica::Texture::TextureInfo; | ||||||
|  | 
 | ||||||
|  | struct Vertex : Pica::Shader::OutputVertex { | ||||||
|  |     Vertex(const OutputVertex& v) : OutputVertex(v) {} | ||||||
|  | 
 | ||||||
|  |     /// Attributes used to store intermediate results position after perspective divide.
 | ||||||
|  |     Common::Vec3<f24> screenpos; | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Linear interpolation | ||||||
|  |      * factor: 0=this, 1=vtx | ||||||
|  |      * Note: This function cannot be called after perspective divide. | ||||||
|  |      **/ | ||||||
|  |     void Lerp(f24 factor, const Vertex& vtx) { | ||||||
|  |         pos = pos * factor + vtx.pos * (f24::One() - factor); | ||||||
|  |         quat = quat * factor + vtx.quat * (f24::One() - factor); | ||||||
|  |         color = color * factor + vtx.color * (f24::One() - factor); | ||||||
|  |         tc0 = tc0 * factor + vtx.tc0 * (f24::One() - factor); | ||||||
|  |         tc1 = tc1 * factor + vtx.tc1 * (f24::One() - factor); | ||||||
|  |         tc0_w = tc0_w * factor + vtx.tc0_w * (f24::One() - factor); | ||||||
|  |         view = view * factor + vtx.view * (f24::One() - factor); | ||||||
|  |         tc2 = tc2 * factor + vtx.tc2 * (f24::One() - factor); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Linear interpolation | ||||||
|  |      * factor: 0=v0, 1=v1 | ||||||
|  |      * Note: This function cannot be called after perspective divide. | ||||||
|  |      **/ | ||||||
|  |     static Vertex Lerp(f24 factor, const Vertex& v0, const Vertex& v1) { | ||||||
|  |         Vertex ret = v0; | ||||||
|  |         ret.Lerp(factor, v1); | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); | ||||||
|  | 
 | ||||||
|  | struct ClippingEdge { | ||||||
|  | public: | ||||||
|  |     constexpr ClippingEdge(Common::Vec4<f24> coeffs, | ||||||
|  |                            Common::Vec4<f24> bias = Common::Vec4<f24>(f24::Zero(), f24::Zero(), | ||||||
|  |                                                                       f24::Zero(), f24::Zero())) | ||||||
|  |         : pos(f24::Zero()), coeffs(coeffs), bias(bias) {} | ||||||
|  | 
 | ||||||
|  |     bool IsInside(const Vertex& vertex) const { | ||||||
|  |         return Common::Dot(vertex.pos + bias, coeffs) >= f24::Zero(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool IsOutSide(const Vertex& vertex) const { | ||||||
|  |         return !IsInside(vertex); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const { | ||||||
|  |         const f24 dp = Common::Dot(v0.pos + bias, coeffs); | ||||||
|  |         const f24 dp_prev = Common::Dot(v1.pos + bias, coeffs); | ||||||
|  |         const f24 factor = dp_prev / (dp_prev - dp); | ||||||
|  |         return Vertex::Lerp(factor, v0, v1); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     [[maybe_unused]] f24 pos; | ||||||
|  |     Common::Vec4<f24> coeffs; | ||||||
|  |     Common::Vec4<f24> bias; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
/// Constructs the software rasterizer: `memory` is initialized from the given memory system,
/// `state` from the global Pica::g_state, `regs` from that state's registers, and the
/// framebuffer helper `fb` from `memory` together with the framebuffer registers.
/// NOTE(review): the initializers read members set earlier in the list, so this relies on the
/// members being declared in the same order in the class — confirm in the header.
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_)
    : memory{memory_}, state{Pica::g_state}, regs{state.regs}, fb{memory, regs.framebuffer} {}
| 
 | 
 | ||||||
| void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, | void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, | ||||||
|                                      const Pica::Shader::OutputVertex& v1, |                                      const Pica::Shader::OutputVertex& v1, | ||||||
|                                      const Pica::Shader::OutputVertex& v2) { |                                      const Pica::Shader::OutputVertex& v2) { | ||||||
|     Pica::Clipper::ProcessTriangle(v0, v1, v2); |     /**
 | ||||||
|  |      * Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at | ||||||
|  |      * the new edge (or less in degenerate cases). As such, we can say that each clipping plane | ||||||
|  |      * introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a | ||||||
|  |      * fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. | ||||||
|  |      **/ | ||||||
|  |     static constexpr std::size_t MAX_VERTICES = 9; | ||||||
|  | 
 | ||||||
|  |     boost::container::static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; | ||||||
|  |     boost::container::static_vector<Vertex, MAX_VERTICES> buffer_b; | ||||||
|  | 
 | ||||||
|  |     FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat); | ||||||
|  |     FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat); | ||||||
|  | 
 | ||||||
|  |     auto* output_list = &buffer_a; | ||||||
|  |     auto* input_list = &buffer_b; | ||||||
|  | 
 | ||||||
|  |     // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
 | ||||||
|  |     // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
 | ||||||
|  |     //       epsilon possible within f24 accuracy.
 | ||||||
|  |     static constexpr f24 EPSILON = f24::FromFloat32(0.00001f); | ||||||
|  |     static constexpr f24 f0 = f24::Zero(); | ||||||
|  |     static constexpr f24 f1 = f24::One(); | ||||||
|  |     static constexpr std::array<ClippingEdge, 7> clipping_edges = {{ | ||||||
|  |         {Common::MakeVec(-f1, f0, f0, f1)},                                        // x = +w
 | ||||||
|  |         {Common::MakeVec(f1, f0, f0, f1)},                                         // x = -w
 | ||||||
|  |         {Common::MakeVec(f0, -f1, f0, f1)},                                        // y = +w
 | ||||||
|  |         {Common::MakeVec(f0, f1, f0, f1)},                                         // y = -w
 | ||||||
|  |         {Common::MakeVec(f0, f0, -f1, f0)},                                        // z =  0
 | ||||||
|  |         {Common::MakeVec(f0, f0, f1, f1)},                                         // z = -w
 | ||||||
|  |         {Common::MakeVec(f0, f0, f0, f1), Common::Vec4<f24>(f0, f0, f0, EPSILON)}, // w = EPSILON
 | ||||||
|  |     }}; | ||||||
|  | 
 | ||||||
|  |     // Simple implementation of the Sutherland-Hodgman clipping algorithm.
 | ||||||
|  |     // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
 | ||||||
|  |     const auto clip = [&](const ClippingEdge& edge) { | ||||||
|  |         std::swap(input_list, output_list); | ||||||
|  |         output_list->clear(); | ||||||
|  | 
 | ||||||
|  |         const Vertex* reference_vertex = &input_list->back(); | ||||||
|  |         for (const auto& vertex : *input_list) { | ||||||
|  |             // NOTE: This algorithm changes vertex order in some cases!
 | ||||||
|  |             if (edge.IsInside(vertex)) { | ||||||
|  |                 if (edge.IsOutSide(*reference_vertex)) { | ||||||
|  |                     output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); | ||||||
|  |                 } | ||||||
|  |                 output_list->push_back(vertex); | ||||||
|  |             } else if (edge.IsInside(*reference_vertex)) { | ||||||
|  |                 output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); | ||||||
|  |             } | ||||||
|  |             reference_vertex = &vertex; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     for (const ClippingEdge& edge : clipping_edges) { | ||||||
|  |         clip(edge); | ||||||
|  |         if (output_list->size() < 3) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (state.regs.rasterizer.clip_enable) { | ||||||
|  |         const ClippingEdge custom_edge{state.regs.rasterizer.GetClipCoef()}; | ||||||
|  |         clip(custom_edge); | ||||||
|  |         if (output_list->size() < 3) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     MakeScreenCoords((*output_list)[0]); | ||||||
|  |     MakeScreenCoords((*output_list)[1]); | ||||||
|  | 
 | ||||||
|  |     for (std::size_t i = 0; i < output_list->size() - 2; i++) { | ||||||
|  |         Vertex& vtx0 = (*output_list)[0]; | ||||||
|  |         Vertex& vtx1 = (*output_list)[i + 1]; | ||||||
|  |         Vertex& vtx2 = (*output_list)[i + 2]; | ||||||
|  | 
 | ||||||
|  |         MakeScreenCoords(vtx2); | ||||||
|  | 
 | ||||||
|  |         LOG_TRACE( | ||||||
|  |             Render_Software, | ||||||
|  |             "Triangle {}/{} at position ({:.3}, {:.3}, {:.3}, {:.3f}), " | ||||||
|  |             "({:.3}, {:.3}, {:.3}, {:.3}), ({:.3}, {:.3}, {:.3}, {:.3}) and " | ||||||
|  |             "screen position ({:.2}, {:.2}, {:.2}), ({:.2}, {:.2}, {:.2}), ({:.2}, {:.2}, {:.2})", | ||||||
|  |             i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), | ||||||
|  |             vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(), | ||||||
|  |             vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | ||||||
|  |             vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), | ||||||
|  |             vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), | ||||||
|  |             vtx0.screenpos.z.ToFloat32(), vtx1.screenpos.x.ToFloat32(), | ||||||
|  |             vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), | ||||||
|  |             vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), | ||||||
|  |             vtx2.screenpos.z.ToFloat32()); | ||||||
|  | 
 | ||||||
|  |         ProcessTriangle(vtx0, vtx1, vtx2); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCore
 | void RasterizerSoftware::MakeScreenCoords(Vertex& vtx) { | ||||||
|  |     Viewport viewport{}; | ||||||
|  |     viewport.halfsize_x = f24::FromRaw(regs.rasterizer.viewport_size_x); | ||||||
|  |     viewport.halfsize_y = f24::FromRaw(regs.rasterizer.viewport_size_y); | ||||||
|  |     viewport.offset_x = f24::FromFloat32(static_cast<f32>(regs.rasterizer.viewport_corner.x)); | ||||||
|  |     viewport.offset_y = f24::FromFloat32(static_cast<f32>(regs.rasterizer.viewport_corner.y)); | ||||||
|  | 
 | ||||||
|  |     f24 inv_w = f24::One() / vtx.pos.w; | ||||||
|  |     vtx.pos.w = inv_w; | ||||||
|  |     vtx.quat *= inv_w; | ||||||
|  |     vtx.color *= inv_w; | ||||||
|  |     vtx.tc0 *= inv_w; | ||||||
|  |     vtx.tc1 *= inv_w; | ||||||
|  |     vtx.tc0_w *= inv_w; | ||||||
|  |     vtx.view *= inv_w; | ||||||
|  |     vtx.tc2 *= inv_w; | ||||||
|  | 
 | ||||||
|  |     vtx.screenpos[0] = (vtx.pos.x * inv_w + f24::One()) * viewport.halfsize_x + viewport.offset_x; | ||||||
|  |     vtx.screenpos[1] = (vtx.pos.y * inv_w + f24::One()) * viewport.halfsize_y + viewport.offset_y; | ||||||
|  |     vtx.screenpos[2] = vtx.pos.z * inv_w; | ||||||
|  | } | ||||||
|  | 
 | ||||||
/**
 * Rasterizes one screen-space triangle: applies culling and scissoring, then for every covered
 * pixel interpolates the vertex attributes, samples textures, evaluates lighting and the TEV
 * stages, and finally runs the output merger (shadow write, or alpha test, fog, depth/stencil
 * test and color write).
 * @param v0 First vertex; must already have been processed by MakeScreenCoords.
 * @param v1 Second vertex; same requirement as v0.
 * @param v2 Third vertex; same requirement as v0.
 * @param reversed True when this call is the internal retry with v1 and v2 swapped; it prevents
 *                 the winding logic from swapping the vertices a second time. AddTriangle omits
 *                 it, so the default comes from the declaration (not shown here).
 */
void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2,
                                         bool reversed) {
    MICROPROFILE_SCOPE(GPU_Rasterization);

    // Vertex positions in rasterizer coordinates
    static auto screen_to_rasterizer_coords = [](const Common::Vec3<f24>& vec) {
        return Common::Vec3{Fix12P4::FromFloat24(vec.x), Fix12P4::FromFloat24(vec.y),
                            Fix12P4::FromFloat24(vec.z)};
    };

    const std::array<Common::Vec3<Fix12P4>, 3> vtxpos = {
        screen_to_rasterizer_coords(v0.screenpos),
        screen_to_rasterizer_coords(v1.screenpos),
        screen_to_rasterizer_coords(v2.screenpos),
    };

    if (regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepAll) {
        // Make sure we always end up with a triangle wound counter-clockwise
        if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
            ProcessTriangle(v0, v2, v1, true);
            return;
        }
    } else {
        if (!reversed && regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepClockWise) {
            // Reverse vertex order and use the CCW code path.
            ProcessTriangle(v0, v2, v1, true);
            return;
        }
        // Cull away triangles which are wound clockwise.
        if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
            return;
        }
    }

    // Bounding box of the triangle in 12.4 fixed-point rasterizer coordinates.
    u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
    u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});

    // Convert the scissor box coordinates to 12.4 fixed point
    const u16 scissor_x1 = static_cast<u16>(regs.rasterizer.scissor_test.x1 << 4);
    const u16 scissor_y1 = static_cast<u16>(regs.rasterizer.scissor_test.y1 << 4);
    // x2,y2 have +1 added to cover the entire sub-pixel area
    const u16 scissor_x2 = static_cast<u16>((regs.rasterizer.scissor_test.x2 + 1) << 4);
    const u16 scissor_y2 = static_cast<u16>((regs.rasterizer.scissor_test.y2 + 1) << 4);

    if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Include) {
        // Calculate the new bounds
        min_x = std::max(min_x, scissor_x1);
        min_y = std::max(min_y, scissor_y1);
        max_x = std::min(max_x, scissor_x2);
        max_y = std::min(max_y, scissor_y2);
    }

    // Snap the bounding box to whole pixels: the minimum is masked down, the maximum has the
    // fractional mask added before masking so it is rounded up.
    min_x &= Fix12P4::IntMask();
    min_y &= Fix12P4::IntMask();
    max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
    max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());

    // Per-edge bias of -1 or 0 added to the edge functions below. With the bias at -1 a pixel
    // lying exactly on that edge fails the coverage test (presumably a fill rule so that shared
    // edges are not drawn twice; see IsRightSideOrFlatBottomEdge).
    const int bias0 =
        IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
    const int bias1 =
        IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
    const int bias2 =
        IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;

    // pos.w already holds 1/w at this point (MakeScreenCoords overwrites it).
    const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);

    auto textures = regs.texturing.GetTextures();
    const auto tev_stages = regs.texturing.GetTevStages();

    // Stencil actions are only enabled when the stencil test is on and the depth format is D24S8.
    const bool stencil_action_enable =
        regs.framebuffer.output_merger.stencil_test.enable &&
        regs.framebuffer.framebuffer.depth_format == FramebufferRegs::DepthFormat::D24S8;
    const auto stencil_test = regs.framebuffer.output_merger.stencil_test;

    // Enter rasterization loop, starting at the center of the topleft bounding box corner.
    // The +8 offset is half a pixel and the 0x10 step is one full pixel in 12.4 fixed point.
    // TODO: Not sure if looping through x first might be faster
    for (u16 y = min_y + 8; y < max_y; y += 0x10) {
        for (u16 x = min_x + 8; x < max_x; x += 0x10) {
            // Do not process the pixel if it's inside the scissor box and the scissor mode is set
            // to Exclude.
            if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
                if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) {
                    continue;
                }
            }

            // Calculate the barycentric coordinates w0, w1 and w2
            const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
            const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
            const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
            const s32 wsum = w0 + w1 + w2;

            // If current pixel is not covered by the current primitive
            if (w0 < 0 || w1 < 0 || w2 < 0) {
                continue;
            }

            const auto baricentric_coordinates = Common::MakeVec(
                f24::FromFloat32(static_cast<f32>(w0)), f24::FromFloat32(static_cast<f32>(w1)),
                f24::FromFloat32(static_cast<f32>(w2)));
            // Reciprocal of the (unnormalized) barycentric-weighted sum of the per-vertex 1/w.
            const f24 interpolated_w_inverse =
                f24::One() / Common::Dot(w_inverse, baricentric_coordinates);

            // interpolated_z = z / w
            const float interpolated_z_over_w =
                (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
                 v2.screenpos[2].ToFloat32() * w2) /
                wsum;

            // Not fully accurate. About 3 bits in precision are missing.
            // Z-Buffer (z / w * scale + offset)
            const float depth_scale =
                f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32();
            const float depth_offset =
                f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32();
            float depth = interpolated_z_over_w * depth_scale + depth_offset;

            // Potentially switch to W-Buffer
            if (regs.rasterizer.depthmap_enable ==
                Pica::RasterizerRegs::DepthBuffering::WBuffering) {
                // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
                depth *= interpolated_w_inverse.ToFloat32() * wsum;
            }

            // Clamp the result
            depth = std::clamp(depth, 0.0f, 1.0f);

            /**
             * Perspective correct attribute interpolation:
             * Attribute values cannot be calculated by simple linear interpolation since
             * they are not linear in screen space. For example, when interpolating a
             * texture coordinate across two vertices, something simple like
             *     u = (u0*w0 + u1*w1)/(w0+w1)
             * will not work. However, the attribute value divided by the
             * clipspace w-coordinate (u/w) and the inverse w-coordinate (1/w) are linear
             * in screenspace. Hence, we can linearly interpolate these two independently and
             * calculate the interpolated attribute by dividing the results.
             * I.e.
             *     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
             *     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
             *     u = u_over_w / one_over_w
             *
             * The generalization to three vertices is straightforward in barycentric coordinates.
             **/
            const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) {
                auto attr_over_w = Common::MakeVec(attr0, attr1, attr2);
                f24 interpolated_attr_over_w = Common::Dot(attr_over_w, baricentric_coordinates);
                return interpolated_attr_over_w * interpolated_w_inverse;
            };

            // Interpolated vertex color, scaled by 255 and rounded to 8 bits per channel.
            const Common::Vec4<u8> primary_color{
                static_cast<u8>(
                    round(get_interpolated_attribute(v0.color.r(), v1.color.r(), v2.color.r())
                              .ToFloat32() *
                          255)),
                static_cast<u8>(
                    round(get_interpolated_attribute(v0.color.g(), v1.color.g(), v2.color.g())
                              .ToFloat32() *
                          255)),
                static_cast<u8>(
                    round(get_interpolated_attribute(v0.color.b(), v1.color.b(), v2.color.b())
                              .ToFloat32() *
                          255)),
                static_cast<u8>(
                    round(get_interpolated_attribute(v0.color.a(), v1.color.a(), v2.color.a())
                              .ToFloat32() *
                          255)),
            };

            // Interpolated texture coordinates for the three coordinate sets tc0, tc1 and tc2.
            std::array<Common::Vec2<f24>, 3> uv;
            uv[0].u() = get_interpolated_attribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
            uv[0].v() = get_interpolated_attribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
            uv[1].u() = get_interpolated_attribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
            uv[1].v() = get_interpolated_attribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
            uv[2].u() = get_interpolated_attribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
            uv[2].v() = get_interpolated_attribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());

            // Sample bound texture units.
            const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
            const auto texture_color = TextureColor(uv, textures, tc0_w);

            // Fragment lighting colors stay zero when lighting is disabled.
            Common::Vec4<u8> primary_fragment_color{0, 0, 0, 0};
            Common::Vec4<u8> secondary_fragment_color{0, 0, 0, 0};
            if (!regs.lighting.disable) {
                const auto normquat =
                    Common::Quaternion<f32>{
                        {get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(),
                         get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(),
                         get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()},
                        get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
                    }
                        .Normalized();

                const Common::Vec3f view{
                    get_interpolated_attribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(),
                    get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
                    get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
                };
                std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(
                    regs.lighting, state.lighting, normquat, view, texture_color);
            }

            // Write the TEV stages.
            Common::Vec4<u8> combiner_output =
                WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
                               secondary_fragment_color);

            const auto& output_merger = regs.framebuffer.output_merger;
            if (output_merger.fragment_operation_mode ==
                FramebufferRegs::FragmentOperationMode::Shadow) {
                // Depth is scaled to a 24-bit integer for the shadow map.
                u32 depth_int = static_cast<u32>(depth * 0xFFFFFF);
                // Use green color as the shadow intensity
                u8 stencil = combiner_output.y;
                // x >> 4 and y >> 4 drop the 4 fractional bits to get integer pixel coordinates.
                fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil);
                // Skip the normal output merger pipeline if it is in shadow mode
                continue;
            }

            // Does alpha testing happen before or after stencil?
            if (!DoAlphaTest(combiner_output.a())) {
                continue;
            }
            WriteFog(combiner_output, depth);
            if (!DoDepthStencilTest(x, y, depth, stencil_action_enable)) {
                continue;
            }
            const auto result = PixelColor(x, y, combiner_output);
            if (regs.framebuffer.framebuffer.allow_color_write != 0) {
                fb.DrawPixel(x >> 4, y >> 4, result);
            }
        }
    }
}
|  | 
 | ||||||
/**
 * Samples the three regular texture units and, when enabled, the procedural texture for one
 * fragment.
 * @param uv Interpolated texture coordinates for the three coordinate sets.
 * @param textures Configuration of the three regular texture units.
 * @param tc0_w Interpolated w component of texture coordinate set 0; used by the cube,
 *              projection and shadow texture types on unit 0.
 * @return Four colors: indices 0-2 for the regular units, index 3 for the procedural texture.
 *         Entries of units that are skipped stay zero-initialized.
 */
std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
    std::span<const Common::Vec2<f24>, 3> uv,
    std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const {
    std::array<Common::Vec4<u8>, 4> texture_color{};
    for (u32 i = 0; i < 3; ++i) {
        const auto& texture = textures[i];
        if (!texture.enabled) [[unlikely]] {
            continue;
        }
        // A texture with a zero address is not read from memory; it yields opaque black.
        if (texture.config.address == 0) [[unlikely]] {
            texture_color[i] = {0, 0, 0, 255};
            continue;
        }

        // Unit 2 can be configured to reuse coordinate set 1 instead of its own.
        const s32 coordinate_i = (i == 2 && regs.texturing.main_config.texture2_use_coord1) ? 1 : i;
        f24 u = uv[coordinate_i].u();
        f24 v = uv[coordinate_i].v();

        // Only unit 0 respects the texturing type (according to 3DBrew)
        PAddr texture_address = texture.config.GetPhysicalAddress();
        // Only assigned for the shadow/cube types below; it is read again solely in the shadow
        // comparison at the end of the loop body, which is limited to those types.
        f24 shadow_z;
        if (i == 0) {
            switch (texture.config.type) {
            case TexturingRegs::TextureConfig::Texture2D:
                break;
            case TexturingRegs::TextureConfig::ShadowCube:
            case TexturingRegs::TextureConfig::TextureCube: {
                std::tie(u, v, shadow_z, texture_address) =
                    ConvertCubeCoord(u, v, tc0_w, regs.texturing);
                break;
            }
            case TexturingRegs::TextureConfig::Projection2D: {
                u /= tc0_w;
                v /= tc0_w;
                break;
            }
            case TexturingRegs::TextureConfig::Shadow2D: {
                if (!regs.texturing.shadow.orthographic) {
                    u /= tc0_w;
                    v /= tc0_w;
                }
                shadow_z = f24::FromFloat32(std::abs(tc0_w.ToFloat32()));
                break;
            }
            case TexturingRegs::TextureConfig::Disabled:
                continue; // skip this unit and continue to the next unit
            default:
                LOG_ERROR(HW_GPU, "Unhandled texture type {:x}", (int)texture.config.type);
                UNIMPLEMENTED();
                break;
            }
        }

        // Scale the coordinates to texel space and truncate them to integers.
        const f24 width = f24::FromFloat32(static_cast<f32>(texture.config.width));
        const f24 height = f24::FromFloat32(static_cast<f32>(texture.config.height));
        s32 s = static_cast<s32>((u * width).ToFloat32());
        s32 t = static_cast<s32>((v * height).ToFloat32());

        bool use_border_s = false;
        bool use_border_t = false;

        // ClampToBorder uses the border color on both sides of the texture, ClampToBorder2 only
        // beyond the upper bound.
        if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) {
            use_border_s = s < 0 || s >= static_cast<s32>(texture.config.width);
        } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) {
            use_border_s = s >= static_cast<s32>(texture.config.width);
        }

        if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) {
            use_border_t = t < 0 || t >= static_cast<s32>(texture.config.height);
        } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) {
            use_border_t = t >= static_cast<s32>(texture.config.height);
        }

        if (use_border_s || use_border_t) {
            const auto border_color = texture.config.border_color;
            texture_color[i] = Common::MakeVec(border_color.r.Value(), border_color.g.Value(),
                                               border_color.b.Value(), border_color.a.Value())
                                   .Cast<u8>();
        } else {
            // Textures are laid out from bottom to top, hence we invert the t coordinate.
            // NOTE: This may not be the right place for the inversion.
            // TODO: Check if this applies to ETC textures, too.
            s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
            t = texture.config.height - 1 -
                GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);

            const u8* texture_data = memory.GetPhysicalPointer(texture_address);
            const auto info = TextureInfo::FromPicaRegister(texture.config, texture.format);

            // TODO: Apply the min and mag filters to the texture
            texture_color[i] = LookupTexture(texture_data, s, t, info);
        }

        if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D ||
                       texture.config.type == TexturingRegs::TextureConfig::ShadowCube)) {

            // Fragment depth as a 24-bit integer (clamped to 1.0 first), minus the shadow bias.
            s32 z_int = static_cast<s32>(std::min(shadow_z.ToFloat32(), 1.0f) * 0xFFFFFF);
            z_int -= regs.texturing.shadow.bias << 1;
            const auto& color = texture_color[i];
            // The sampled texel packs a 24-bit reference depth in its w, z and y components and
            // the shadow density in x.
            const s32 z_ref = (color.w << 16) | (color.z << 8) | color.y;
            u8 density;
            if (z_ref >= z_int) {
                density = color.x;
            } else {
                density = 0;
            }
            texture_color[i] = {density, density, density, density};
        }
    }

    // Sample procedural texture
    // NOTE(review): uv has exactly three entries; confirm texture3_coordinates can never select
    // an index of 3 here.
    if (regs.texturing.main_config.texture3_enable) {
        const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates];
        texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(),
                                   regs.texturing, state.proctex);
    }

    return texture_color;
}
|  | 
 | ||||||
|  | Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y, | ||||||
|  |                                                 Common::Vec4<u8>& combiner_output) const { | ||||||
|  |     const auto dest = fb.GetPixel(x >> 4, y >> 4); | ||||||
|  |     Common::Vec4<u8> blend_output = combiner_output; | ||||||
|  | 
 | ||||||
|  |     const auto& output_merger = regs.framebuffer.output_merger; | ||||||
|  |     if (output_merger.alphablend_enable) { | ||||||
|  |         const auto params = output_merger.alpha_blending; | ||||||
|  |         const auto lookup_factor = [&](u32 channel, FramebufferRegs::BlendFactor factor) -> u8 { | ||||||
|  |             DEBUG_ASSERT(channel < 4); | ||||||
|  | 
 | ||||||
|  |             const Common::Vec4<u8> blend_const = | ||||||
|  |                 Common::MakeVec( | ||||||
|  |                     output_merger.blend_const.r.Value(), output_merger.blend_const.g.Value(), | ||||||
|  |                     output_merger.blend_const.b.Value(), output_merger.blend_const.a.Value()) | ||||||
|  |                     .Cast<u8>(); | ||||||
|  | 
 | ||||||
|  |             switch (factor) { | ||||||
|  |             case FramebufferRegs::BlendFactor::Zero: | ||||||
|  |                 return 0; | ||||||
|  |             case FramebufferRegs::BlendFactor::One: | ||||||
|  |                 return 255; | ||||||
|  |             case FramebufferRegs::BlendFactor::SourceColor: | ||||||
|  |                 return combiner_output[channel]; | ||||||
|  |             case FramebufferRegs::BlendFactor::OneMinusSourceColor: | ||||||
|  |                 return 255 - combiner_output[channel]; | ||||||
|  |             case FramebufferRegs::BlendFactor::DestColor: | ||||||
|  |                 return dest[channel]; | ||||||
|  |             case FramebufferRegs::BlendFactor::OneMinusDestColor: | ||||||
|  |                 return 255 - dest[channel]; | ||||||
|  |             case FramebufferRegs::BlendFactor::SourceAlpha: | ||||||
|  |                 return combiner_output.a(); | ||||||
|  |             case FramebufferRegs::BlendFactor::OneMinusSourceAlpha: | ||||||
|  |                 return 255 - combiner_output.a(); | ||||||
|  |             case FramebufferRegs::BlendFactor::DestAlpha: | ||||||
|  |                 return dest.a(); | ||||||
|  |             case FramebufferRegs::BlendFactor::OneMinusDestAlpha: | ||||||
|  |                 return 255 - dest.a(); | ||||||
|  |             case FramebufferRegs::BlendFactor::ConstantColor: | ||||||
|  |                 return blend_const[channel]; | ||||||
|  |             case FramebufferRegs::BlendFactor::OneMinusConstantColor: | ||||||
|  |                 return 255 - blend_const[channel]; | ||||||
|  |             case FramebufferRegs::BlendFactor::ConstantAlpha: | ||||||
|  |                 return blend_const.a(); | ||||||
|  |             case FramebufferRegs::BlendFactor::OneMinusConstantAlpha: | ||||||
|  |                 return 255 - blend_const.a(); | ||||||
|  |             case FramebufferRegs::BlendFactor::SourceAlphaSaturate: | ||||||
|  |                 // Returns 1.0 for the alpha channel
 | ||||||
|  |                 if (channel == 3) { | ||||||
|  |                     return 255; | ||||||
|  |                 } | ||||||
|  |                 return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a())); | ||||||
|  |             default: | ||||||
|  |                 LOG_CRITICAL(HW_GPU, "Unknown blend factor {:x}", factor); | ||||||
|  |                 UNIMPLEMENTED(); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             return combiner_output[channel]; | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         const auto srcfactor = Common::MakeVec( | ||||||
|  |             lookup_factor(0, params.factor_source_rgb), lookup_factor(1, params.factor_source_rgb), | ||||||
|  |             lookup_factor(2, params.factor_source_rgb), lookup_factor(3, params.factor_source_a)); | ||||||
|  | 
 | ||||||
|  |         const auto dstfactor = Common::MakeVec( | ||||||
|  |             lookup_factor(0, params.factor_dest_rgb), lookup_factor(1, params.factor_dest_rgb), | ||||||
|  |             lookup_factor(2, params.factor_dest_rgb), lookup_factor(3, params.factor_dest_a)); | ||||||
|  | 
 | ||||||
|  |         blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, | ||||||
|  |                                              params.blend_equation_rgb); | ||||||
|  |         blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, | ||||||
|  |                                                  params.blend_equation_a) | ||||||
|  |                                .a(); | ||||||
|  |     } else { | ||||||
|  |         blend_output = | ||||||
|  |             Common::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | ||||||
|  |                             LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | ||||||
|  |                             LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), | ||||||
|  |                             LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const Common::Vec4<u8> result = { | ||||||
|  |         output_merger.red_enable ? blend_output.r() : dest.r(), | ||||||
|  |         output_merger.green_enable ? blend_output.g() : dest.g(), | ||||||
|  |         output_merger.blue_enable ? blend_output.b() : dest.b(), | ||||||
|  |         output_merger.alpha_enable ? blend_output.a() : dest.a(), | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Common::Vec4<u8> RasterizerSoftware::WriteTevConfig( | ||||||
|  |     std::span<const Common::Vec4<u8>, 4> texture_color, | ||||||
|  |     std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages, | ||||||
|  |     Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color, | ||||||
|  |     Common::Vec4<u8> secondary_fragment_color) const { | ||||||
|  |     /**
 | ||||||
|  |      * Texture environment - consists of 6 stages of color and alpha combining. | ||||||
|  |      * Color combiners take three input color values from some source (e.g. interpolated | ||||||
|  |      * vertex color, texture color, previous stage, etc), perform some very simple | ||||||
|  |      * operations on each of them (e.g. inversion) and then calculate the output color | ||||||
|  |      * with some basic arithmetic. Alpha combiners can be configured separately but work | ||||||
|  |      * analogously. | ||||||
|  |      **/ | ||||||
|  |     Common::Vec4<u8> combiner_output; | ||||||
|  |     Common::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; | ||||||
|  |     Common::Vec4<u8> next_combiner_buffer = | ||||||
|  |         Common::MakeVec(regs.texturing.tev_combiner_buffer_color.r.Value(), | ||||||
|  |                         regs.texturing.tev_combiner_buffer_color.g.Value(), | ||||||
|  |                         regs.texturing.tev_combiner_buffer_color.b.Value(), | ||||||
|  |                         regs.texturing.tev_combiner_buffer_color.a.Value()) | ||||||
|  |             .Cast<u8>(); | ||||||
|  | 
 | ||||||
|  |     for (u32 tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | ||||||
|  |         const auto& tev_stage = tev_stages[tev_stage_index]; | ||||||
|  |         using Source = TexturingRegs::TevStageConfig::Source; | ||||||
|  | 
 | ||||||
|  |         auto get_source = [&](Source source) -> Common::Vec4<u8> { | ||||||
|  |             switch (source) { | ||||||
|  |             case Source::PrimaryColor: | ||||||
|  |                 return primary_color; | ||||||
|  |             case Source::PrimaryFragmentColor: | ||||||
|  |                 return primary_fragment_color; | ||||||
|  |             case Source::SecondaryFragmentColor: | ||||||
|  |                 return secondary_fragment_color; | ||||||
|  |             case Source::Texture0: | ||||||
|  |                 return texture_color[0]; | ||||||
|  |             case Source::Texture1: | ||||||
|  |                 return texture_color[1]; | ||||||
|  |             case Source::Texture2: | ||||||
|  |                 return texture_color[2]; | ||||||
|  |             case Source::Texture3: | ||||||
|  |                 return texture_color[3]; | ||||||
|  |             case Source::PreviousBuffer: | ||||||
|  |                 return combiner_buffer; | ||||||
|  |             case Source::Constant: | ||||||
|  |                 return Common::MakeVec(tev_stage.const_r.Value(), tev_stage.const_g.Value(), | ||||||
|  |                                        tev_stage.const_b.Value(), tev_stage.const_a.Value()) | ||||||
|  |                     .Cast<u8>(); | ||||||
|  |             case Source::Previous: | ||||||
|  |                 return combiner_output; | ||||||
|  |             default: | ||||||
|  |                 LOG_ERROR(HW_GPU, "Unknown color combiner source {}", (int)source); | ||||||
|  |                 UNIMPLEMENTED(); | ||||||
|  |                 return {0, 0, 0, 0}; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         /**
 | ||||||
|  |          * Color combiner | ||||||
|  |          * NOTE: Not sure if the alpha combiner might use the color output of the previous | ||||||
|  |          *       stage as input. Hence, we currently don't directly write the result to | ||||||
|  |          *       combiner_output.rgb(), but instead store it in a temporary variable until | ||||||
|  |          *       alpha combining has been done. | ||||||
|  |          **/ | ||||||
|  |         const std::array<Common::Vec3<u8>, 3> color_result = { | ||||||
|  |             GetColorModifier(tev_stage.color_modifier1, get_source(tev_stage.color_source1)), | ||||||
|  |             GetColorModifier(tev_stage.color_modifier2, get_source(tev_stage.color_source2)), | ||||||
|  |             GetColorModifier(tev_stage.color_modifier3, get_source(tev_stage.color_source3)), | ||||||
|  |         }; | ||||||
|  |         const Common::Vec3<u8> color_output = ColorCombine(tev_stage.color_op, color_result); | ||||||
|  | 
 | ||||||
|  |         u8 alpha_output; | ||||||
|  |         if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { | ||||||
|  |             // result of Dot3_RGBA operation is also placed to the alpha component
 | ||||||
|  |             alpha_output = color_output.x; | ||||||
|  |         } else { | ||||||
|  |             // alpha combiner
 | ||||||
|  |             const std::array<u8, 3> alpha_result = {{ | ||||||
|  |                 GetAlphaModifier(tev_stage.alpha_modifier1, get_source(tev_stage.alpha_source1)), | ||||||
|  |                 GetAlphaModifier(tev_stage.alpha_modifier2, get_source(tev_stage.alpha_source2)), | ||||||
|  |                 GetAlphaModifier(tev_stage.alpha_modifier3, get_source(tev_stage.alpha_source3)), | ||||||
|  |             }}; | ||||||
|  |             alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         combiner_output[0] = std::min(255U, color_output.r() * tev_stage.GetColorMultiplier()); | ||||||
|  |         combiner_output[1] = std::min(255U, color_output.g() * tev_stage.GetColorMultiplier()); | ||||||
|  |         combiner_output[2] = std::min(255U, color_output.b() * tev_stage.GetColorMultiplier()); | ||||||
|  |         combiner_output[3] = std::min(255U, alpha_output * tev_stage.GetAlphaMultiplier()); | ||||||
|  | 
 | ||||||
|  |         combiner_buffer = next_combiner_buffer; | ||||||
|  | 
 | ||||||
|  |         if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( | ||||||
|  |                 tev_stage_index)) { | ||||||
|  |             next_combiner_buffer.r() = combiner_output.r(); | ||||||
|  |             next_combiner_buffer.g() = combiner_output.g(); | ||||||
|  |             next_combiner_buffer.b() = combiner_output.b(); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( | ||||||
|  |                 tev_stage_index)) { | ||||||
|  |             next_combiner_buffer.a() = combiner_output.a(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     return combiner_output; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void RasterizerSoftware::WriteFog(Common::Vec4<u8>& combiner_output, float depth) const { | ||||||
|  |     /**
 | ||||||
|  |      * Apply fog combiner. Not fully accurate. We'd have to know what data type is used to | ||||||
|  |      * store the depth etc. Using float for now until we know more about Pica datatypes. | ||||||
|  |      **/ | ||||||
|  |     if (regs.texturing.fog_mode == TexturingRegs::FogMode::Fog) { | ||||||
|  |         const Common::Vec3<u8> fog_color = | ||||||
|  |             Common::MakeVec(regs.texturing.fog_color.r.Value(), regs.texturing.fog_color.g.Value(), | ||||||
|  |                             regs.texturing.fog_color.b.Value()) | ||||||
|  |                 .Cast<u8>(); | ||||||
|  | 
 | ||||||
|  |         float fog_index; | ||||||
|  |         if (regs.texturing.fog_flip) { | ||||||
|  |             fog_index = (1.0f - depth) * 128.0f; | ||||||
|  |         } else { | ||||||
|  |             fog_index = depth * 128.0f; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Generate clamped fog factor from LUT for given fog index
 | ||||||
|  |         const f32 fog_i = std::clamp(floorf(fog_index), 0.0f, 127.0f); | ||||||
|  |         const f32 fog_f = fog_index - fog_i; | ||||||
|  |         const auto& fog_lut_entry = state.fog.lut[static_cast<u32>(fog_i)]; | ||||||
|  |         f32 fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f; | ||||||
|  |         fog_factor = std::clamp(fog_factor, 0.0f, 1.0f); | ||||||
|  |         for (u32 i = 0; i < 3; i++) { | ||||||
|  |             combiner_output[i] = static_cast<u8>(fog_factor * combiner_output[i] + | ||||||
|  |                                                  (1.0f - fog_factor) * fog_color[i]); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool RasterizerSoftware::DoAlphaTest(u8 alpha) const { | ||||||
|  |     const auto& output_merger = regs.framebuffer.output_merger; | ||||||
|  |     if (!output_merger.alpha_test.enable) { | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |     switch (output_merger.alpha_test.func) { | ||||||
|  |     case FramebufferRegs::CompareFunc::Never: | ||||||
|  |         return false; | ||||||
|  |     case FramebufferRegs::CompareFunc::Always: | ||||||
|  |         return true; | ||||||
|  |     case FramebufferRegs::CompareFunc::Equal: | ||||||
|  |         return alpha == output_merger.alpha_test.ref; | ||||||
|  |     case FramebufferRegs::CompareFunc::NotEqual: | ||||||
|  |         return alpha != output_merger.alpha_test.ref; | ||||||
|  |     case FramebufferRegs::CompareFunc::LessThan: | ||||||
|  |         return alpha < output_merger.alpha_test.ref; | ||||||
|  |     case FramebufferRegs::CompareFunc::LessThanOrEqual: | ||||||
|  |         return alpha <= output_merger.alpha_test.ref; | ||||||
|  |     case FramebufferRegs::CompareFunc::GreaterThan: | ||||||
|  |         return alpha > output_merger.alpha_test.ref; | ||||||
|  |     case FramebufferRegs::CompareFunc::GreaterThanOrEqual: | ||||||
|  |         return alpha >= output_merger.alpha_test.ref; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool RasterizerSoftware::DoDepthStencilTest(u16 x, u16 y, float depth, | ||||||
|  |                                             bool stencil_action_enable) const { | ||||||
|  |     const auto& framebuffer = regs.framebuffer.framebuffer; | ||||||
|  |     const auto stencil_test = regs.framebuffer.output_merger.stencil_test; | ||||||
|  |     u8 old_stencil = 0; | ||||||
|  | 
 | ||||||
|  |     const auto update_stencil = [&](Pica::FramebufferRegs::StencilAction action) { | ||||||
|  |         const u8 new_stencil = | ||||||
|  |             PerformStencilAction(action, old_stencil, stencil_test.reference_value); | ||||||
|  |         if (framebuffer.allow_depth_stencil_write != 0) { | ||||||
|  |             const u8 stencil = | ||||||
|  |                 (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask); | ||||||
|  |             fb.SetStencil(x >> 4, y >> 4, stencil); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     if (stencil_action_enable) { | ||||||
|  |         old_stencil = fb.GetStencil(x >> 4, y >> 4); | ||||||
|  |         const u8 dest = old_stencil & stencil_test.input_mask; | ||||||
|  |         const u8 ref = stencil_test.reference_value & stencil_test.input_mask; | ||||||
|  |         bool pass = false; | ||||||
|  |         switch (stencil_test.func) { | ||||||
|  |         case FramebufferRegs::CompareFunc::Never: | ||||||
|  |             pass = false; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::Always: | ||||||
|  |             pass = true; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::Equal: | ||||||
|  |             pass = (ref == dest); | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::NotEqual: | ||||||
|  |             pass = (ref != dest); | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::LessThan: | ||||||
|  |             pass = (ref < dest); | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::LessThanOrEqual: | ||||||
|  |             pass = (ref <= dest); | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::GreaterThan: | ||||||
|  |             pass = (ref > dest); | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::GreaterThanOrEqual: | ||||||
|  |             pass = (ref >= dest); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         if (!pass) { | ||||||
|  |             update_stencil(stencil_test.action_stencil_fail); | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const u32 num_bits = FramebufferRegs::DepthBitsPerPixel(framebuffer.depth_format); | ||||||
|  |     const u32 z = static_cast<u32>(depth * ((1 << num_bits) - 1)); | ||||||
|  | 
 | ||||||
|  |     const auto& output_merger = regs.framebuffer.output_merger; | ||||||
|  |     if (output_merger.depth_test_enable) { | ||||||
|  |         const u32 ref_z = fb.GetDepth(x >> 4, y >> 4); | ||||||
|  |         bool pass = false; | ||||||
|  |         switch (output_merger.depth_test_func) { | ||||||
|  |         case FramebufferRegs::CompareFunc::Never: | ||||||
|  |             pass = false; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::Always: | ||||||
|  |             pass = true; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::Equal: | ||||||
|  |             pass = z == ref_z; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::NotEqual: | ||||||
|  |             pass = z != ref_z; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::LessThan: | ||||||
|  |             pass = z < ref_z; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::LessThanOrEqual: | ||||||
|  |             pass = z <= ref_z; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::GreaterThan: | ||||||
|  |             pass = z > ref_z; | ||||||
|  |             break; | ||||||
|  |         case FramebufferRegs::CompareFunc::GreaterThanOrEqual: | ||||||
|  |             pass = z >= ref_z; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         if (!pass) { | ||||||
|  |             if (stencil_action_enable) { | ||||||
|  |                 update_stencil(stencil_test.action_depth_fail); | ||||||
|  |             } | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (framebuffer.allow_depth_stencil_write != 0 && output_merger.depth_write_enable) { | ||||||
|  |         fb.SetDepth(x >> 4, y >> 4, z); | ||||||
|  |     } | ||||||
|  |     // The stencil depth_pass action is executed even if depth testing is disabled
 | ||||||
|  |     if (stencil_action_enable) { | ||||||
|  |         update_stencil(stencil_test.action_depth_pass); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -4,16 +4,30 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include <span> | ||||||
|  | 
 | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
|  | #include "video_core/regs_texturing.h" | ||||||
|  | #include "video_core/renderer_software/sw_clipper.h" | ||||||
|  | #include "video_core/renderer_software/sw_framebuffer.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica::Shader { | namespace Pica::Shader { | ||||||
| struct OutputVertex; | struct OutputVertex; | ||||||
| } // namespace Pica::Shader
 | } | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace Pica { | ||||||
|  | struct State; | ||||||
|  | struct Regs; | ||||||
|  | } // namespace Pica
 | ||||||
|  | 
 | ||||||
|  | namespace SwRenderer { | ||||||
|  | 
 | ||||||
|  | struct Vertex; | ||||||
|  | 
 | ||||||
|  | class RasterizerSoftware : public VideoCore::RasterizerInterface { | ||||||
|  | public: | ||||||
|  |     explicit RasterizerSoftware(Memory::MemorySystem& memory); | ||||||
| 
 | 
 | ||||||
| class RasterizerSoftware : public RasterizerInterface { |  | ||||||
|     void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, |     void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, | ||||||
|                      const Pica::Shader::OutputVertex& v2) override; |                      const Pica::Shader::OutputVertex& v2) override; | ||||||
|     void DrawTriangles() override {} |     void DrawTriangles() override {} | ||||||
|  | @ -23,6 +37,44 @@ class RasterizerSoftware : public RasterizerInterface { | ||||||
|     void InvalidateRegion(PAddr addr, u32 size) override {} |     void InvalidateRegion(PAddr addr, u32 size) override {} | ||||||
|     void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} |     void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} | ||||||
|     void ClearAll(bool flush) override {} |     void ClearAll(bool flush) override {} | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     /// Computes the screen coordinates of the provided vertex.
 | ||||||
|  |     void MakeScreenCoords(Vertex& vtx); | ||||||
|  | 
 | ||||||
|  |     /// Processes the triangle defined by the provided vertices.
 | ||||||
|  |     void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2, | ||||||
|  |                          bool reversed = false); | ||||||
|  | 
 | ||||||
|  |     /// Returns the texture color of the currently processed pixel.
 | ||||||
|  |     std::array<Common::Vec4<u8>, 4> TextureColor( | ||||||
|  |         std::span<const Common::Vec2<f24>, 3> uv, | ||||||
|  |         std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const; | ||||||
|  | 
 | ||||||
|  |     /// Returns the final pixel color with blending or logic ops applied.
 | ||||||
|  |     Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8>& combiner_output) const; | ||||||
|  | 
 | ||||||
|  |     /// Emulates the TEV configuration and returns the combiner output.
 | ||||||
|  |     Common::Vec4<u8> WriteTevConfig( | ||||||
|  |         std::span<const Common::Vec4<u8>, 4> texture_color, | ||||||
|  |         std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages, | ||||||
|  |         Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color, | ||||||
|  |         Common::Vec4<u8> secondary_fragment_color) const; | ||||||
|  | 
 | ||||||
|  |     /// Blends fog to the combiner output if enabled.
 | ||||||
|  |     void WriteFog(Common::Vec4<u8>& combiner_output, float depth) const; | ||||||
|  | 
 | ||||||
|  |     /// Performs the alpha test. Returns false if the test failed.
 | ||||||
|  |     bool DoAlphaTest(u8 alpha) const; | ||||||
|  | 
 | ||||||
|  |     /// Performs the depth stencil test. Returns false if the test failed.
 | ||||||
|  |     bool DoDepthStencilTest(u16 x, u16 y, float depth, bool stencil_action_enable) const; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     Memory::MemorySystem& memory; | ||||||
|  |     Pica::State& state; | ||||||
|  |     const Pica::Regs& regs; | ||||||
|  |     Framebuffer fb; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCore
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -9,41 +9,40 @@ | ||||||
| #include "video_core/regs_texturing.h" | #include "video_core/regs_texturing.h" | ||||||
| #include "video_core/renderer_software/sw_texturing.h" | #include "video_core/renderer_software/sw_texturing.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica::Rasterizer { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| using TevStageConfig = TexturingRegs::TevStageConfig; | using TevStageConfig = Pica::TexturingRegs::TevStageConfig; | ||||||
|  | 
 | ||||||
|  | int GetWrappedTexCoord(Pica::TexturingRegs::TextureConfig::WrapMode mode, s32 val, u32 size) { | ||||||
|  |     using TextureConfig = Pica::TexturingRegs::TextureConfig; | ||||||
| 
 | 
 | ||||||
| int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { |  | ||||||
|     switch (mode) { |     switch (mode) { | ||||||
|     case TexturingRegs::TextureConfig::ClampToEdge2: |     case TextureConfig::ClampToEdge2: | ||||||
|         // For negative coordinate, ClampToEdge2 behaves the same as Repeat
 |         // For negative coordinate, ClampToEdge2 behaves the same as Repeat
 | ||||||
|         if (val < 0) { |         if (val < 0) { | ||||||
|             return static_cast<int>(static_cast<unsigned>(val) % size); |             return static_cast<s32>(static_cast<u32>(val) % size); | ||||||
|         } |         } | ||||||
|     // [[fallthrough]]
 |         [[fallthrough]]; | ||||||
|     case TexturingRegs::TextureConfig::ClampToEdge: |     case TextureConfig::ClampToEdge: | ||||||
|         val = std::max(val, 0); |         val = std::max(val, 0); | ||||||
|         val = std::min(val, static_cast<int>(size) - 1); |         val = std::min(val, static_cast<s32>(size) - 1); | ||||||
|         return val; |         return val; | ||||||
| 
 |     case TextureConfig::ClampToBorder: | ||||||
|     case TexturingRegs::TextureConfig::ClampToBorder: |  | ||||||
|         return val; |         return val; | ||||||
| 
 |     case TextureConfig::ClampToBorder2: | ||||||
|     case TexturingRegs::TextureConfig::ClampToBorder2: |  | ||||||
|     // For ClampToBorder2, the case of positive coordinate beyond the texture size is already
 |     // For ClampToBorder2, the case of positive coordinate beyond the texture size is already
 | ||||||
|     // handled outside. Here we only handle the negative coordinate in the same way as Repeat.
 |     // handled outside. Here we only handle the negative coordinate in the same way as Repeat.
 | ||||||
|     case TexturingRegs::TextureConfig::Repeat2: |     case TextureConfig::Repeat2: | ||||||
|     case TexturingRegs::TextureConfig::Repeat3: |     case TextureConfig::Repeat3: | ||||||
|     case TexturingRegs::TextureConfig::Repeat: |     case TextureConfig::Repeat: | ||||||
|         return static_cast<int>(static_cast<unsigned>(val) % size); |         return static_cast<s32>(static_cast<u32>(val) % size); | ||||||
| 
 |     case TextureConfig::MirroredRepeat: { | ||||||
|     case TexturingRegs::TextureConfig::MirroredRepeat: { |         u32 coord = (static_cast<u32>(val) % (2 * size)); | ||||||
|         unsigned int coord = (static_cast<unsigned>(val) % (2 * size)); |         if (coord >= size) { | ||||||
|         if (coord >= size) |  | ||||||
|             coord = 2 * size - 1 - coord; |             coord = 2 * size - 1 - coord; | ||||||
|         return static_cast<int>(coord); |  | ||||||
|         } |         } | ||||||
| 
 |         return static_cast<s32>(coord); | ||||||
|  |     } | ||||||
|     default: |     default: | ||||||
|         LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode {:x}", (int)mode); |         LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode {:x}", (int)mode); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|  | @ -58,35 +57,25 @@ Common::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor, | ||||||
|     switch (factor) { |     switch (factor) { | ||||||
|     case ColorModifier::SourceColor: |     case ColorModifier::SourceColor: | ||||||
|         return values.rgb(); |         return values.rgb(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::OneMinusSourceColor: |     case ColorModifier::OneMinusSourceColor: | ||||||
|         return (Common::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); |         return (Common::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::SourceAlpha: |     case ColorModifier::SourceAlpha: | ||||||
|         return values.aaa(); |         return values.aaa(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::OneMinusSourceAlpha: |     case ColorModifier::OneMinusSourceAlpha: | ||||||
|         return (Common::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); |         return (Common::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::SourceRed: |     case ColorModifier::SourceRed: | ||||||
|         return values.rrr(); |         return values.rrr(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::OneMinusSourceRed: |     case ColorModifier::OneMinusSourceRed: | ||||||
|         return (Common::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); |         return (Common::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::SourceGreen: |     case ColorModifier::SourceGreen: | ||||||
|         return values.ggg(); |         return values.ggg(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::OneMinusSourceGreen: |     case ColorModifier::OneMinusSourceGreen: | ||||||
|         return (Common::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); |         return (Common::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::SourceBlue: |     case ColorModifier::SourceBlue: | ||||||
|         return values.bbb(); |         return values.bbb(); | ||||||
| 
 |  | ||||||
|     case ColorModifier::OneMinusSourceBlue: |     case ColorModifier::OneMinusSourceBlue: | ||||||
|         return (Common::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); |         return (Common::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     UNREACHABLE(); |     UNREACHABLE(); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -96,42 +85,33 @@ u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Common::Vec4<u8> | ||||||
|     switch (factor) { |     switch (factor) { | ||||||
|     case AlphaModifier::SourceAlpha: |     case AlphaModifier::SourceAlpha: | ||||||
|         return values.a(); |         return values.a(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::OneMinusSourceAlpha: |     case AlphaModifier::OneMinusSourceAlpha: | ||||||
|         return 255 - values.a(); |         return 255 - values.a(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::SourceRed: |     case AlphaModifier::SourceRed: | ||||||
|         return values.r(); |         return values.r(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::OneMinusSourceRed: |     case AlphaModifier::OneMinusSourceRed: | ||||||
|         return 255 - values.r(); |         return 255 - values.r(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::SourceGreen: |     case AlphaModifier::SourceGreen: | ||||||
|         return values.g(); |         return values.g(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::OneMinusSourceGreen: |     case AlphaModifier::OneMinusSourceGreen: | ||||||
|         return 255 - values.g(); |         return 255 - values.g(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::SourceBlue: |     case AlphaModifier::SourceBlue: | ||||||
|         return values.b(); |         return values.b(); | ||||||
| 
 |  | ||||||
|     case AlphaModifier::OneMinusSourceBlue: |     case AlphaModifier::OneMinusSourceBlue: | ||||||
|         return 255 - values.b(); |         return 255 - values.b(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     UNREACHABLE(); |     UNREACHABLE(); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| Common::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Common::Vec3<u8> input[3]) { | Common::Vec3<u8> ColorCombine(TevStageConfig::Operation op, | ||||||
|  |                               std::span<const Common::Vec3<u8>, 3> input) { | ||||||
|     using Operation = TevStageConfig::Operation; |     using Operation = TevStageConfig::Operation; | ||||||
| 
 | 
 | ||||||
|     switch (op) { |     switch (op) { | ||||||
|     case Operation::Replace: |     case Operation::Replace: | ||||||
|         return input[0]; |         return input[0]; | ||||||
| 
 |  | ||||||
|     case Operation::Modulate: |     case Operation::Modulate: | ||||||
|         return ((input[0] * input[1]) / 255).Cast<u8>(); |         return ((input[0] * input[1]) / 255).Cast<u8>(); | ||||||
| 
 |  | ||||||
|     case Operation::Add: { |     case Operation::Add: { | ||||||
|         auto result = input[0] + input[1]; |         auto result = input[0] + input[1]; | ||||||
|         result.r() = std::min(255, result.r()); |         result.r() = std::min(255, result.r()); | ||||||
|  | @ -139,46 +119,41 @@ Common::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Common::Vec3<u | ||||||
|         result.b() = std::min(255, result.b()); |         result.b() = std::min(255, result.b()); | ||||||
|         return result.Cast<u8>(); |         return result.Cast<u8>(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     case Operation::AddSigned: { |     case Operation::AddSigned: { | ||||||
|         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 |         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
 | ||||||
|         // (byte) 128 is correct
 |         // (byte) 128 is correct
 | ||||||
|         auto result = |         Common::Vec3i result = | ||||||
|             input[0].Cast<int>() + input[1].Cast<int>() - Common::MakeVec<int>(128, 128, 128); |             input[0].Cast<s32>() + input[1].Cast<s32>() - Common::MakeVec<s32>(128, 128, 128); | ||||||
|         result.r() = std::clamp<int>(result.r(), 0, 255); |         result.r() = std::clamp<s32>(result.r(), 0, 255); | ||||||
|         result.g() = std::clamp<int>(result.g(), 0, 255); |         result.g() = std::clamp<s32>(result.g(), 0, 255); | ||||||
|         result.b() = std::clamp<int>(result.b(), 0, 255); |         result.b() = std::clamp<s32>(result.b(), 0, 255); | ||||||
|         return result.Cast<u8>(); |         return result.Cast<u8>(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     case Operation::Lerp: |     case Operation::Lerp: | ||||||
|         return ((input[0] * input[2] + |         return ((input[0] * input[2] + | ||||||
|                  input[1] * (Common::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / |                  input[1] * (Common::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||||||
|                 255) |                 255) | ||||||
|             .Cast<u8>(); |             .Cast<u8>(); | ||||||
| 
 |  | ||||||
|     case Operation::Subtract: { |     case Operation::Subtract: { | ||||||
|         auto result = input[0].Cast<int>() - input[1].Cast<int>(); |         auto result = input[0].Cast<s32>() - input[1].Cast<s32>(); | ||||||
|         result.r() = std::max(0, result.r()); |         result.r() = std::max(0, result.r()); | ||||||
|         result.g() = std::max(0, result.g()); |         result.g() = std::max(0, result.g()); | ||||||
|         result.b() = std::max(0, result.b()); |         result.b() = std::max(0, result.b()); | ||||||
|         return result.Cast<u8>(); |         return result.Cast<u8>(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     case Operation::MultiplyThenAdd: { |     case Operation::MultiplyThenAdd: { | ||||||
|         auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; |         auto result = (input[0] * input[1] + 255 * input[2].Cast<s32>()) / 255; | ||||||
|         result.r() = std::min(255, result.r()); |         result.r() = std::min(255, result.r()); | ||||||
|         result.g() = std::min(255, result.g()); |         result.g() = std::min(255, result.g()); | ||||||
|         result.b() = std::min(255, result.b()); |         result.b() = std::min(255, result.b()); | ||||||
|         return result.Cast<u8>(); |         return result.Cast<u8>(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     case Operation::AddThenMultiply: { |     case Operation::AddThenMultiply: { | ||||||
|         auto result = input[0] + input[1]; |         auto result = input[0] + input[1]; | ||||||
|         result.r() = std::min(255, result.r()); |         result.r() = std::min(255, result.r()); | ||||||
|         result.g() = std::min(255, result.g()); |         result.g() = std::min(255, result.g()); | ||||||
|         result.b() = std::min(255, result.b()); |         result.b() = std::min(255, result.b()); | ||||||
|         result = (result * input[2].Cast<int>()) / 255; |         result = (result * input[2].Cast<s32>()) / 255; | ||||||
|         return result.Cast<u8>(); |         return result.Cast<u8>(); | ||||||
|     } |     } | ||||||
|     case Operation::Dot3_RGB: |     case Operation::Dot3_RGB: | ||||||
|  | @ -187,11 +162,11 @@ Common::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Common::Vec3<u | ||||||
|         // indicate that the per-component computation can't have a higher precision than 1/256,
 |         // indicate that the per-component computation can't have a higher precision than 1/256,
 | ||||||
|         // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
 |         // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
 | ||||||
|         // different results.
 |         // different results.
 | ||||||
|         int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + |         s32 result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||||||
|                      ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + |                      ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||||||
|                      ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; |                      ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||||||
|         result = std::max(0, std::min(255, result)); |         result = std::clamp(result, 0, 255); | ||||||
|         return {(u8)result, (u8)result, (u8)result}; |         return Common::Vec3{result, result, result}.Cast<u8>(); | ||||||
|     } |     } | ||||||
|     default: |     default: | ||||||
|         LOG_ERROR(HW_GPU, "Unknown color combiner operation {}", (int)op); |         LOG_ERROR(HW_GPU, "Unknown color combiner operation {}", (int)op); | ||||||
|  | @ -205,31 +180,23 @@ u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) { | ||||||
|         using Operation = TevStageConfig::Operation; |         using Operation = TevStageConfig::Operation; | ||||||
|     case Operation::Replace: |     case Operation::Replace: | ||||||
|         return input[0]; |         return input[0]; | ||||||
| 
 |  | ||||||
|     case Operation::Modulate: |     case Operation::Modulate: | ||||||
|         return input[0] * input[1] / 255; |         return input[0] * input[1] / 255; | ||||||
| 
 |  | ||||||
|     case Operation::Add: |     case Operation::Add: | ||||||
|         return std::min(255, input[0] + input[1]); |         return std::min(255, input[0] + input[1]); | ||||||
| 
 |  | ||||||
|     case Operation::AddSigned: { |     case Operation::AddSigned: { | ||||||
|         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
 |         // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
 | ||||||
|         auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; |         auto result = static_cast<s32>(input[0]) + static_cast<s32>(input[1]) - 128; | ||||||
|         return static_cast<u8>(std::clamp<int>(result, 0, 255)); |         return static_cast<u8>(std::clamp<s32>(result, 0, 255)); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     case Operation::Lerp: |     case Operation::Lerp: | ||||||
|         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; |         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||||||
| 
 |  | ||||||
|     case Operation::Subtract: |     case Operation::Subtract: | ||||||
|         return std::max(0, (int)input[0] - (int)input[1]); |         return std::max(0, static_cast<s32>(input[0]) - static_cast<s32>(input[1])); | ||||||
| 
 |  | ||||||
|     case Operation::MultiplyThenAdd: |     case Operation::MultiplyThenAdd: | ||||||
|         return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); |         return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||||||
| 
 |  | ||||||
|     case Operation::AddThenMultiply: |     case Operation::AddThenMultiply: | ||||||
|         return (std::min(255, (input[0] + input[1])) * input[2]) / 255; |         return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||||||
| 
 |  | ||||||
|     default: |     default: | ||||||
|         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation {}", (int)op); |         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation {}", (int)op); | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|  | @ -237,4 +204,4 @@ u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) { | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Pica::Rasterizer
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -4,23 +4,25 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <span> | ||||||
|  | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "video_core/regs_texturing.h" | #include "video_core/regs_texturing.h" | ||||||
| 
 | 
 | ||||||
| namespace Pica::Rasterizer { | namespace SwRenderer { | ||||||
| 
 | 
 | ||||||
| int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size); | int GetWrappedTexCoord(Pica::TexturingRegs::TextureConfig::WrapMode mode, s32 val, u32 size); | ||||||
| 
 | 
 | ||||||
| Common::Vec3<u8> GetColorModifier(TexturingRegs::TevStageConfig::ColorModifier factor, | Common::Vec3<u8> GetColorModifier(Pica::TexturingRegs::TevStageConfig::ColorModifier factor, | ||||||
|                                   const Common::Vec4<u8>& values); |                                   const Common::Vec4<u8>& values); | ||||||
| 
 | 
 | ||||||
| u8 GetAlphaModifier(TexturingRegs::TevStageConfig::AlphaModifier factor, | u8 GetAlphaModifier(Pica::TexturingRegs::TevStageConfig::AlphaModifier factor, | ||||||
|                     const Common::Vec4<u8>& values); |                     const Common::Vec4<u8>& values); | ||||||
| 
 | 
 | ||||||
| Common::Vec3<u8> ColorCombine(TexturingRegs::TevStageConfig::Operation op, | Common::Vec3<u8> ColorCombine(Pica::TexturingRegs::TevStageConfig::Operation op, | ||||||
|                               const Common::Vec3<u8> input[3]); |                               std::span<const Common::Vec3<u8>, 3> input); | ||||||
| 
 | 
 | ||||||
| u8 AlphaCombine(TexturingRegs::TevStageConfig::Operation op, const std::array<u8, 3>& input); | u8 AlphaCombine(Pica::TexturingRegs::TevStageConfig::Operation op, const std::array<u8, 3>& input); | ||||||
| 
 | 
 | ||||||
| } // namespace Pica::Rasterizer
 | } // namespace SwRenderer
 | ||||||
|  |  | ||||||
|  | @ -54,12 +54,12 @@ struct DebugData<true> { | ||||||
|             LOOP_INT_IN = 0x800, |             LOOP_INT_IN = 0x800, | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         Common::Vec4<float24> src1; |         Common::Vec4<f24> src1; | ||||||
|         Common::Vec4<float24> src2; |         Common::Vec4<f24> src2; | ||||||
|         Common::Vec4<float24> src3; |         Common::Vec4<f24> src3; | ||||||
| 
 | 
 | ||||||
|         Common::Vec4<float24> dest_in; |         Common::Vec4<f24> dest_in; | ||||||
|         Common::Vec4<float24> dest_out; |         Common::Vec4<f24> dest_out; | ||||||
| 
 | 
 | ||||||
|         s32 address_registers[2]; |         s32 address_registers[2]; | ||||||
|         bool conditional_code[2]; |         bool conditional_code[2]; | ||||||
|  | @ -89,7 +89,7 @@ template <DebugDataRecord::Type type, typename ValueType> | ||||||
| inline void SetField(DebugDataRecord& record, ValueType value); | inline void SetField(DebugDataRecord& record, ValueType value); | ||||||
| 
 | 
 | ||||||
| template <> | template <> | ||||||
| inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, f24* value) { | ||||||
|     record.src1.x = value[0]; |     record.src1.x = value[0]; | ||||||
|     record.src1.y = value[1]; |     record.src1.y = value[1]; | ||||||
|     record.src1.z = value[2]; |     record.src1.z = value[2]; | ||||||
|  | @ -97,7 +97,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <> | template <> | ||||||
| inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, f24* value) { | ||||||
|     record.src2.x = value[0]; |     record.src2.x = value[0]; | ||||||
|     record.src2.y = value[1]; |     record.src2.y = value[1]; | ||||||
|     record.src2.z = value[2]; |     record.src2.z = value[2]; | ||||||
|  | @ -105,7 +105,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <> | template <> | ||||||
| inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, f24* value) { | ||||||
|     record.src3.x = value[0]; |     record.src3.x = value[0]; | ||||||
|     record.src3.y = value[1]; |     record.src3.y = value[1]; | ||||||
|     record.src3.z = value[2]; |     record.src3.z = value[2]; | ||||||
|  | @ -113,7 +113,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <> | template <> | ||||||
| inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, f24* value) { | ||||||
|     record.dest_in.x = value[0]; |     record.dest_in.x = value[0]; | ||||||
|     record.dest_in.y = value[1]; |     record.dest_in.y = value[1]; | ||||||
|     record.dest_in.z = value[2]; |     record.dest_in.z = value[2]; | ||||||
|  | @ -121,7 +121,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <> | template <> | ||||||
| inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, f24* value) { | ||||||
|     record.dest_out.x = value[0]; |     record.dest_out.x = value[0]; | ||||||
|     record.dest_out.y = value[1]; |     record.dest_out.y = value[1]; | ||||||
|     record.dest_out.z = value[2]; |     record.dest_out.z = value[2]; | ||||||
|  |  | ||||||
|  | @ -5,10 +5,10 @@ | ||||||
| #include <cmath> | #include <cmath> | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include "common/arch.h" | #include "common/arch.h" | ||||||
|  | #include "common/assert.h" | ||||||
| #include "common/bit_set.h" | #include "common/bit_set.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "video_core/pica_state.h" |  | ||||||
| #include "video_core/regs_rasterizer.h" | #include "video_core/regs_rasterizer.h" | ||||||
| #include "video_core/regs_shader.h" | #include "video_core/regs_shader.h" | ||||||
| #include "video_core/shader/shader.h" | #include "video_core/shader/shader.h" | ||||||
|  | @ -41,11 +41,11 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, | ||||||
|         // Allow us to overflow OutputVertex to avoid branches, since
 |         // Allow us to overflow OutputVertex to avoid branches, since
 | ||||||
|         // RasterizerRegs::VSOutputAttributes::INVALID would write to slot 31, which
 |         // RasterizerRegs::VSOutputAttributes::INVALID would write to slot 31, which
 | ||||||
|         // would be out of bounds otherwise.
 |         // would be out of bounds otherwise.
 | ||||||
|         std::array<float24, 32> vertex_slots_overflow; |         std::array<f24, 32> vertex_slots_overflow; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     // Assert that OutputVertex has enough space for 24 semantic registers
 |     // Assert that OutputVertex has enough space for 24 semantic registers
 | ||||||
|     static_assert(sizeof(std::array<float24, 24>) == sizeof(ret), |     static_assert(sizeof(std::array<f24, 24>) == sizeof(ret), | ||||||
|                   "Struct and array have different sizes."); |                   "Struct and array have different sizes."); | ||||||
| 
 | 
 | ||||||
|     unsigned int num_attributes = regs.vs_output_total & 7; |     unsigned int num_attributes = regs.vs_output_total & 7; | ||||||
|  | @ -61,7 +61,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, | ||||||
|     // interpolation
 |     // interpolation
 | ||||||
|     for (unsigned i = 0; i < 4; ++i) { |     for (unsigned i = 0; i < 4; ++i) { | ||||||
|         float c = std::fabs(ret.color[i].ToFloat32()); |         float c = std::fabs(ret.color[i].ToFloat32()); | ||||||
|         ret.color[i] = float24::FromFloat32(c < 1.0f ? c : 1.0f); |         ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     LOG_TRACE(HW_GPU, |     LOG_TRACE(HW_GPU, | ||||||
|  | @ -86,7 +86,7 @@ void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void CopyRegistersToOutput(std::span<Common::Vec4<float24>, 16> regs, u32 mask, | static void CopyRegistersToOutput(std::span<Common::Vec4<f24>, 16> regs, u32 mask, | ||||||
|                                   AttributeBuffer& buffer) { |                                   AttributeBuffer& buffer) { | ||||||
|     int output_i = 0; |     int output_i = 0; | ||||||
|     for (int reg : Common::BitSet<u32>(mask)) { |     for (int reg : Common::BitSet<u32>(mask)) { | ||||||
|  | @ -108,7 +108,7 @@ GSEmitter::~GSEmitter() { | ||||||
|     delete handlers; |     delete handlers; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GSEmitter::Emit(std::span<Common::Vec4<float24>, 16> output_regs) { | void GSEmitter::Emit(std::span<Common::Vec4<f24>, 16> output_regs) { | ||||||
|     ASSERT(vertex_id < 3); |     ASSERT(vertex_id < 3); | ||||||
|     // TODO: This should be merged with UnitState::WriteOutput somehow
 |     // TODO: This should be merged with UnitState::WriteOutput somehow
 | ||||||
|     CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]); |     CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]); | ||||||
|  |  | ||||||
|  | @ -12,7 +12,6 @@ | ||||||
| #include <boost/serialization/access.hpp> | #include <boost/serialization/access.hpp> | ||||||
| #include <boost/serialization/array.hpp> | #include <boost/serialization/array.hpp> | ||||||
| #include <boost/serialization/base_object.hpp> | #include <boost/serialization/base_object.hpp> | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/hash.h" | #include "common/hash.h" | ||||||
|  | @ -29,7 +28,7 @@ using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>; | ||||||
| using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>; | using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>; | ||||||
| 
 | 
 | ||||||
| struct AttributeBuffer { | struct AttributeBuffer { | ||||||
|     alignas(16) Common::Vec4<float24> attr[16]; |     alignas(16) Common::Vec4<f24> attr[16]; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     friend class boost::serialization::access; |     friend class boost::serialization::access; | ||||||
|  | @ -46,16 +45,16 @@ using VertexHandler = std::function<void(const AttributeBuffer&)>; | ||||||
| using WindingSetter = std::function<void()>; | using WindingSetter = std::function<void()>; | ||||||
| 
 | 
 | ||||||
| struct OutputVertex { | struct OutputVertex { | ||||||
|     Common::Vec4<float24> pos; |     Common::Vec4<f24> pos; | ||||||
|     Common::Vec4<float24> quat; |     Common::Vec4<f24> quat; | ||||||
|     Common::Vec4<float24> color; |     Common::Vec4<f24> color; | ||||||
|     Common::Vec2<float24> tc0; |     Common::Vec2<f24> tc0; | ||||||
|     Common::Vec2<float24> tc1; |     Common::Vec2<f24> tc1; | ||||||
|     float24 tc0_w; |     f24 tc0_w; | ||||||
|     INSERT_PADDING_WORDS(1); |     INSERT_PADDING_WORDS(1); | ||||||
|     Common::Vec3<float24> view; |     Common::Vec3<f24> view; | ||||||
|     INSERT_PADDING_WORDS(1); |     INSERT_PADDING_WORDS(1); | ||||||
|     Common::Vec2<float24> tc2; |     Common::Vec2<f24> tc2; | ||||||
| 
 | 
 | ||||||
|     static void ValidateSemantics(const RasterizerRegs& regs); |     static void ValidateSemantics(const RasterizerRegs& regs); | ||||||
|     static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, |     static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, | ||||||
|  | @ -76,7 +75,7 @@ private: | ||||||
|     friend class boost::serialization::access; |     friend class boost::serialization::access; | ||||||
| }; | }; | ||||||
| #define ASSERT_POS(var, pos)                                                                       \ | #define ASSERT_POS(var, pos)                                                                       \ | ||||||
|     static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \ |     static_assert(offsetof(OutputVertex, var) == pos * sizeof(f24), "Semantic at wrong "           \ | ||||||
|                                                                     "offset.") |                                                                     "offset.") | ||||||
| ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X); | ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X); | ||||||
| ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X); | ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X); | ||||||
|  | @ -109,7 +108,7 @@ struct GSEmitter { | ||||||
| 
 | 
 | ||||||
|     GSEmitter(); |     GSEmitter(); | ||||||
|     ~GSEmitter(); |     ~GSEmitter(); | ||||||
|     void Emit(std::span<Common::Vec4<float24>, 16> output_regs); |     void Emit(std::span<Common::Vec4<f24>, 16> output_regs); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     friend class boost::serialization::access; |     friend class boost::serialization::access; | ||||||
|  | @ -136,9 +135,9 @@ struct UnitState { | ||||||
|     struct Registers { |     struct Registers { | ||||||
|         // The registers are accessed by the shader JIT using SSE instructions, and are therefore
 |         // The registers are accessed by the shader JIT using SSE instructions, and are therefore
 | ||||||
|         // required to be 16-byte aligned.
 |         // required to be 16-byte aligned.
 | ||||||
|         alignas(16) std::array<Common::Vec4<float24>, 16> input; |         alignas(16) std::array<Common::Vec4<f24>, 16> input; | ||||||
|         alignas(16) std::array<Common::Vec4<float24>, 16> temporary; |         alignas(16) std::array<Common::Vec4<f24>, 16> temporary; | ||||||
|         alignas(16) std::array<Common::Vec4<float24>, 16> output; |         alignas(16) std::array<Common::Vec4<f24>, 16> output; | ||||||
| 
 | 
 | ||||||
|     private: |     private: | ||||||
|         friend class boost::serialization::access; |         friend class boost::serialization::access; | ||||||
|  | @ -160,18 +159,16 @@ struct UnitState { | ||||||
|     GSEmitter* emitter_ptr; |     GSEmitter* emitter_ptr; | ||||||
| 
 | 
 | ||||||
|     static std::size_t InputOffset(int register_index) { |     static std::size_t InputOffset(int register_index) { | ||||||
|         return offsetof(UnitState, registers.input) + |         return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4<f24>); | ||||||
|                register_index * sizeof(Common::Vec4<float24>); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static std::size_t OutputOffset(int register_index) { |     static std::size_t OutputOffset(int register_index) { | ||||||
|         return offsetof(UnitState, registers.output) + |         return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4<f24>); | ||||||
|                register_index * sizeof(Common::Vec4<float24>); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static std::size_t TemporaryOffset(int register_index) { |     static std::size_t TemporaryOffset(int register_index) { | ||||||
|         return offsetof(UnitState, registers.temporary) + |         return offsetof(UnitState, registers.temporary) + | ||||||
|                register_index * sizeof(Common::Vec4<float24>); |                register_index * sizeof(Common::Vec4<f24>); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|  | @ -219,13 +216,13 @@ private: | ||||||
| struct Uniforms { | struct Uniforms { | ||||||
|     // The float uniforms are accessed by the shader JIT using SSE instructions, and are
 |     // The float uniforms are accessed by the shader JIT using SSE instructions, and are
 | ||||||
|     // therefore required to be 16-byte aligned.
 |     // therefore required to be 16-byte aligned.
 | ||||||
|     alignas(16) std::array<Common::Vec4<float24>, 96> f; |     alignas(16) std::array<Common::Vec4<f24>, 96> f; | ||||||
| 
 | 
 | ||||||
|     std::array<bool, 16> b; |     std::array<bool, 16> b; | ||||||
|     std::array<Common::Vec4<u8>, 4> i; |     std::array<Common::Vec4<u8>, 4> i; | ||||||
| 
 | 
 | ||||||
|     static std::size_t GetFloatUniformOffset(unsigned index) { |     static std::size_t GetFloatUniformOffset(unsigned index) { | ||||||
|         return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<float24>); |         return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<f24>); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static std::size_t GetBoolUniformOffset(unsigned index) { |     static std::size_t GetBoolUniformOffset(unsigned index) { | ||||||
|  |  | ||||||
|  | @ -80,7 +80,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|     const auto& program_code = setup.program_code; |     const auto& program_code = setup.program_code; | ||||||
| 
 | 
 | ||||||
|     // Placeholder for invalid inputs
 |     // Placeholder for invalid inputs
 | ||||||
|     static float24 dummy_vec4_float24[4]; |     static f24 dummy_vec4_float24[4]; | ||||||
| 
 | 
 | ||||||
|     unsigned iteration = 0; |     unsigned iteration = 0; | ||||||
|     bool exit_loop = false; |     bool exit_loop = false; | ||||||
|  | @ -111,7 +111,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
| 
 | 
 | ||||||
|         debug_data.max_offset = std::max<u32>(debug_data.max_offset, 1 + program_counter); |         debug_data.max_offset = std::max<u32>(debug_data.max_offset, 1 + program_counter); | ||||||
| 
 | 
 | ||||||
|         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const f24* { | ||||||
|             switch (source_reg.GetRegisterType()) { |             switch (source_reg.GetRegisterType()) { | ||||||
|             case RegisterType::Input: |             case RegisterType::Input: | ||||||
|                 return &state.registers.input[source_reg.GetIndex()].x; |                 return &state.registers.input[source_reg.GetIndex()].x; | ||||||
|  | @ -137,15 +137,15 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     ? 0 |                     ? 0 | ||||||
|                     : state.address_registers[instr.common.address_register_index - 1]; |                     : state.address_registers[instr.common.address_register_index - 1]; | ||||||
| 
 | 
 | ||||||
|             const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + |             const f24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + | ||||||
|                                                     (is_inverted ? 0 : address_offset)); |                                                     (is_inverted ? 0 : address_offset)); | ||||||
|             const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + |             const f24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + | ||||||
|                                                     (is_inverted ? address_offset : 0)); |                                                     (is_inverted ? address_offset : 0)); | ||||||
| 
 | 
 | ||||||
|             const bool negate_src1 = ((bool)swizzle.negate_src1 != false); |             const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | ||||||
|             const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |             const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | ||||||
| 
 | 
 | ||||||
|             float24 src1[4] = { |             f24 src1[4] = { | ||||||
|                 src1_[(int)swizzle.src1_selector_0.Value()], |                 src1_[(int)swizzle.src1_selector_0.Value()], | ||||||
|                 src1_[(int)swizzle.src1_selector_1.Value()], |                 src1_[(int)swizzle.src1_selector_1.Value()], | ||||||
|                 src1_[(int)swizzle.src1_selector_2.Value()], |                 src1_[(int)swizzle.src1_selector_2.Value()], | ||||||
|  | @ -157,7 +157,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                 src1[2] = -src1[2]; |                 src1[2] = -src1[2]; | ||||||
|                 src1[3] = -src1[3]; |                 src1[3] = -src1[3]; | ||||||
|             } |             } | ||||||
|             float24 src2[4] = { |             f24 src2[4] = { | ||||||
|                 src2_[(int)swizzle.src2_selector_0.Value()], |                 src2_[(int)swizzle.src2_selector_0.Value()], | ||||||
|                 src2_[(int)swizzle.src2_selector_1.Value()], |                 src2_[(int)swizzle.src2_selector_1.Value()], | ||||||
|                 src2_[(int)swizzle.src2_selector_2.Value()], |                 src2_[(int)swizzle.src2_selector_2.Value()], | ||||||
|  | @ -170,8 +170,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                 src2[3] = -src2[3]; |                 src2[3] = -src2[3]; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             float24* dest = |             f24* dest = (instr.common.dest.Value() < 0x10) | ||||||
|                 (instr.common.dest.Value() < 0x10) |  | ||||||
|                             ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] |                             ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] | ||||||
|                         : (instr.common.dest.Value() < 0x20) |                         : (instr.common.dest.Value() < 0x20) | ||||||
|                             ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] |                             ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | ||||||
|  | @ -216,7 +215,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); |                     dest[i] = f24::FromFloat32(std::floor(src1[i].ToFloat32())); | ||||||
|                 } |                 } | ||||||
|                 Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|  | @ -263,11 +262,10 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
| 
 | 
 | ||||||
|                 OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); |                 OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); | ||||||
|                 if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) |                 if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) | ||||||
|                     src1[3] = float24::FromFloat32(1.0f); |                     src1[3] = f24::One(); | ||||||
| 
 | 
 | ||||||
|                 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; |                 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; | ||||||
|                 float24 dot = std::inner_product(src1, src1 + num_components, src2, |                 f24 dot = std::inner_product(src1, src1 + num_components, src2, f24::Zero()); | ||||||
|                                                  float24::FromFloat32(0.f)); |  | ||||||
| 
 | 
 | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  | @ -283,7 +281,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|             case OpCode::Id::RCP: { |             case OpCode::Id::RCP: { | ||||||
|                 Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |                 Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); | ||||||
|                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); | ||||||
|                 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); |                 f24 rcp_res = f24::FromFloat32(1.0f / src1[0].ToFloat32()); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -298,7 +296,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|             case OpCode::Id::RSQ: { |             case OpCode::Id::RSQ: { | ||||||
|                 Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |                 Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); | ||||||
|                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); | ||||||
|                 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); |                 f24 rsq_res = f24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -345,8 +343,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) |                     dest[i] = (src1[i] >= src2[i]) ? f24::One() : f24::Zero(); | ||||||
|                                                    : float24::FromFloat32(0.0f); |  | ||||||
|                 } |                 } | ||||||
|                 Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|  | @ -360,8 +357,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) |                     dest[i] = (src1[i] < src2[i]) ? f24::One() : f24::Zero(); | ||||||
|                                                   : float24::FromFloat32(0.0f); |  | ||||||
|                 } |                 } | ||||||
|                 Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|  | @ -413,7 +409,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); | ||||||
| 
 | 
 | ||||||
|                 // EX2 only takes first component exp2 and writes it to all dest components
 |                 // EX2 only takes first component exp2 and writes it to all dest components
 | ||||||
|                 float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); |                 f24 ex2_res = f24::FromFloat32(std::exp2(src1[0].ToFloat32())); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -430,7 +426,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |                 Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); | ||||||
| 
 | 
 | ||||||
|                 // LG2 only takes the first component log2 and writes it to all dest components
 |                 // LG2 only takes the first component log2 and writes it to all dest components
 | ||||||
|                 float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); |                 f24 lg2_res = f24::FromFloat32(std::log2(src1[0].ToFloat32())); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -466,17 +462,17 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                         ? 0 |                         ? 0 | ||||||
|                         : state.address_registers[instr.mad.address_register_index - 1]; |                         : state.address_registers[instr.mad.address_register_index - 1]; | ||||||
| 
 | 
 | ||||||
|                 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); |                 const f24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); | ||||||
|                 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + |                 const f24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + | ||||||
|                                                         (!is_inverted * address_offset)); |                                                         (!is_inverted * address_offset)); | ||||||
|                 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + |                 const f24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + | ||||||
|                                                         (is_inverted * address_offset)); |                                                         (is_inverted * address_offset)); | ||||||
| 
 | 
 | ||||||
|                 const bool negate_src1 = ((bool)mad_swizzle.negate_src1 != false); |                 const bool negate_src1 = ((bool)mad_swizzle.negate_src1 != false); | ||||||
|                 const bool negate_src2 = ((bool)mad_swizzle.negate_src2 != false); |                 const bool negate_src2 = ((bool)mad_swizzle.negate_src2 != false); | ||||||
|                 const bool negate_src3 = ((bool)mad_swizzle.negate_src3 != false); |                 const bool negate_src3 = ((bool)mad_swizzle.negate_src3 != false); | ||||||
| 
 | 
 | ||||||
|                 float24 src1[4] = { |                 f24 src1[4] = { | ||||||
|                     src1_[(int)mad_swizzle.src1_selector_0.Value()], |                     src1_[(int)mad_swizzle.src1_selector_0.Value()], | ||||||
|                     src1_[(int)mad_swizzle.src1_selector_1.Value()], |                     src1_[(int)mad_swizzle.src1_selector_1.Value()], | ||||||
|                     src1_[(int)mad_swizzle.src1_selector_2.Value()], |                     src1_[(int)mad_swizzle.src1_selector_2.Value()], | ||||||
|  | @ -488,7 +484,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     src1[2] = -src1[2]; |                     src1[2] = -src1[2]; | ||||||
|                     src1[3] = -src1[3]; |                     src1[3] = -src1[3]; | ||||||
|                 } |                 } | ||||||
|                 float24 src2[4] = { |                 f24 src2[4] = { | ||||||
|                     src2_[(int)mad_swizzle.src2_selector_0.Value()], |                     src2_[(int)mad_swizzle.src2_selector_0.Value()], | ||||||
|                     src2_[(int)mad_swizzle.src2_selector_1.Value()], |                     src2_[(int)mad_swizzle.src2_selector_1.Value()], | ||||||
|                     src2_[(int)mad_swizzle.src2_selector_2.Value()], |                     src2_[(int)mad_swizzle.src2_selector_2.Value()], | ||||||
|  | @ -500,7 +496,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     src2[2] = -src2[2]; |                     src2[2] = -src2[2]; | ||||||
|                     src2[3] = -src2[3]; |                     src2[3] = -src2[3]; | ||||||
|                 } |                 } | ||||||
|                 float24 src3[4] = { |                 f24 src3[4] = { | ||||||
|                     src3_[(int)mad_swizzle.src3_selector_0.Value()], |                     src3_[(int)mad_swizzle.src3_selector_0.Value()], | ||||||
|                     src3_[(int)mad_swizzle.src3_selector_1.Value()], |                     src3_[(int)mad_swizzle.src3_selector_1.Value()], | ||||||
|                     src3_[(int)mad_swizzle.src3_selector_2.Value()], |                     src3_[(int)mad_swizzle.src3_selector_2.Value()], | ||||||
|  | @ -513,8 +509,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||||
|                     src3[3] = -src3[3]; |                     src3[3] = -src3[3]; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 float24* dest = |                 f24* dest = (instr.mad.dest.Value() < 0x10) | ||||||
|                     (instr.mad.dest.Value() < 0x10) |  | ||||||
|                                 ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] |                                 ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] | ||||||
|                             : (instr.mad.dest.Value() < 0x20) |                             : (instr.mad.dest.Value() < 0x20) | ||||||
|                                 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |                                 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | ||||||
|  | @ -687,7 +682,7 @@ DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, | ||||||
|     DebugData<true> debug_data; |     DebugData<true> debug_data; | ||||||
| 
 | 
 | ||||||
|     // Setup input register table
 |     // Setup input register table
 | ||||||
|     state.registers.input.fill(Common::Vec4<float24>::AssignToAll(float24::Zero())); |     state.registers.input.fill(Common::Vec4<f24>::AssignToAll(f24::Zero())); | ||||||
|     state.LoadInput(config, input); |     state.LoadInput(config, input); | ||||||
|     RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); |     RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); | ||||||
|     return debug_data; |     return debug_data; | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| #include "common/arch.h" | #include "common/arch.h" | ||||||
| #if CITRA_ARCH(x86_64) | #if CITRA_ARCH(x86_64) | ||||||
| 
 | 
 | ||||||
|  | #include "common/assert.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "video_core/shader/shader.h" | #include "video_core/shader/shader.h" | ||||||
| #include "video_core/shader/shader_jit_x64.h" | #include "video_core/shader/shader_jit_x64.h" | ||||||
|  |  | ||||||
|  | @ -813,7 +813,7 @@ void JitShader::Compile_JMP(Instruction instr) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void Emit(GSEmitter* emitter, Common::Vec4<float24> (*output)[16]) { | static void Emit(GSEmitter* emitter, Common::Vec4<f24> (*output)[16]) { | ||||||
|     emitter->Emit(*output); |     emitter->Emit(*output); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -98,7 +98,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, | ||||||
|                 const s8* srcdata = reinterpret_cast<const s8*>( |                 const s8* srcdata = reinterpret_cast<const s8*>( | ||||||
|                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); |                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); | ||||||
|                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |                     input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|  | @ -106,7 +106,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, | ||||||
|                 const u8* srcdata = reinterpret_cast<const u8*>( |                 const u8* srcdata = reinterpret_cast<const u8*>( | ||||||
|                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); |                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); | ||||||
|                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |                     input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|  | @ -114,7 +114,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, | ||||||
|                 const s16* srcdata = reinterpret_cast<const s16*>( |                 const s16* srcdata = reinterpret_cast<const s16*>( | ||||||
|                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); |                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); | ||||||
|                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |                     input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|  | @ -122,7 +122,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, | ||||||
|                 const float* srcdata = reinterpret_cast<const float*>( |                 const float* srcdata = reinterpret_cast<const float*>( | ||||||
|                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); |                     VideoCore::g_memory->GetPhysicalPointer(source_addr)); | ||||||
|                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); |                     input.attr[i][comp] = f24::FromFloat32(srcdata[comp]); | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|  | @ -132,8 +132,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, | ||||||
|             // is *not* carried over from the default attribute settings even if they're
 |             // is *not* carried over from the default attribute settings even if they're
 | ||||||
|             // enabled for this attribute.
 |             // enabled for this attribute.
 | ||||||
|             for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { |             for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { | ||||||
|                 input.attr[i][comp] = |                 input.attr[i][comp] = comp == 3 ? f24::One() : f24::Zero(); | ||||||
|                     comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); |  | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             LOG_TRACE(HW_GPU, |             LOG_TRACE(HW_GPU, | ||||||
|  |  | ||||||
|  | @ -40,7 +40,7 @@ void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window | ||||||
| 
 | 
 | ||||||
|     switch (graphics_api) { |     switch (graphics_api) { | ||||||
|     case Settings::GraphicsAPI::Software: |     case Settings::GraphicsAPI::Software: | ||||||
|         g_renderer = std::make_unique<VideoCore::RendererSoftware>(system, emu_window); |         g_renderer = std::make_unique<SwRenderer::RendererSoftware>(system, emu_window); | ||||||
|         break; |         break; | ||||||
|     case Settings::GraphicsAPI::OpenGL: |     case Settings::GraphicsAPI::OpenGL: | ||||||
|         g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window); |         g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue