mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	video_core: Refactor GPU interface (#7272)
* video_core: Refactor GPU interface * citra_qt: Better debug widget lifetime
This commit is contained in:
		
							parent
							
								
									602f4f60d8
								
							
						
					
					
						commit
						2bb7f89c30
					
				
					 167 changed files with 4172 additions and 4866 deletions
				
			
		|  | @ -299,8 +299,10 @@ add_library(citra_core STATIC | |||
|     hle/service/fs/fs_user.h | ||||
|     hle/service/gsp/gsp.cpp | ||||
|     hle/service/gsp/gsp.h | ||||
|     hle/service/gsp/gsp_command.h | ||||
|     hle/service/gsp/gsp_gpu.cpp | ||||
|     hle/service/gsp/gsp_gpu.h | ||||
|     hle/service/gsp/gsp_interrupt.h | ||||
|     hle/service/gsp/gsp_lcd.cpp | ||||
|     hle/service/gsp/gsp_lcd.h | ||||
|     hle/service/hid/hid.cpp | ||||
|  | @ -433,12 +435,6 @@ add_library(citra_core STATIC | |||
|     hw/aes/ccm.h | ||||
|     hw/aes/key.cpp | ||||
|     hw/aes/key.h | ||||
|     hw/gpu.cpp | ||||
|     hw/gpu.h | ||||
|     hw/hw.cpp | ||||
|     hw/hw.h | ||||
|     hw/lcd.cpp | ||||
|     hw/lcd.h | ||||
|     hw/rsa/rsa.cpp | ||||
|     hw/rsa/rsa.h | ||||
|     hw/y2r.cpp | ||||
|  |  | |||
|  | @ -11,7 +11,6 @@ | |||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/hle/kernel/process.h" | ||||
| #include "core/hw/gpu.h" | ||||
| 
 | ||||
| namespace Cheats { | ||||
| 
 | ||||
|  |  | |||
|  | @ -35,14 +35,13 @@ | |||
| #include "core/hle/service/cam/cam.h" | ||||
| #include "core/hle/service/fs/archive.h" | ||||
| #include "core/hle/service/gsp/gsp.h" | ||||
| #include "core/hle/service/gsp/gsp_gpu.h" | ||||
| #include "core/hle/service/ir/ir_rst.h" | ||||
| #include "core/hle/service/mic/mic_u.h" | ||||
| #include "core/hle/service/plgldr/plgldr.h" | ||||
| #include "core/hle/service/service.h" | ||||
| #include "core/hle/service/sm/sm.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/hw/hw.h" | ||||
| #include "core/hw/lcd.h" | ||||
| #include "core/hw/aes/key.h" | ||||
| #include "core/loader/loader.h" | ||||
| #include "core/movie.h" | ||||
| #ifdef ENABLE_SCRIPTING | ||||
|  | @ -51,8 +50,8 @@ | |||
| #include "core/telemetry_session.h" | ||||
| #include "network/network.h" | ||||
| #include "video_core/custom_textures/custom_tex_manager.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/video_core.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| 
 | ||||
|  | @ -235,7 +234,6 @@ System::ResultStatus System::RunLoop(bool tight_loop) { | |||
|         GDBStub::SetCpuStepFlag(false); | ||||
|     } | ||||
| 
 | ||||
|     HW::Update(); | ||||
|     Reschedule(); | ||||
| 
 | ||||
|     return status; | ||||
|  | @ -433,7 +431,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, | |||
|     service_manager = std::make_unique<Service::SM::ServiceManager>(*this); | ||||
|     archive_manager = std::make_unique<Service::FS::ArchiveManager>(*this); | ||||
| 
 | ||||
|     HW::Init(*memory); | ||||
|     HW::AES::InitKeys(); | ||||
|     Service::Init(*this); | ||||
|     GDBStub::DeferStart(); | ||||
| 
 | ||||
|  | @ -443,7 +441,10 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, | |||
| 
 | ||||
|     custom_tex_manager = std::make_unique<VideoCore::CustomTexManager>(*this); | ||||
| 
 | ||||
|     VideoCore::Init(emu_window, secondary_window, *this); | ||||
|     auto gsp = service_manager->GetService<Service::GSP::GSP_GPU>("gsp::Gpu"); | ||||
|     gpu = std::make_unique<VideoCore::GPU>(*this, emu_window, secondary_window); | ||||
|     gpu->SetInterruptHandler( | ||||
|         [gsp](Service::GSP::InterruptId interrupt_id) { gsp->SignalInterrupt(interrupt_id); }); | ||||
| 
 | ||||
|     LOG_DEBUG(Core, "Initialized OK"); | ||||
| 
 | ||||
|  | @ -452,8 +453,8 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, | |||
|     return ResultStatus::Success; | ||||
| } | ||||
| 
 | ||||
| VideoCore::RendererBase& System::Renderer() { | ||||
|     return *VideoCore::g_renderer; | ||||
| VideoCore::GPU& System::GPU() { | ||||
|     return *gpu; | ||||
| } | ||||
| 
 | ||||
| Service::SM::ServiceManager& System::ServiceManager() { | ||||
|  | @ -555,8 +556,7 @@ void System::Shutdown(bool is_deserializing) { | |||
|     // Shutdown emulation session
 | ||||
|     is_powered_on = false; | ||||
| 
 | ||||
|     VideoCore::Shutdown(); | ||||
|     HW::Shutdown(); | ||||
|     gpu.reset(); | ||||
|     if (!is_deserializing) { | ||||
|         GDBStub::Shutdown(); | ||||
|         perf_stats.reset(); | ||||
|  | @ -626,18 +626,9 @@ void System::ApplySettings() { | |||
|     GDBStub::SetServerPort(Settings::values.gdbstub_port.GetValue()); | ||||
|     GDBStub::ToggleServer(Settings::values.use_gdbstub.GetValue()); | ||||
| 
 | ||||
|     VideoCore::g_shader_jit_enabled = Settings::values.use_shader_jit.GetValue(); | ||||
|     VideoCore::g_hw_shader_enabled = Settings::values.use_hw_shader.GetValue(); | ||||
|     VideoCore::g_hw_shader_accurate_mul = Settings::values.shaders_accurate_mul.GetValue(); | ||||
| 
 | ||||
| #ifndef ANDROID | ||||
|     if (VideoCore::g_renderer) { | ||||
|         VideoCore::g_renderer->UpdateCurrentFramebufferLayout(); | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     if (VideoCore::g_renderer) { | ||||
|         auto& settings = VideoCore::g_renderer->Settings(); | ||||
|     if (gpu) { | ||||
|         gpu->Renderer().UpdateCurrentFramebufferLayout(); | ||||
|         auto& settings = gpu->Renderer().Settings(); | ||||
|         settings.bg_color_update_requested = true; | ||||
|         settings.shader_update_requested = true; | ||||
|     } | ||||
|  | @ -699,17 +690,15 @@ void System::serialize(Archive& ar, const unsigned int file_version) { | |||
|             *m_emu_window, m_secondary_window, *memory_mode.first, *n3ds_hw_caps.first, num_cores); | ||||
|     } | ||||
| 
 | ||||
|     // flush on save, don't flush on load
 | ||||
|     bool should_flush = !Archive::is_loading::value; | ||||
|     Memory::RasterizerClearAll(should_flush); | ||||
|     // Flush on save, don't flush on load
 | ||||
|     const bool should_flush = !Archive::is_loading::value; | ||||
|     gpu->ClearAll(should_flush); | ||||
|     ar&* timing.get(); | ||||
|     for (u32 i = 0; i < num_cores; i++) { | ||||
|         ar&* cpu_cores[i].get(); | ||||
|     } | ||||
|     ar&* service_manager.get(); | ||||
|     ar&* archive_manager.get(); | ||||
|     ar& GPU::g_regs; | ||||
|     ar& LCD::g_regs; | ||||
| 
 | ||||
|     // NOTE: DSP doesn't like being destroyed and recreated. So instead we do an inline
 | ||||
|     // serialization; this means that the DSP Settings need to match for loading to work.
 | ||||
|  | @ -722,16 +711,21 @@ void System::serialize(Archive& ar, const unsigned int file_version) { | |||
| 
 | ||||
|     ar&* memory.get(); | ||||
|     ar&* kernel.get(); | ||||
|     VideoCore::serialize(ar, file_version); | ||||
|     ar&* gpu.get(); | ||||
|     ar& movie; | ||||
| 
 | ||||
|     // This needs to be set from somewhere - might as well be here!
 | ||||
|     if (Archive::is_loading::value) { | ||||
|         timing->UnlockEventQueue(); | ||||
|         Service::GSP::SetGlobalModule(*this); | ||||
|         memory->SetDSP(*dsp_core); | ||||
|         cheat_engine->Connect(); | ||||
|         VideoCore::g_renderer->Sync(); | ||||
|         gpu->Sync(); | ||||
| 
 | ||||
|         // Re-register gpu callback, because gsp service changed after service_manager got
 | ||||
|         // serialized
 | ||||
|         auto gsp = service_manager->GetService<Service::GSP::GSP_GPU>("gsp::Gpu"); | ||||
|         gpu->SetInterruptHandler( | ||||
|             [gsp](Service::GSP::InterruptId interrupt_id) { gsp->SignalInterrupt(interrupt_id); }); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -58,9 +58,13 @@ class Backend; | |||
| 
 | ||||
| namespace VideoCore { | ||||
| class CustomTexManager; | ||||
| class RendererBase; | ||||
| class GPU; | ||||
| } // namespace VideoCore
 | ||||
| 
 | ||||
| namespace Pica { | ||||
| class DebugContext; | ||||
| } | ||||
| 
 | ||||
| namespace Loader { | ||||
| class AppLoader; | ||||
| } | ||||
|  | @ -217,7 +221,7 @@ public: | |||
|         return *dsp_core; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] VideoCore::RendererBase& Renderer(); | ||||
|     [[nodiscard]] VideoCore::GPU& GPU(); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Gets a reference to the service manager. | ||||
|  | @ -384,6 +388,8 @@ private: | |||
|     /// Telemetry session for this emulation session
 | ||||
|     std::unique_ptr<Core::TelemetrySession> telemetry_session; | ||||
| 
 | ||||
|     std::unique_ptr<VideoCore::GPU> gpu; | ||||
| 
 | ||||
|     /// Service manager
 | ||||
|     std::unique_ptr<Service::SM::ServiceManager> service_manager; | ||||
| 
 | ||||
|  |  | |||
|  | @ -37,6 +37,10 @@ | |||
| constexpr u64 BASE_CLOCK_RATE_ARM11 = 268111856; | ||||
| constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE_ARM11; | ||||
| 
 | ||||
| /// Refresh rate defined by ratio of ARM11 frequency to ARM11 ticks per frame
 | ||||
| /// (268,111,856) / (4,481,136) = 59.83122493939037Hz
 | ||||
| constexpr double SCREEN_REFRESH_RATE = BASE_CLOCK_RATE_ARM11 / static_cast<double>(4481136ull); | ||||
| 
 | ||||
| constexpr s64 msToCycles(int ms) { | ||||
|     // since ms is int there is no way to overflow
 | ||||
|     return BASE_CLOCK_RATE_ARM11 * static_cast<s64>(ms) / 1000; | ||||
|  |  | |||
|  | @ -11,10 +11,10 @@ | |||
| #include "common/scope_exit.h" | ||||
| #include "common/settings.h" | ||||
| #include "common/string_util.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/dumping/ffmpeg_backend.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/video_core.h" | ||||
| 
 | ||||
| using namespace DynamicLibrary; | ||||
| 
 | ||||
|  | @ -381,7 +381,7 @@ bool FFmpegVideoStream::InitFilters() { | |||
|     } | ||||
| 
 | ||||
|     // Configure buffer source
 | ||||
|     static constexpr AVRational src_time_base{static_cast<int>(GPU::frame_ticks), | ||||
|     static constexpr AVRational src_time_base{static_cast<int>(VideoCore::FRAME_TICKS), | ||||
|                                               static_cast<int>(BASE_CLOCK_RATE_ARM11)}; | ||||
|     const std::string in_args = | ||||
|         fmt::format("video_size={}x{}:pix_fmt={}:time_base={}/{}:pixel_aspect=1", layout.width, | ||||
|  | @ -732,7 +732,7 @@ void FFmpegMuxer::WriteTrailer() { | |||
|     FFmpeg::av_write_trailer(format_context.get()); | ||||
| } | ||||
| 
 | ||||
| FFmpegBackend::FFmpegBackend() = default; | ||||
| FFmpegBackend::FFmpegBackend(VideoCore::RendererBase& renderer_) : renderer{renderer_} {} | ||||
| 
 | ||||
| FFmpegBackend::~FFmpegBackend() { | ||||
|     ASSERT_MSG(!IsDumping(), "Dumping must be stopped first"); | ||||
|  | @ -796,7 +796,7 @@ bool FFmpegBackend::StartDumping(const std::string& path, const Layout::Framebuf | |||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     VideoCore::g_renderer->PrepareVideoDumping(); | ||||
|     renderer.PrepareVideoDumping(); | ||||
|     is_dumping = true; | ||||
| 
 | ||||
|     return true; | ||||
|  | @ -829,7 +829,7 @@ void FFmpegBackend::AddAudioSample(const std::array<s16, 2>& sample) { | |||
| 
 | ||||
| void FFmpegBackend::StopDumping() { | ||||
|     is_dumping = false; | ||||
|     VideoCore::g_renderer->CleanupVideoDumping(); | ||||
|     renderer.CleanupVideoDumping(); | ||||
| 
 | ||||
|     // Flush the video processing queue
 | ||||
|     AddVideoFrame(VideoFrame()); | ||||
|  |  | |||
|  | @ -18,6 +18,10 @@ | |||
| #include "common/threadsafe_queue.h" | ||||
| #include "core/dumping/backend.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RendererBase; | ||||
| } | ||||
| 
 | ||||
| namespace VideoDumper { | ||||
| 
 | ||||
| using VariableAudioFrame = std::vector<s16>; | ||||
|  | @ -181,7 +185,7 @@ private: | |||
|  */ | ||||
| class FFmpegBackend : public Backend { | ||||
| public: | ||||
|     FFmpegBackend(); | ||||
|     FFmpegBackend(VideoCore::RendererBase& renderer); | ||||
|     ~FFmpegBackend() override; | ||||
|     bool StartDumping(const std::string& path, const Layout::FramebufferLayout& layout) override; | ||||
|     void AddVideoFrame(VideoFrame frame) override; | ||||
|  | @ -194,6 +198,7 @@ public: | |||
| private: | ||||
|     void EndDumping(); | ||||
| 
 | ||||
|     VideoCore::RendererBase& renderer; | ||||
|     std::atomic_bool is_dumping = false; ///< Whether the backend is currently dumping
 | ||||
| 
 | ||||
|     FFmpegMuxer ffmpeg{}; | ||||
|  |  | |||
|  | @ -2,33 +2,17 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <vector> | ||||
| #include "core/core.h" | ||||
| #include "core/hle/kernel/event.h" | ||||
| #include "core/hle/kernel/shared_memory.h" | ||||
| #include "core/hle/service/gsp/gsp.h" | ||||
| #include "core/hle/service/gsp/gsp_gpu.h" | ||||
| #include "core/hle/service/gsp/gsp_lcd.h" | ||||
| 
 | ||||
| namespace Service::GSP { | ||||
| 
 | ||||
| static std::weak_ptr<GSP_GPU> gsp_gpu; | ||||
| 
 | ||||
| void SignalInterrupt(InterruptId interrupt_id) { | ||||
|     auto gpu = gsp_gpu.lock(); | ||||
|     ASSERT(gpu != nullptr); | ||||
|     return gpu->SignalInterrupt(interrupt_id); | ||||
| } | ||||
| 
 | ||||
| void InstallInterfaces(Core::System& system) { | ||||
|     auto& service_manager = system.ServiceManager(); | ||||
|     auto gpu = std::make_shared<GSP_GPU>(system); | ||||
|     gpu->InstallAsService(service_manager); | ||||
|     gsp_gpu = gpu; | ||||
| 
 | ||||
|     std::make_shared<GSP_GPU>(system)->InstallAsService(service_manager); | ||||
|     std::make_shared<GSP_LCD>()->InstallAsService(service_manager); | ||||
| } | ||||
| 
 | ||||
| void SetGlobalModule(Core::System& system) { | ||||
|     gsp_gpu = system.ServiceManager().GetService<GSP_GPU>("gsp::Gpu"); | ||||
| } | ||||
| 
 | ||||
| } // namespace Service::GSP
 | ||||
|  |  | |||
|  | @ -4,25 +4,12 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstddef> | ||||
| #include <string> | ||||
| #include "common/common_types.h" | ||||
| #include "core/hle/result.h" | ||||
| #include "core/hle/service/gsp/gsp_gpu.h" | ||||
| #include "core/hle/service/gsp/gsp_lcd.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
| } | ||||
| 
 | ||||
| namespace Service::GSP { | ||||
| /**
 | ||||
|  * Signals that the specified interrupt type has occurred to userland code | ||||
|  * @param interrupt_id ID of interrupt that is being signalled | ||||
|  */ | ||||
| void SignalInterrupt(InterruptId interrupt_id); | ||||
| 
 | ||||
| void InstallInterfaces(Core::System& system); | ||||
| 
 | ||||
| void SetGlobalModule(Core::System& system); | ||||
| } // namespace Service::GSP
 | ||||
|  |  | |||
							
								
								
									
										110
									
								
								src/core/hle/service/gsp/gsp_command.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/core/hle/service/gsp/gsp_command.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,110 @@ | |||
| // Copyright 2023 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| 
 | ||||
| namespace Service::GSP { | ||||
| 
 | ||||
| /// GSP command ID
 | ||||
| enum class CommandId : u32 { | ||||
|     RequestDma = 0x00, | ||||
|     SubmitCmdList = 0x01, | ||||
|     MemoryFill = 0x02, | ||||
|     DisplayTransfer = 0x03, | ||||
|     TextureCopy = 0x04, | ||||
|     CacheFlush = 0x05, | ||||
| }; | ||||
| 
 | ||||
| struct DmaCommand { | ||||
|     u32 source_address; | ||||
|     u32 dest_address; | ||||
|     u32 size; | ||||
| }; | ||||
| 
 | ||||
| struct SubmitCmdListCommand { | ||||
|     u32 address; | ||||
|     u32 size; | ||||
|     u32 flags; | ||||
|     u32 unused[3]; | ||||
|     u32 do_flush; | ||||
| }; | ||||
| 
 | ||||
| struct MemoryFillCommand { | ||||
|     u32 start1; | ||||
|     u32 value1; | ||||
|     u32 end1; | ||||
| 
 | ||||
|     u32 start2; | ||||
|     u32 value2; | ||||
|     u32 end2; | ||||
| 
 | ||||
|     u16 control1; | ||||
|     u16 control2; | ||||
| }; | ||||
| 
 | ||||
| struct DisplayTransferCommand { | ||||
|     u32 in_buffer_address; | ||||
|     u32 out_buffer_address; | ||||
|     u32 in_buffer_size; | ||||
|     u32 out_buffer_size; | ||||
|     u32 flags; | ||||
| }; | ||||
| 
 | ||||
| struct TextureCopyCommand { | ||||
|     u32 in_buffer_address; | ||||
|     u32 out_buffer_address; | ||||
|     u32 size; | ||||
|     u32 in_width_gap; | ||||
|     u32 out_width_gap; | ||||
|     u32 flags; | ||||
| }; | ||||
| 
 | ||||
| struct CacheFlushCommand { | ||||
|     struct { | ||||
|         u32 address; | ||||
|         u32 size; | ||||
|     } regions[3]; | ||||
| }; | ||||
| 
 | ||||
| /// GSP command
 | ||||
| struct Command { | ||||
|     BitField<0, 8, CommandId> id; | ||||
|     union { | ||||
|         DmaCommand dma_request; | ||||
|         SubmitCmdListCommand submit_gpu_cmdlist; | ||||
|         MemoryFillCommand memory_fill; | ||||
|         DisplayTransferCommand display_transfer; | ||||
|         TextureCopyCommand texture_copy; | ||||
|         CacheFlushCommand cache_flush; | ||||
|         std::array<u8, 0x1C> raw_data; | ||||
|     }; | ||||
| }; | ||||
| static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size"); | ||||
| 
 | ||||
| /// GSP shared memory GX command buffer header
 | ||||
| struct CommandBuffer { | ||||
|     union { | ||||
|         u32 hex; | ||||
| 
 | ||||
|         // Current command index. This index is updated by GSP module after loading the command
 | ||||
|         // data, right before the command is processed. When this index is updated by GSP module,
 | ||||
|         // the total commands field is decreased by one as well.
 | ||||
|         BitField<0, 8, u32> index; | ||||
| 
 | ||||
|         // Total commands to process, must not be value 0 when GSP module handles commands. This
 | ||||
|         // must be <=15 when writing a command to shared memory. This is incremented by the
 | ||||
|         // application when writing a command to shared memory, after increasing this value
 | ||||
|         // TriggerCmdReqQueue is only used if this field is value 1.
 | ||||
|         BitField<8, 8, u32> number_commands; | ||||
|     }; | ||||
| 
 | ||||
|     u32 unk[7]; | ||||
| 
 | ||||
|     Command commands[0xF]; | ||||
| }; | ||||
| static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size"); | ||||
| 
 | ||||
| } // namespace Service::GSP
 | ||||
|  | @ -9,30 +9,21 @@ | |||
| #include <boost/serialization/shared_ptr.hpp> | ||||
| #include "common/archives.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "common/swap.h" | ||||
| #include "core/core.h" | ||||
| #include "core/file_sys/plugin_3gx.h" | ||||
| #include "core/hle/ipc.h" | ||||
| #include "core/hle/ipc_helpers.h" | ||||
| #include "core/hle/kernel/shared_memory.h" | ||||
| #include "core/hle/kernel/shared_page.h" | ||||
| #include "core/hle/result.h" | ||||
| #include "core/hle/service/gsp/gsp_gpu.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/hw/hw.h" | ||||
| #include "core/hw/lcd.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/debug_utils/debug_utils.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/gpu_debugger.h" | ||||
| #include "video_core/pica/regs_lcd.h" | ||||
| 
 | ||||
| SERIALIZE_EXPORT_IMPL(Service::GSP::SessionData) | ||||
| SERIALIZE_EXPORT_IMPL(Service::GSP::GSP_GPU) | ||||
| SERVICE_CONSTRUCT_IMPL(Service::GSP::GSP_GPU) | ||||
| 
 | ||||
| // Main graphics debugger object - TODO: Here is probably not the best place for this
 | ||||
| GraphicsDebugger g_debugger; | ||||
| 
 | ||||
| namespace Service::GSP { | ||||
| 
 | ||||
| // Beginning address of HW regs
 | ||||
|  | @ -59,60 +50,32 @@ constexpr ResultCode ERR_REGS_INVALID_SIZE(ErrorDescription::InvalidSize, ErrorM | |||
|                                            ErrorSummary::InvalidArgument, | ||||
|                                            ErrorLevel::Usage); // 0xE0E02BEC
 | ||||
| 
 | ||||
| static PAddr VirtualToPhysicalAddress(VAddr addr) { | ||||
|     if (addr == 0) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     // Note: the region end check is inclusive because the game can pass in an address that
 | ||||
|     // represents an open right boundary
 | ||||
|     if (addr >= Memory::VRAM_VADDR && addr <= Memory::VRAM_VADDR_END) { | ||||
|         return addr - Memory::VRAM_VADDR + Memory::VRAM_PADDR; | ||||
|     } | ||||
|     if (addr >= Memory::LINEAR_HEAP_VADDR && addr <= Memory::LINEAR_HEAP_VADDR_END) { | ||||
|         return addr - Memory::LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR; | ||||
|     } | ||||
|     if (addr >= Memory::NEW_LINEAR_HEAP_VADDR && addr <= Memory::NEW_LINEAR_HEAP_VADDR_END) { | ||||
|         return addr - Memory::NEW_LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR; | ||||
|     } | ||||
|     if (addr >= Memory::PLUGIN_3GX_FB_VADDR && addr <= Memory::PLUGIN_3GX_FB_VADDR_END) { | ||||
|         return addr - Memory::PLUGIN_3GX_FB_VADDR + Service::PLGLDR::PLG_LDR::GetPluginFBAddr(); | ||||
|     } | ||||
| 
 | ||||
|     LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x{:08X}", addr); | ||||
|     // To help with debugging, set bit on address so that it's obviously invalid.
 | ||||
|     // TODO: find the correct way to handle this error
 | ||||
|     return addr | 0x80000000; | ||||
| } | ||||
| 
 | ||||
| u32 GSP_GPU::GetUnusedThreadId() const { | ||||
|     for (u32 id = 0; id < MaxGSPThreads; ++id) { | ||||
|         if (!used_thread_ids[id]) | ||||
|         if (!used_thread_ids[id]) { | ||||
|             return id; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     UNREACHABLE_MSG("All GSP threads are in use"); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| /// Gets a pointer to a thread command buffer in GSP shared memory
 | ||||
| static inline u8* GetCommandBuffer(std::shared_ptr<Kernel::SharedMemory> shared_memory, | ||||
|                                    u32 thread_id) { | ||||
|     return shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); | ||||
| CommandBuffer* GSP_GPU::GetCommandBuffer(u32 thread_id) { | ||||
|     auto* ptr = shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); | ||||
|     return reinterpret_cast<CommandBuffer*>(ptr); | ||||
| } | ||||
| 
 | ||||
| FrameBufferUpdate* GSP_GPU::GetFrameBufferInfo(u32 thread_id, u32 screen_index) { | ||||
|     DEBUG_ASSERT_MSG(screen_index < 2, "Invalid screen index"); | ||||
| 
 | ||||
|     // For each thread there are two FrameBufferUpdate fields
 | ||||
|     u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate); | ||||
|     const u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate); | ||||
|     u8* ptr = shared_memory->GetPointer(offset); | ||||
|     return reinterpret_cast<FrameBufferUpdate*>(ptr); | ||||
| } | ||||
| 
 | ||||
| /// Gets a pointer to the interrupt relay queue for a given thread index
 | ||||
| static inline InterruptRelayQueue* GetInterruptRelayQueue( | ||||
|     std::shared_ptr<Kernel::SharedMemory> shared_memory, u32 thread_id) { | ||||
| InterruptRelayQueue* GSP_GPU::GetInterruptRelayQueue(u32 thread_id) { | ||||
|     u8* ptr = shared_memory->GetPointer(sizeof(InterruptRelayQueue) * thread_id); | ||||
|     return reinterpret_cast<InterruptRelayQueue*>(ptr); | ||||
| } | ||||
|  | @ -125,19 +88,6 @@ void GSP_GPU::ClientDisconnected(std::shared_ptr<Kernel::ServerSession> server_s | |||
|     SessionRequestHandler::ClientDisconnected(server_session); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Writes a single GSP GPU hardware registers with a single u32 value | ||||
|  * (For internal use.) | ||||
|  * | ||||
|  * @param base_address The address of the register in question | ||||
|  * @param data Data to be written | ||||
|  */ | ||||
| static void WriteSingleHWReg(u32 base_address, u32 data) { | ||||
|     DEBUG_ASSERT_MSG((base_address & 3) == 0 && base_address < 0x420000, | ||||
|                      "Write address out of range or misaligned"); | ||||
|     HW::Write<u32>(base_address + REGS_BEGIN, data); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Writes sequential GSP GPU hardware registers using an array of source data | ||||
|  * | ||||
|  | @ -146,7 +96,8 @@ static void WriteSingleHWReg(u32 base_address, u32 data) { | |||
|  * @param data A vector containing the source data | ||||
|  * @return RESULT_SUCCESS if the parameters are valid, error code otherwise | ||||
|  */ | ||||
| static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<const u8> data) { | ||||
| static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<const u8> data, | ||||
|                               VideoCore::GPU& gpu) { | ||||
|     // This magic number is verified to be done by the gsp module
 | ||||
|     const u32 max_size_in_bytes = 0x80; | ||||
| 
 | ||||
|  | @ -155,28 +106,30 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<con | |||
|                   "Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})", | ||||
|                   base_address, size_in_bytes); | ||||
|         return ERR_REGS_OUTOFRANGE_OR_MISALIGNED; | ||||
|     } else if (size_in_bytes <= max_size_in_bytes) { | ||||
|         if (size_in_bytes & 3) { | ||||
|             LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes); | ||||
|             return ERR_REGS_MISALIGNED; | ||||
|         } else { | ||||
|             std::size_t offset = 0; | ||||
|             while (size_in_bytes > 0) { | ||||
|                 u32 value; | ||||
|                 std::memcpy(&value, &data[offset], sizeof(u32)); | ||||
|                 WriteSingleHWReg(base_address, value); | ||||
|     } | ||||
| 
 | ||||
|                 size_in_bytes -= 4; | ||||
|                 offset += 4; | ||||
|                 base_address += 4; | ||||
|             } | ||||
|             return RESULT_SUCCESS; | ||||
|         } | ||||
| 
 | ||||
|     } else { | ||||
|     if (size_in_bytes > max_size_in_bytes) { | ||||
|         LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes); | ||||
|         return ERR_REGS_INVALID_SIZE; | ||||
|     } | ||||
| 
 | ||||
|     if (size_in_bytes & 3) { | ||||
|         LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes); | ||||
|         return ERR_REGS_MISALIGNED; | ||||
|     } | ||||
| 
 | ||||
|     std::size_t offset = 0; | ||||
|     while (size_in_bytes > 0) { | ||||
|         u32 value; | ||||
|         std::memcpy(&value, &data[offset], sizeof(u32)); | ||||
|         gpu.WriteReg(REGS_BEGIN + base_address, value); | ||||
| 
 | ||||
|         size_in_bytes -= 4; | ||||
|         offset += 4; | ||||
|         base_address += 4; | ||||
|     } | ||||
| 
 | ||||
|     return RESULT_SUCCESS; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -190,7 +143,7 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<con | |||
|  * @return RESULT_SUCCESS if the parameters are valid, error code otherwise | ||||
|  */ | ||||
| static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std::span<const u8> data, | ||||
|                                       std::span<const u8> masks) { | ||||
|                                       std::span<const u8> masks, VideoCore::GPU& gpu) { | ||||
|     // This magic number is verified to be done by the gsp module
 | ||||
|     const u32 max_size_in_bytes = 0x80; | ||||
| 
 | ||||
|  | @ -199,60 +152,58 @@ static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std:: | |||
|                   "Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})", | ||||
|                   base_address, size_in_bytes); | ||||
|         return ERR_REGS_OUTOFRANGE_OR_MISALIGNED; | ||||
|     } else if (size_in_bytes <= max_size_in_bytes) { | ||||
|         if (size_in_bytes & 3) { | ||||
|             LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes); | ||||
|             return ERR_REGS_MISALIGNED; | ||||
|         } else { | ||||
|             std::size_t offset = 0; | ||||
|             while (size_in_bytes > 0) { | ||||
|                 const u32 reg_address = base_address + REGS_BEGIN; | ||||
|     } | ||||
| 
 | ||||
|                 u32 reg_value; | ||||
|                 HW::Read<u32>(reg_value, reg_address); | ||||
| 
 | ||||
|                 u32 value, mask; | ||||
|                 std::memcpy(&value, &data[offset], sizeof(u32)); | ||||
|                 std::memcpy(&mask, &masks[offset], sizeof(u32)); | ||||
| 
 | ||||
|                 // Update the current value of the register only for set mask bits
 | ||||
|                 reg_value = (reg_value & ~mask) | (value & mask); | ||||
| 
 | ||||
|                 WriteSingleHWReg(base_address, reg_value); | ||||
| 
 | ||||
|                 size_in_bytes -= 4; | ||||
|                 offset += 4; | ||||
|                 base_address += 4; | ||||
|             } | ||||
|             return RESULT_SUCCESS; | ||||
|         } | ||||
| 
 | ||||
|     } else { | ||||
|     if (size_in_bytes > max_size_in_bytes) { | ||||
|         LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes); | ||||
|         return ERR_REGS_INVALID_SIZE; | ||||
|     } | ||||
| 
 | ||||
|     if (size_in_bytes & 3) { | ||||
|         LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes); | ||||
|         return ERR_REGS_MISALIGNED; | ||||
|     } | ||||
| 
 | ||||
|     std::size_t offset = 0; | ||||
|     while (size_in_bytes > 0) { | ||||
|         const u32 reg_address = base_address + REGS_BEGIN; | ||||
|         u32 reg_value = gpu.ReadReg(reg_address); | ||||
| 
 | ||||
|         u32 value, mask; | ||||
|         std::memcpy(&value, &data[offset], sizeof(u32)); | ||||
|         std::memcpy(&mask, &masks[offset], sizeof(u32)); | ||||
| 
 | ||||
|         // Update the current value of the register only for set mask bits
 | ||||
|         reg_value = (reg_value & ~mask) | (value & mask); | ||||
|         gpu.WriteReg(reg_address, reg_value); | ||||
| 
 | ||||
|         size_in_bytes -= 4; | ||||
|         offset += 4; | ||||
|         base_address += 4; | ||||
|     } | ||||
| 
 | ||||
|     return RESULT_SUCCESS; | ||||
| } | ||||
| 
 | ||||
| void GSP_GPU::WriteHWRegs(Kernel::HLERequestContext& ctx) { | ||||
|     IPC::RequestParser rp(ctx); | ||||
|     u32 reg_addr = rp.Pop<u32>(); | ||||
|     u32 size = rp.Pop<u32>(); | ||||
|     std::vector<u8> src_data = rp.PopStaticBuffer(); | ||||
|     const u32 reg_addr = rp.Pop<u32>(); | ||||
|     const u32 size = rp.Pop<u32>(); | ||||
|     const auto src_data = rp.PopStaticBuffer(); | ||||
| 
 | ||||
|     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); | ||||
|     rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data)); | ||||
|     rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data, system.GPU())); | ||||
| } | ||||
| 
 | ||||
| void GSP_GPU::WriteHWRegsWithMask(Kernel::HLERequestContext& ctx) { | ||||
|     IPC::RequestParser rp(ctx); | ||||
|     u32 reg_addr = rp.Pop<u32>(); | ||||
|     u32 size = rp.Pop<u32>(); | ||||
| 
 | ||||
|     std::vector<u8> src_data = rp.PopStaticBuffer(); | ||||
|     std::vector<u8> mask_data = rp.PopStaticBuffer(); | ||||
|     const u32 reg_addr = rp.Pop<u32>(); | ||||
|     const u32 size = rp.Pop<u32>(); | ||||
|     const auto src_data = rp.PopStaticBuffer(); | ||||
|     const auto mask_data = rp.PopStaticBuffer(); | ||||
| 
 | ||||
|     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); | ||||
|     rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data)); | ||||
|     rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data, system.GPU())); | ||||
| } | ||||
| 
 | ||||
| void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { | ||||
|  | @ -270,7 +221,7 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // size should be word-aligned
 | ||||
|     // Size should be word-aligned
 | ||||
|     if ((size % 4) != 0) { | ||||
|         IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); | ||||
|         rb.Push(ERR_REGS_MISALIGNED); | ||||
|  | @ -279,8 +230,9 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { | |||
|     } | ||||
| 
 | ||||
|     std::vector<u8> buffer(size); | ||||
|     for (u32 offset = 0; offset < size; ++offset) { | ||||
|         HW::Read<u8>(buffer[offset], REGS_BEGIN + reg_addr + offset); | ||||
|     for (u32 word = 0; word < size / sizeof(u32); ++word) { | ||||
|         const u32 data = system.GPU().ReadReg(REGS_BEGIN + reg_addr + word * sizeof(u32)); | ||||
|         std::memcpy(buffer.data() + word * sizeof(u32), &data, sizeof(u32)); | ||||
|     } | ||||
| 
 | ||||
|     IPC::RequestBuilder rb = rp.MakeBuilder(1, 2); | ||||
|  | @ -288,53 +240,15 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) { | |||
|     rb.PushStaticBuffer(std::move(buffer), 0); | ||||
| } | ||||
| 
 | ||||
| ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) { | ||||
|     u32 base_address = 0x400000; | ||||
|     PAddr phys_address_left = VirtualToPhysicalAddress(info.address_left); | ||||
|     PAddr phys_address_right = VirtualToPhysicalAddress(info.address_right); | ||||
|     if (info.active_fb == 0) { | ||||
|         WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX( | ||||
|                                                 screen_id, address_left1)), | ||||
|                          phys_address_left); | ||||
|         WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX( | ||||
|                                                 screen_id, address_right1)), | ||||
|                          phys_address_right); | ||||
|     } else { | ||||
|         WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX( | ||||
|                                                 screen_id, address_left2)), | ||||
|                          phys_address_left); | ||||
|         WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX( | ||||
|                                                 screen_id, address_right2)), | ||||
|                          phys_address_right); | ||||
|     } | ||||
|     WriteSingleHWReg(base_address + | ||||
|                          4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(screen_id, stride)), | ||||
|                      info.stride); | ||||
|     WriteSingleHWReg(base_address + | ||||
|                          4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(screen_id, color_format)), | ||||
|                      info.format); | ||||
|     WriteSingleHWReg(base_address + | ||||
|                          4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(screen_id, active_fb)), | ||||
|                      info.shown_fb); | ||||
| 
 | ||||
|     if (Pica::g_debug_context) | ||||
|         Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr); | ||||
| 
 | ||||
|     if (screen_id == 0) { | ||||
|         MicroProfileFlip(); | ||||
|         Core::System::GetInstance().perf_stats->EndGameFrame(); | ||||
|     } | ||||
| 
 | ||||
|     return RESULT_SUCCESS; | ||||
| } | ||||
| 
 | ||||
| void GSP_GPU::SetBufferSwap(Kernel::HLERequestContext& ctx) { | ||||
|     IPC::RequestParser rp(ctx); | ||||
|     u32 screen_id = rp.Pop<u32>(); | ||||
|     auto fb_info = rp.PopRaw<FrameBufferInfo>(); | ||||
| 
 | ||||
|     system.GPU().SetBufferSwap(screen_id, fb_info); | ||||
| 
 | ||||
|     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); | ||||
|     rb.Push(GSP::SetBufferSwap(screen_id, fb_info)); | ||||
|     rb.Push(RESULT_SUCCESS); | ||||
| } | ||||
| 
 | ||||
| void GSP_GPU::FlushDataCache(Kernel::HLERequestContext& ctx) { | ||||
|  | @ -382,10 +296,9 @@ void GSP_GPU::RegisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) { | |||
|     u32 flags = rp.Pop<u32>(); | ||||
| 
 | ||||
|     auto interrupt_event = rp.PopObject<Kernel::Event>(); | ||||
|     // TODO(mailwl): return the right error code instead of asserting
 | ||||
|     ASSERT_MSG((interrupt_event != nullptr), "handle is not valid!"); | ||||
|     ASSERT_MSG(interrupt_event, "handle is not valid!"); | ||||
| 
 | ||||
|     interrupt_event->SetName("GSP_GSP_GPU::interrupt_event"); | ||||
|     interrupt_event->SetName("GSP_GPU::interrupt_event"); | ||||
| 
 | ||||
|     SessionData* session_data = GetSessionData(ctx.Session()); | ||||
|     session_data->interrupt_event = std::move(interrupt_event); | ||||
|  | @ -422,15 +335,17 @@ void GSP_GPU::UnregisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) { | |||
| 
 | ||||
| void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id) { | ||||
|     SessionData* session_data = FindRegisteredThreadData(thread_id); | ||||
|     if (session_data == nullptr) | ||||
|     if (!session_data) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     auto interrupt_event = session_data->interrupt_event; | ||||
|     if (interrupt_event == nullptr) { | ||||
|         LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); | ||||
|         return; | ||||
|     } | ||||
|     InterruptRelayQueue* interrupt_relay_queue = GetInterruptRelayQueue(shared_memory, thread_id); | ||||
| 
 | ||||
|     auto* interrupt_relay_queue = GetInterruptRelayQueue(thread_id); | ||||
|     u8 next = interrupt_relay_queue->index; | ||||
|     next += interrupt_relay_queue->number_interrupts; | ||||
|     next = next % 0x34; // 0x34 is the number of interrupt slots
 | ||||
|  | @ -441,29 +356,20 @@ void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id) | |||
|     interrupt_relay_queue->error_code = 0x0; // No error
 | ||||
| 
 | ||||
|     // Update framebuffer information if requested
 | ||||
|     // TODO(yuriks): Confirm where this code should be called. It is definitely updated without
 | ||||
|     //               executing any GSP commands, only waiting on the event.
 | ||||
|     // TODO(Subv): The real GSP module triggers PDC0 after updating both the top and bottom
 | ||||
|     // screen, it is currently unknown what PDC1 does.
 | ||||
|     int screen_id = (interrupt_id == InterruptId::PDC0)   ? 0 | ||||
|                     : (interrupt_id == InterruptId::PDC1) ? 1 | ||||
|                                                           : -1; | ||||
|     const s32 screen_id = (interrupt_id == InterruptId::PDC0)   ? 0 | ||||
|                           : (interrupt_id == InterruptId::PDC1) ? 1 | ||||
|                                                                 : -1; | ||||
|     if (screen_id != -1) { | ||||
|         FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); | ||||
|         auto* info = GetFrameBufferInfo(thread_id, screen_id); | ||||
|         if (info->is_dirty) { | ||||
|             GSP::SetBufferSwap(screen_id, info->framebuffer_info[info->index]); | ||||
|             system.GPU().SetBufferSwap(screen_id, info->framebuffer_info[info->index]); | ||||
|             info->is_dirty.Assign(false); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     interrupt_event->Signal(); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Signals that the specified interrupt type has occurred to userland code | ||||
|  * @param interrupt_id ID of interrupt that is being signalled | ||||
|  * @todo This should probably take a thread_id parameter and only signal this thread? | ||||
|  * @todo This probably does not belong in the GSP module, instead move to video_core | ||||
|  */ | ||||
| void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) { | ||||
|     if (nullptr == shared_memory) { | ||||
|         LOG_WARNING(Service_GSP, "cannot synchronize until GSP shared memory has been created!"); | ||||
|  | @ -488,154 +394,13 @@ void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) { | |||
|     SignalInterruptForThread(interrupt_id, active_thread_id); | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255)); | ||||
| 
 | ||||
| /// Executes the next GSP command
 | ||||
| static void ExecuteCommand(const Command& command, u32 thread_id) { | ||||
|     // Utility function to convert register ID to address
 | ||||
|     static auto WriteGPURegister = [](u32 id, u32 data) { | ||||
|         GPU::Write<u32>(0x1EF00000 + 4 * id, data); | ||||
|     }; | ||||
| 
 | ||||
|     switch (command.id) { | ||||
| 
 | ||||
|     // GX request DMA - typically used for copying memory from GSP heap to VRAM
 | ||||
|     case CommandId::REQUEST_DMA: { | ||||
|         MICROPROFILE_SCOPE(GPU_GSP_DMA); | ||||
|         Memory::MemorySystem& memory = Core::System::GetInstance().Memory(); | ||||
| 
 | ||||
|         // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever
 | ||||
|         // possible/likely
 | ||||
|         Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address, | ||||
|                                              command.dma_request.size, Memory::FlushMode::Flush); | ||||
|         Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, | ||||
|                                              command.dma_request.size, | ||||
|                                              Memory::FlushMode::Invalidate); | ||||
| 
 | ||||
|         // TODO(Subv): These memory accesses should not go through the application's memory mapping.
 | ||||
|         // They should go through the GSP module's memory mapping.
 | ||||
|         memory.CopyBlock(*Core::System::GetInstance().Kernel().GetCurrentProcess(), | ||||
|                          command.dma_request.dest_address, command.dma_request.source_address, | ||||
|                          command.dma_request.size); | ||||
|         SignalInterrupt(InterruptId::DMA); | ||||
|         break; | ||||
|     } | ||||
|     // TODO: This will need some rework in the future. (why?)
 | ||||
|     case CommandId::SUBMIT_GPU_CMDLIST: { | ||||
|         auto& params = command.submit_gpu_cmdlist; | ||||
| 
 | ||||
|         if (params.do_flush) { | ||||
|             // This flag flushes the command list (params.address, params.size) from the cache.
 | ||||
|             // Command lists are not processed by the hardware renderer, so we don't need to
 | ||||
|             // actually flush them in Citra.
 | ||||
|         } | ||||
| 
 | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.address)), | ||||
|                          VirtualToPhysicalAddress(params.address) >> 3); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.size)), | ||||
|                          params.size); | ||||
| 
 | ||||
|         // TODO: Not sure if we are supposed to always write this .. seems to trigger processing
 | ||||
|         // though
 | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.trigger)), 1); | ||||
| 
 | ||||
|         // TODO(yuriks): Figure out the meaning of the `flags` field.
 | ||||
| 
 | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     // It's assumed that the two "blocks" behave equivalently.
 | ||||
|     // Presumably this is done simply to allow two memory fills to run in parallel.
 | ||||
|     case CommandId::SET_MEMORY_FILL: { | ||||
|         auto& params = command.memory_fill; | ||||
| 
 | ||||
|         if (params.start1 != 0) { | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)), | ||||
|                              VirtualToPhysicalAddress(params.start1) >> 3); | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)), | ||||
|                              VirtualToPhysicalAddress(params.end1) >> 3); | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value_32bit)), | ||||
|                              params.value1); | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].control)), | ||||
|                              params.control1); | ||||
|         } | ||||
| 
 | ||||
|         if (params.start2 != 0) { | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)), | ||||
|                              VirtualToPhysicalAddress(params.start2) >> 3); | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)), | ||||
|                              VirtualToPhysicalAddress(params.end2) >> 3); | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value_32bit)), | ||||
|                              params.value2); | ||||
|             WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].control)), | ||||
|                              params.control2); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     case CommandId::SET_DISPLAY_TRANSFER: { | ||||
|         auto& params = command.display_transfer; | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | ||||
|                          VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | ||||
|                          VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), | ||||
|                          params.in_buffer_size); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), | ||||
|                          params.out_buffer_size); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), | ||||
|                          params.flags); | ||||
|         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     case CommandId::SET_TEXTURE_COPY: { | ||||
|         auto& params = command.texture_copy; | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), | ||||
|                          VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), | ||||
|                          VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), | ||||
|                          params.size); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), | ||||
|                          params.in_width_gap); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), | ||||
|                          params.out_width_gap); | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), params.flags); | ||||
| 
 | ||||
|         // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to
 | ||||
|         // matter.
 | ||||
|         WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     case CommandId::CACHE_FLUSH: { | ||||
|         // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
 | ||||
|         // Use command.cache_flush.regions to implement this handler
 | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     default: | ||||
|         LOG_ERROR(Service_GSP, "unknown command 0x{:08X}", (int)command.id.Value()); | ||||
|     } | ||||
| 
 | ||||
|     if (Pica::g_debug_context) | ||||
|         Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::GSPCommandProcessed, | ||||
|                                        (void*)&command); | ||||
| } | ||||
| 
 | ||||
| void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) { | ||||
|     IPC::RequestParser rp(ctx); | ||||
|     const bool enable_black = rp.Pop<bool>(); | ||||
| 
 | ||||
|     bool enable_black = rp.Pop<bool>(); | ||||
|     LCD::Regs::ColorFill data = {0}; | ||||
| 
 | ||||
|     // Since data is already zeroed, there is no need to explicitly set
 | ||||
|     // the color to black (all zero).
 | ||||
|     Pica::ColorFill data{}; | ||||
|     data.is_enabled.Assign(enable_black); | ||||
| 
 | ||||
|     LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw);    // Top LCD
 | ||||
|     LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD
 | ||||
|     system.GPU().SetColorFill(data); | ||||
| 
 | ||||
|     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); | ||||
|     rb.Push(RESULT_SUCCESS); | ||||
|  | @ -644,20 +409,17 @@ void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) { | |||
| void GSP_GPU::TriggerCmdReqQueue(Kernel::HLERequestContext& ctx) { | ||||
|     IPC::RequestParser rp(ctx); | ||||
| 
 | ||||
|     // Iterate through each thread's command queue...
 | ||||
|     for (unsigned thread_id = 0; thread_id < 0x4; ++thread_id) { | ||||
|         CommandBuffer* command_buffer = (CommandBuffer*)GetCommandBuffer(shared_memory, thread_id); | ||||
|     // Iterate through each command.
 | ||||
|     auto* command_buffer = GetCommandBuffer(active_thread_id); | ||||
|     auto& gpu = system.GPU(); | ||||
|     for (u32 i = 0; i < command_buffer->number_commands; i++) { | ||||
|         gpu.Debugger().GXCommandProcessed(command_buffer->commands[i]); | ||||
| 
 | ||||
|         // Iterate through each command...
 | ||||
|         for (unsigned i = 0; i < command_buffer->number_commands; ++i) { | ||||
|             g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]); | ||||
|         // Decode and execute command
 | ||||
|         gpu.Execute(command_buffer->commands[i]); | ||||
| 
 | ||||
|             // Decode and execute command
 | ||||
|             ExecuteCommand(command_buffer->commands[i], thread_id); | ||||
| 
 | ||||
|             // Indicates that command has completed
 | ||||
|             command_buffer->number_commands.Assign(command_buffer->number_commands - 1); | ||||
|         } | ||||
|         // Indicates that command has completed
 | ||||
|         command_buffer->number_commands.Assign(command_buffer->number_commands - 1); | ||||
|     } | ||||
| 
 | ||||
|     IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); | ||||
|  |  | |||
|  | @ -13,7 +13,8 @@ | |||
| #include "common/common_types.h" | ||||
| #include "core/hle/kernel/event.h" | ||||
| #include "core/hle/kernel/hle_ipc.h" | ||||
| #include "core/hle/result.h" | ||||
| #include "core/hle/service/gsp/gsp_command.h" | ||||
| #include "core/hle/service/gsp/gsp_interrupt.h" | ||||
| #include "core/hle/service/service.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -28,53 +29,6 @@ class SharedMemory; | |||
| 
 | ||||
| namespace Service::GSP { | ||||
| 
 | ||||
| /// GSP interrupt ID
 | ||||
| enum class InterruptId : u8 { | ||||
|     PSC0 = 0x00, | ||||
|     PSC1 = 0x01, | ||||
|     PDC0 = 0x02, // Seems called every vertical screen line
 | ||||
|     PDC1 = 0x03, // Seems called every frame
 | ||||
|     PPF = 0x04, | ||||
|     P3D = 0x05, | ||||
|     DMA = 0x06, | ||||
| }; | ||||
| 
 | ||||
| /// GSP command ID
 | ||||
| enum class CommandId : u32 { | ||||
|     REQUEST_DMA = 0x00, | ||||
|     /// Submits a commandlist for execution by the GPU.
 | ||||
|     SUBMIT_GPU_CMDLIST = 0x01, | ||||
| 
 | ||||
|     // Fills a given memory range with a particular value
 | ||||
|     SET_MEMORY_FILL = 0x02, | ||||
| 
 | ||||
|     // Copies an image and optionally performs color-conversion or scaling.
 | ||||
|     // This is highly similar to the GameCube's EFB copy feature
 | ||||
|     SET_DISPLAY_TRANSFER = 0x03, | ||||
| 
 | ||||
|     // Conceptually similar to SET_DISPLAY_TRANSFER and presumably uses the same hardware path
 | ||||
|     SET_TEXTURE_COPY = 0x04, | ||||
|     /// Flushes up to 3 cache regions in a single command.
 | ||||
|     CACHE_FLUSH = 0x05, | ||||
| }; | ||||
| 
 | ||||
| /// GSP thread interrupt relay queue
 | ||||
| struct InterruptRelayQueue { | ||||
|     // Index of last interrupt in the queue
 | ||||
|     u8 index; | ||||
|     // Number of interrupts remaining to be processed by the userland code
 | ||||
|     u8 number_interrupts; | ||||
|     // Error code - zero on success, otherwise an error has occurred
 | ||||
|     u8 error_code; | ||||
|     u8 padding1; | ||||
| 
 | ||||
|     u32 missed_PDC0; | ||||
|     u32 missed_PDC1; | ||||
| 
 | ||||
|     InterruptId slot[0x34]; ///< Interrupt ID slots
 | ||||
| }; | ||||
| static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size"); | ||||
| 
 | ||||
| struct FrameBufferInfo { | ||||
|     u32 active_fb; // 0 = first, 1 = second
 | ||||
|     u32 address_left; | ||||
|  | @ -96,95 +50,9 @@ struct FrameBufferUpdate { | |||
|     u32 pad2; | ||||
| }; | ||||
| static_assert(sizeof(FrameBufferUpdate) == 0x40, "Struct has incorrect size"); | ||||
| // TODO: Not sure if this padding is correct.
 | ||||
| // Chances are the second block is stored at offset 0x24 rather than 0x20.
 | ||||
| static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20, | ||||
|               "FrameBufferInfo element has incorrect alignment"); | ||||
| 
 | ||||
| /// GSP command
 | ||||
| struct Command { | ||||
|     BitField<0, 8, CommandId> id; | ||||
| 
 | ||||
|     union { | ||||
|         struct { | ||||
|             u32 source_address; | ||||
|             u32 dest_address; | ||||
|             u32 size; | ||||
|         } dma_request; | ||||
| 
 | ||||
|         struct { | ||||
|             u32 address; | ||||
|             u32 size; | ||||
|             u32 flags; | ||||
|             u32 unused[3]; | ||||
|             u32 do_flush; | ||||
|         } submit_gpu_cmdlist; | ||||
| 
 | ||||
|         struct { | ||||
|             u32 start1; | ||||
|             u32 value1; | ||||
|             u32 end1; | ||||
| 
 | ||||
|             u32 start2; | ||||
|             u32 value2; | ||||
|             u32 end2; | ||||
| 
 | ||||
|             u16 control1; | ||||
|             u16 control2; | ||||
|         } memory_fill; | ||||
| 
 | ||||
|         struct { | ||||
|             u32 in_buffer_address; | ||||
|             u32 out_buffer_address; | ||||
|             u32 in_buffer_size; | ||||
|             u32 out_buffer_size; | ||||
|             u32 flags; | ||||
|         } display_transfer; | ||||
| 
 | ||||
|         struct { | ||||
|             u32 in_buffer_address; | ||||
|             u32 out_buffer_address; | ||||
|             u32 size; | ||||
|             u32 in_width_gap; | ||||
|             u32 out_width_gap; | ||||
|             u32 flags; | ||||
|         } texture_copy; | ||||
| 
 | ||||
|         struct { | ||||
|             struct { | ||||
|                 u32 address; | ||||
|                 u32 size; | ||||
|             } regions[3]; | ||||
|         } cache_flush; | ||||
| 
 | ||||
|         u8 raw_data[0x1C]; | ||||
|     }; | ||||
| }; | ||||
| static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size"); | ||||
| 
 | ||||
| /// GSP shared memory GX command buffer header
 | ||||
| struct CommandBuffer { | ||||
|     union { | ||||
|         u32 hex; | ||||
| 
 | ||||
|         // Current command index. This index is updated by GSP module after loading the command
 | ||||
|         // data, right before the command is processed. When this index is updated by GSP module,
 | ||||
|         // the total commands field is decreased by one as well.
 | ||||
|         BitField<0, 8, u32> index; | ||||
| 
 | ||||
|         // Total commands to process, must not be value 0 when GSP module handles commands. This
 | ||||
|         // must be <=15 when writing a command to shared memory. This is incremented by the
 | ||||
|         // application when writing a command to shared memory, after increasing this value
 | ||||
|         // TriggerCmdReqQueue is only used if this field is value 1.
 | ||||
|         BitField<8, 8, u32> number_commands; | ||||
|     }; | ||||
| 
 | ||||
|     u32 unk[7]; | ||||
| 
 | ||||
|     Command commands[0xF]; | ||||
| }; | ||||
| static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size"); | ||||
| 
 | ||||
| constexpr u32 FRAMEBUFFER_WIDTH = 240; | ||||
| constexpr u32 FRAMEBUFFER_WIDTH_POW2 = 256; | ||||
| constexpr u32 TOP_FRAMEBUFFER_HEIGHT = 400; | ||||
|  | @ -242,6 +110,12 @@ public: | |||
|      */ | ||||
|     FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index); | ||||
| 
 | ||||
|     /// Gets a pointer to a thread command buffer in GSP shared memory
 | ||||
|     CommandBuffer* GetCommandBuffer(u32 thread_id); | ||||
| 
 | ||||
|     /// Gets a pointer to the interrupt relay queue for a given thread index
 | ||||
|     InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Retrieves the ID of the thread with GPU rights. | ||||
|      */ | ||||
|  | @ -513,7 +387,7 @@ private: | |||
|     static constexpr u32 MaxGSPThreads = 4; | ||||
| 
 | ||||
|     /// Thread ids currently in use by the sessions connected to the GSPGPU service.
 | ||||
|     std::array<bool, MaxGSPThreads> used_thread_ids = {false, false, false, false}; | ||||
|     std::array<bool, MaxGSPThreads> used_thread_ids{}; | ||||
| 
 | ||||
|     friend class SessionData; | ||||
| 
 | ||||
|  | @ -522,8 +396,6 @@ private: | |||
|     friend class boost::serialization::access; | ||||
| }; | ||||
| 
 | ||||
| ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info); | ||||
| 
 | ||||
| } // namespace Service::GSP
 | ||||
| 
 | ||||
| BOOST_CLASS_EXPORT_KEY(Service::GSP::SessionData) | ||||
|  |  | |||
							
								
								
									
										42
									
								
								src/core/hle/service/gsp/gsp_interrupt.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/core/hle/service/gsp/gsp_interrupt.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,42 @@ | |||
| // Copyright 2023 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <functional> | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace Service::GSP { | ||||
| 
 | ||||
| /// GSP interrupt ID
 | ||||
| enum class InterruptId : u8 { | ||||
|     PSC0 = 0x00, | ||||
|     PSC1 = 0x01, | ||||
|     PDC0 = 0x02, | ||||
|     PDC1 = 0x03, | ||||
|     PPF = 0x04, | ||||
|     P3D = 0x05, | ||||
|     DMA = 0x06, | ||||
| }; | ||||
| 
 | ||||
| /// GSP thread interrupt relay queue
 | ||||
| struct InterruptRelayQueue { | ||||
|     // Index of last interrupt in the queue
 | ||||
|     u8 index; | ||||
|     // Number of interrupts remaining to be processed by the userland code
 | ||||
|     u8 number_interrupts; | ||||
|     // Error code - zero on success, otherwise an error has occurred
 | ||||
|     u8 error_code; | ||||
|     u8 padding1; | ||||
| 
 | ||||
|     u32 missed_PDC0; | ||||
|     u32 missed_PDC1; | ||||
| 
 | ||||
|     InterruptId slot[0x34]; ///< Interrupt ID slots
 | ||||
| }; | ||||
| static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size"); | ||||
| 
 | ||||
| using InterruptHandler = std::function<void(InterruptId)>; | ||||
| 
 | ||||
| } // namespace Service::GSP
 | ||||
|  | @ -22,7 +22,6 @@ | |||
| #include "core/hle/service/hid/hid_user.h" | ||||
| #include "core/hle/service/service.h" | ||||
| #include "core/movie.h" | ||||
| #include "video_core/video_core.h" | ||||
| 
 | ||||
| SERVICE_CONSTRUCT_IMPL(Service::HID::Module) | ||||
| SERIALIZE_EXPORT_IMPL(Service::HID::Module) | ||||
|  |  | |||
|  | @ -1,572 +0,0 @@ | |||
| // Copyright 2014 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <cstring> | ||||
| #include <numeric> | ||||
| #include <type_traits> | ||||
| #include "common/alignment.h" | ||||
| #include "common/color.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/hle/service/gsp/gsp.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/hw/hw.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/tracer/recorder.h" | ||||
| #include "video_core/command_processor.h" | ||||
| #include "video_core/debug_utils/debug_utils.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/utils.h" | ||||
| #include "video_core/video_core.h" | ||||
| 
 | ||||
| namespace GPU { | ||||
| 
 | ||||
| Regs g_regs; | ||||
| Memory::MemorySystem* g_memory; | ||||
| 
 | ||||
| /// Event id for CoreTiming
 | ||||
| static Core::TimingEventType* vblank_event; | ||||
| 
 | ||||
| template <typename T> | ||||
| inline void Read(T& var, const u32 raw_addr) { | ||||
|     u32 addr = raw_addr - HW::VADDR_GPU; | ||||
|     u32 index = addr / 4; | ||||
| 
 | ||||
|     // Reads other than u32 are untested, so I'd rather have them abort than silently fail
 | ||||
|     if (index >= Regs::NumIds() || !std::is_same<T, u32>::value) { | ||||
|         LOG_ERROR(HW_GPU, "unknown Read{} @ {:#010X}", sizeof(var) * 8, addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     var = g_regs[addr / 4]; | ||||
| } | ||||
| 
 | ||||
| static Common::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) { | ||||
|     switch (input_format) { | ||||
|     case Regs::PixelFormat::RGBA8: | ||||
|         return Common::Color::DecodeRGBA8(src_pixel); | ||||
| 
 | ||||
|     case Regs::PixelFormat::RGB8: | ||||
|         return Common::Color::DecodeRGB8(src_pixel); | ||||
| 
 | ||||
|     case Regs::PixelFormat::RGB565: | ||||
|         return Common::Color::DecodeRGB565(src_pixel); | ||||
| 
 | ||||
|     case Regs::PixelFormat::RGB5A1: | ||||
|         return Common::Color::DecodeRGB5A1(src_pixel); | ||||
| 
 | ||||
|     case Regs::PixelFormat::RGBA4: | ||||
|         return Common::Color::DecodeRGBA4(src_pixel); | ||||
| 
 | ||||
|     default: | ||||
|         LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}", input_format); | ||||
|         return {0, 0, 0, 0}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); | ||||
| MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); | ||||
| 
 | ||||
| static void MemoryFill(const Regs::MemoryFillConfig& config) { | ||||
|     const PAddr start_addr = config.GetStartAddress(); | ||||
|     const PAddr end_addr = config.GetEndAddress(); | ||||
| 
 | ||||
|     // TODO: do hwtest with these cases
 | ||||
|     if (!g_memory->IsValidPhysicalAddress(start_addr)) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (!g_memory->IsValidPhysicalAddress(end_addr)) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (end_addr <= start_addr) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr, | ||||
|                      end_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     u8* start = g_memory->GetPhysicalPointer(start_addr); | ||||
|     u8* end = g_memory->GetPhysicalPointer(end_addr); | ||||
| 
 | ||||
|     if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) | ||||
|         return; | ||||
| 
 | ||||
|     Memory::RasterizerInvalidateRegion(config.GetStartAddress(), | ||||
|                                        config.GetEndAddress() - config.GetStartAddress()); | ||||
| 
 | ||||
|     if (config.fill_24bit) { | ||||
|         // fill with 24-bit values
 | ||||
|         for (u8* ptr = start; ptr < end; ptr += 3) { | ||||
|             ptr[0] = config.value_24bit_r; | ||||
|             ptr[1] = config.value_24bit_g; | ||||
|             ptr[2] = config.value_24bit_b; | ||||
|         } | ||||
|     } else if (config.fill_32bit) { | ||||
|         // fill with 32-bit values
 | ||||
|         if (end > start) { | ||||
|             u32 value = config.value_32bit; | ||||
|             std::size_t len = (end - start) / sizeof(u32); | ||||
|             for (std::size_t i = 0; i < len; ++i) | ||||
|                 std::memcpy(&start[i * sizeof(u32)], &value, sizeof(u32)); | ||||
|         } | ||||
|     } else { | ||||
|         // fill with 16-bit values
 | ||||
|         u16 value_16bit = config.value_16bit.Value(); | ||||
|         for (u8* ptr = start; ptr < end; ptr += sizeof(u16)) | ||||
|             std::memcpy(ptr, &value_16bit, sizeof(u16)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { | ||||
|     const PAddr src_addr = config.GetPhysicalInputAddress(); | ||||
|     PAddr dst_addr = config.GetPhysicalOutputAddress(); | ||||
| 
 | ||||
|     // TODO: do hwtest with these cases
 | ||||
|     if (!g_memory->IsValidPhysicalAddress(src_addr)) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (!g_memory->IsValidPhysicalAddress(dst_addr)) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (config.input_width == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero input width"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (config.input_height == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero input height"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (config.output_width == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero output width"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (config.output_height == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero output height"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) | ||||
|         return; | ||||
| 
 | ||||
|     // Using flip_vertically alongside crop_input_lines produces skewed output on hardware.
 | ||||
|     // We have to emulate this because some games rely on this behaviour to render correctly.
 | ||||
|     if (config.flip_vertically && config.crop_input_lines && | ||||
|         config.input_width > config.output_width) { | ||||
|         dst_addr += (config.input_width - config.output_width) * (config.output_height - 1) * | ||||
|                     GPU::Regs::BytesPerPixel(config.output_format); | ||||
|     } | ||||
| 
 | ||||
|     u8* src_pointer = g_memory->GetPhysicalPointer(src_addr); | ||||
|     u8* dst_pointer = g_memory->GetPhysicalPointer(dst_addr); | ||||
| 
 | ||||
|     if (config.scaling > config.ScaleXY) { | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}", | ||||
|                      config.scaling.Value()); | ||||
|         UNIMPLEMENTED(); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (config.input_linear && config.scaling != config.NoScale) { | ||||
|         LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | ||||
|         UNIMPLEMENTED(); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     int horizontal_scale = config.scaling != config.NoScale ? 1 : 0; | ||||
|     int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0; | ||||
| 
 | ||||
|     u32 output_width = config.output_width >> horizontal_scale; | ||||
|     u32 output_height = config.output_height >> vertical_scale; | ||||
| 
 | ||||
|     u32 input_size = | ||||
|         config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); | ||||
|     u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); | ||||
| 
 | ||||
|     Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); | ||||
|     Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); | ||||
| 
 | ||||
|     for (u32 y = 0; y < output_height; ++y) { | ||||
|         for (u32 x = 0; x < output_width; ++x) { | ||||
|             Common::Vec4<u8> src_color; | ||||
| 
 | ||||
|             // Calculate the [x,y] position of the input image
 | ||||
|             // based on the current output position and the scale
 | ||||
|             u32 input_x = x << horizontal_scale; | ||||
|             u32 input_y = y << vertical_scale; | ||||
| 
 | ||||
|             u32 output_y; | ||||
|             if (config.flip_vertically) { | ||||
|                 // Flip the y value of the output data,
 | ||||
|                 // we do this after calculating the [x,y] position of the input image
 | ||||
|                 // to account for the scaling options.
 | ||||
|                 output_y = output_height - y - 1; | ||||
|             } else { | ||||
|                 output_y = y; | ||||
|             } | ||||
| 
 | ||||
|             u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); | ||||
|             u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); | ||||
|             u32 src_offset; | ||||
|             u32 dst_offset; | ||||
| 
 | ||||
|             if (config.input_linear) { | ||||
|                 if (!config.dont_swizzle) { | ||||
|                     // Interpret the input as linear and the output as tiled
 | ||||
|                     u32 coarse_y = output_y & ~7; | ||||
|                     u32 stride = output_width * dst_bytes_per_pixel; | ||||
| 
 | ||||
|                     src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||||
|                     dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + | ||||
|                                  coarse_y * stride; | ||||
|                 } else { | ||||
|                     // Both input and output are linear
 | ||||
|                     src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||||
|                     dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; | ||||
|                 } | ||||
|             } else { | ||||
|                 if (!config.dont_swizzle) { | ||||
|                     // Interpret the input as tiled and the output as linear
 | ||||
|                     u32 coarse_y = input_y & ~7; | ||||
|                     u32 stride = config.input_width * src_bytes_per_pixel; | ||||
| 
 | ||||
|                     src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + | ||||
|                                  coarse_y * stride; | ||||
|                     dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; | ||||
|                 } else { | ||||
|                     // Both input and output are tiled
 | ||||
|                     u32 out_coarse_y = output_y & ~7; | ||||
|                     u32 out_stride = output_width * dst_bytes_per_pixel; | ||||
| 
 | ||||
|                     u32 in_coarse_y = input_y & ~7; | ||||
|                     u32 in_stride = config.input_width * src_bytes_per_pixel; | ||||
| 
 | ||||
|                     src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + | ||||
|                                  in_coarse_y * in_stride; | ||||
|                     dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + | ||||
|                                  out_coarse_y * out_stride; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             const u8* src_pixel = src_pointer + src_offset; | ||||
|             src_color = DecodePixel(config.input_format, src_pixel); | ||||
|             if (config.scaling == config.ScaleX) { | ||||
|                 Common::Vec4<u8> pixel = | ||||
|                     DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); | ||||
|                 src_color = ((src_color + pixel) / 2).Cast<u8>(); | ||||
|             } else if (config.scaling == config.ScaleXY) { | ||||
|                 Common::Vec4<u8> pixel1 = | ||||
|                     DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); | ||||
|                 Common::Vec4<u8> pixel2 = | ||||
|                     DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); | ||||
|                 Common::Vec4<u8> pixel3 = | ||||
|                     DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); | ||||
|                 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); | ||||
|             } | ||||
| 
 | ||||
|             u8* dst_pixel = dst_pointer + dst_offset; | ||||
|             switch (config.output_format) { | ||||
|             case Regs::PixelFormat::RGBA8: | ||||
|                 Common::Color::EncodeRGBA8(src_color, dst_pixel); | ||||
|                 break; | ||||
| 
 | ||||
|             case Regs::PixelFormat::RGB8: | ||||
|                 Common::Color::EncodeRGB8(src_color, dst_pixel); | ||||
|                 break; | ||||
| 
 | ||||
|             case Regs::PixelFormat::RGB565: | ||||
|                 Common::Color::EncodeRGB565(src_color, dst_pixel); | ||||
|                 break; | ||||
| 
 | ||||
|             case Regs::PixelFormat::RGB5A1: | ||||
|                 Common::Color::EncodeRGB5A1(src_color, dst_pixel); | ||||
|                 break; | ||||
| 
 | ||||
|             case Regs::PixelFormat::RGBA4: | ||||
|                 Common::Color::EncodeRGBA4(src_color, dst_pixel); | ||||
|                 break; | ||||
| 
 | ||||
|             default: | ||||
|                 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}", | ||||
|                           static_cast<u32>(config.output_format.Value())); | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void TextureCopy(const Regs::DisplayTransferConfig& config) { | ||||
|     const PAddr src_addr = config.GetPhysicalInputAddress(); | ||||
|     const PAddr dst_addr = config.GetPhysicalOutputAddress(); | ||||
| 
 | ||||
|     // TODO: do hwtest with invalid addresses
 | ||||
|     if (!g_memory->IsValidPhysicalAddress(src_addr)) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (!g_memory->IsValidPhysicalAddress(dst_addr)) { | ||||
|         LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config)) | ||||
|         return; | ||||
| 
 | ||||
|     u8* src_pointer = g_memory->GetPhysicalPointer(src_addr); | ||||
|     u8* dst_pointer = g_memory->GetPhysicalPointer(dst_addr); | ||||
| 
 | ||||
|     u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16); | ||||
| 
 | ||||
|     if (remaining_size == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this."); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     u32 input_gap = config.texture_copy.input_gap * 16; | ||||
|     u32 output_gap = config.texture_copy.output_gap * 16; | ||||
| 
 | ||||
|     // Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width
 | ||||
|     // is assigned with the total size if gap = 0.
 | ||||
|     u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16; | ||||
|     u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16; | ||||
| 
 | ||||
|     if (input_width == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this."); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (output_width == 0) { | ||||
|         LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this."); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     std::size_t contiguous_input_size = | ||||
|         config.texture_copy.size / input_width * (input_width + input_gap); | ||||
|     Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), | ||||
|                                   static_cast<u32>(contiguous_input_size)); | ||||
| 
 | ||||
|     std::size_t contiguous_output_size = | ||||
|         config.texture_copy.size / output_width * (output_width + output_gap); | ||||
|     // Only need to flush output if it has a gap
 | ||||
|     const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion | ||||
|                                                       : Memory::RasterizerInvalidateRegion; | ||||
|     FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size)); | ||||
| 
 | ||||
|     u32 remaining_input = input_width; | ||||
|     u32 remaining_output = output_width; | ||||
|     while (remaining_size > 0) { | ||||
|         u32 copy_size = std::min({remaining_input, remaining_output, remaining_size}); | ||||
| 
 | ||||
|         std::memcpy(dst_pointer, src_pointer, copy_size); | ||||
|         src_pointer += copy_size; | ||||
|         dst_pointer += copy_size; | ||||
| 
 | ||||
|         remaining_input -= copy_size; | ||||
|         remaining_output -= copy_size; | ||||
|         remaining_size -= copy_size; | ||||
| 
 | ||||
|         if (remaining_input == 0) { | ||||
|             remaining_input = input_width; | ||||
|             src_pointer += input_gap; | ||||
|         } | ||||
|         if (remaining_output == 0) { | ||||
|             remaining_output = output_width; | ||||
|             dst_pointer += output_gap; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| inline void Write(u32 addr, const T data) { | ||||
|     addr -= HW::VADDR_GPU; | ||||
|     u32 index = addr / 4; | ||||
| 
 | ||||
|     // Writes other than u32 are untested, so I'd rather have them abort than silently fail
 | ||||
|     if (index >= Regs::NumIds() || !std::is_same<T, u32>::value) { | ||||
|         LOG_ERROR(HW_GPU, "unknown Write{} {:#010X} @ {:#010X}", sizeof(data) * 8, (u32)data, addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     g_regs[index] = static_cast<u32>(data); | ||||
| 
 | ||||
|     switch (index) { | ||||
| 
 | ||||
|     // Memory fills are triggered once the fill value is written.
 | ||||
|     case GPU_REG_INDEX(memory_fill_config[0].trigger): | ||||
|     case GPU_REG_INDEX(memory_fill_config[1].trigger): { | ||||
|         const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); | ||||
|         auto& config = g_regs.memory_fill_config[is_second_filler]; | ||||
| 
 | ||||
|         if (config.trigger) { | ||||
|             MemoryFill(config); | ||||
|             LOG_TRACE(HW_GPU, "MemoryFill from {:#010X} to {:#010X}", config.GetStartAddress(), | ||||
|                       config.GetEndAddress()); | ||||
| 
 | ||||
|             // It seems that it won't signal interrupt if "address_start" is zero.
 | ||||
|             // TODO: hwtest this
 | ||||
|             if (config.GetStartAddress() != 0) { | ||||
|                 if (!is_second_filler) { | ||||
|                     Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC0); | ||||
|                 } else { | ||||
|                     Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PSC1); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // Reset "trigger" flag and set the "finish" flag
 | ||||
|             // NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
 | ||||
|             config.trigger.Assign(0); | ||||
|             config.finished.Assign(1); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     case GPU_REG_INDEX(display_transfer_config.trigger): { | ||||
|         MICROPROFILE_SCOPE(GPU_DisplayTransfer); | ||||
| 
 | ||||
|         const auto& config = g_regs.display_transfer_config; | ||||
|         if (config.trigger & 1) { | ||||
| 
 | ||||
|             if (Pica::g_debug_context) | ||||
|                 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, | ||||
|                                                nullptr); | ||||
| 
 | ||||
|             if (config.is_texture_copy) { | ||||
|                 TextureCopy(config); | ||||
|                 LOG_TRACE(HW_GPU, | ||||
|                           "TextureCopy: {:#X} bytes from {:#010X}({}+{})-> " | ||||
|                           "{:#010X}({}+{}), flags {:#010X}", | ||||
|                           config.texture_copy.size, config.GetPhysicalInputAddress(), | ||||
|                           config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16, | ||||
|                           config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16, | ||||
|                           config.texture_copy.output_gap * 16, config.flags); | ||||
|             } else { | ||||
|                 DisplayTransfer(config); | ||||
|                 LOG_TRACE(HW_GPU, | ||||
|                           "DisplayTransfer: {:#010X}({}x{})-> " | ||||
|                           "{:#010X}({}x{}), dst format {:x}, flags {:#010X}", | ||||
|                           config.GetPhysicalInputAddress(), config.input_width.Value(), | ||||
|                           config.input_height.Value(), config.GetPhysicalOutputAddress(), | ||||
|                           config.output_width.Value(), config.output_height.Value(), | ||||
|                           static_cast<u32>(config.output_format.Value()), config.flags); | ||||
|             } | ||||
| 
 | ||||
|             g_regs.display_transfer_config.trigger = 0; | ||||
|             Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PPF); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     // Seems like writing to this register triggers processing
 | ||||
|     case GPU_REG_INDEX(command_processor_config.trigger): { | ||||
|         const auto& config = g_regs.command_processor_config; | ||||
|         if (config.trigger & 1) { | ||||
|             MICROPROFILE_SCOPE(GPU_CmdlistProcessing); | ||||
| 
 | ||||
|             Pica::CommandProcessor::ProcessCommandList(config.GetPhysicalAddress(), config.size); | ||||
| 
 | ||||
|             g_regs.command_processor_config.trigger = 0; | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     // Notify tracer about the register write
 | ||||
|     // This is happening *after* handling the write to make sure we properly catch all memory reads.
 | ||||
|     if (Pica::g_debug_context && Pica::g_debug_context->recorder) { | ||||
|         // addr + GPU VBase - IO VBase + IO PBase
 | ||||
|         Pica::g_debug_context->recorder->RegisterWritten<T>( | ||||
|             addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Explicitly instantiate template functions because we aren't defining this in the header:
 | ||||
| 
 | ||||
| template void Read<u64>(u64& var, const u32 addr); | ||||
| template void Read<u32>(u32& var, const u32 addr); | ||||
| template void Read<u16>(u16& var, const u32 addr); | ||||
| template void Read<u8>(u8& var, const u32 addr); | ||||
| 
 | ||||
| template void Write<u64>(u32 addr, const u64 data); | ||||
| template void Write<u32>(u32 addr, const u32 data); | ||||
| template void Write<u16>(u32 addr, const u16 data); | ||||
| template void Write<u8>(u32 addr, const u8 data); | ||||
| 
 | ||||
| /// Update hardware
 | ||||
| static void VBlankCallback(std::uintptr_t user_data, s64 cycles_late) { | ||||
|     VideoCore::g_renderer->SwapBuffers(); | ||||
| 
 | ||||
|     // Signal to GSP that GPU interrupt has occurred
 | ||||
|     // TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub
 | ||||
|     // screen, or if both use the same interrupts and these two instead determine the
 | ||||
|     // beginning and end of the VBlank period. If needed, split the interrupt firing into
 | ||||
|     // two different intervals.
 | ||||
|     Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0); | ||||
|     Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1); | ||||
| 
 | ||||
|     // Reschedule recurrent event
 | ||||
|     Core::System::GetInstance().CoreTiming().ScheduleEvent(frame_ticks - cycles_late, vblank_event); | ||||
| } | ||||
| 
 | ||||
| /// Initialize hardware
 | ||||
| void Init(Memory::MemorySystem& memory) { | ||||
|     g_memory = &memory; | ||||
|     std::memset(&g_regs, 0, sizeof(g_regs)); | ||||
| 
 | ||||
|     auto& framebuffer_top = g_regs.framebuffer_config[0]; | ||||
|     auto& framebuffer_sub = g_regs.framebuffer_config[1]; | ||||
| 
 | ||||
|     // Setup default framebuffer addresses (located in VRAM)
 | ||||
|     // .. or at least these are the ones used by system applets.
 | ||||
|     // There's probably a smarter way to come up with addresses
 | ||||
|     // like this which does not require hardcoding.
 | ||||
|     framebuffer_top.address_left1 = 0x181E6000; | ||||
|     framebuffer_top.address_left2 = 0x1822C800; | ||||
|     framebuffer_top.address_right1 = 0x18273000; | ||||
|     framebuffer_top.address_right2 = 0x182B9800; | ||||
|     framebuffer_sub.address_left1 = 0x1848F000; | ||||
|     framebuffer_sub.address_left2 = 0x184C7800; | ||||
| 
 | ||||
|     framebuffer_top.width.Assign(240); | ||||
|     framebuffer_top.height.Assign(400); | ||||
|     framebuffer_top.stride = 3 * 240; | ||||
|     framebuffer_top.color_format.Assign(Regs::PixelFormat::RGB8); | ||||
|     framebuffer_top.active_fb = 0; | ||||
| 
 | ||||
|     framebuffer_sub.width.Assign(240); | ||||
|     framebuffer_sub.height.Assign(320); | ||||
|     framebuffer_sub.stride = 3 * 240; | ||||
|     framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8); | ||||
|     framebuffer_sub.active_fb = 0; | ||||
| 
 | ||||
|     Core::Timing& timing = Core::System::GetInstance().CoreTiming(); | ||||
|     vblank_event = timing.RegisterEvent("GPU::VBlankCallback", VBlankCallback); | ||||
|     timing.ScheduleEvent(frame_ticks, vblank_event); | ||||
| 
 | ||||
|     LOG_DEBUG(HW_GPU, "initialized OK"); | ||||
| } | ||||
| 
 | ||||
| /// Shutdown hardware
 | ||||
| void Shutdown() { | ||||
|     LOG_DEBUG(HW_GPU, "shutdown OK"); | ||||
| } | ||||
| 
 | ||||
| } // namespace GPU
 | ||||
|  | @ -1,344 +0,0 @@ | |||
| // Copyright 2014 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstddef> | ||||
| #include <type_traits> | ||||
| #include <boost/serialization/access.hpp> | ||||
| #include <boost/serialization/binary_object.hpp> | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/core_timing.h" | ||||
| 
 | ||||
| namespace Memory { | ||||
| class MemorySystem; | ||||
| } | ||||
| 
 | ||||
| namespace GPU { | ||||
| 
 | ||||
| // Measured on hardware to be 2240568 timer cycles or 4481136 ARM11 cycles
 | ||||
| constexpr u64 frame_ticks = 4481136ull; | ||||
| 
 | ||||
| // Refresh rate defined by ratio of ARM11 frequency to ARM11 ticks per frame
 | ||||
| // (268,111,856) / (4,481,136) = 59.83122493939037Hz
 | ||||
| constexpr double SCREEN_REFRESH_RATE = BASE_CLOCK_RATE_ARM11 / static_cast<double>(frame_ticks); | ||||
| 
 | ||||
| // Returns index corresponding to the Regs member labeled by field_name
 | ||||
| #define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32)) | ||||
| 
 | ||||
| // Returns index corresponding to the Regs::FramebufferConfig labeled by field_name
 | ||||
| // screen_id is a subscript for Regs::framebuffer_config
 | ||||
| #define GPU_FRAMEBUFFER_REG_INDEX(screen_id, field_name)                                           \ | ||||
|     ((offsetof(GPU::Regs, framebuffer_config) +                                                    \ | ||||
|       sizeof(GPU::Regs::FramebufferConfig) * (screen_id) +                                         \ | ||||
|       offsetof(GPU::Regs::FramebufferConfig, field_name)) /                                        \ | ||||
|      sizeof(u32)) | ||||
| 
 | ||||
| // MMIO region 0x1EFxxxxx
 | ||||
| struct Regs { | ||||
| 
 | ||||
| // helper macro to make sure the defined structures are of the expected size.
 | ||||
| #define ASSERT_MEMBER_SIZE(name, size_in_bytes)                                                    \ | ||||
|     static_assert(sizeof(name) == size_in_bytes,                                                   \ | ||||
|                   "Structure size and register block length don't match") | ||||
| 
 | ||||
|     // Components are laid out in reverse byte order, most significant bits first.
 | ||||
|     enum class PixelFormat : u32 { | ||||
|         RGBA8 = 0, | ||||
|         RGB8 = 1, | ||||
|         RGB565 = 2, | ||||
|         RGB5A1 = 3, | ||||
|         RGBA4 = 4, | ||||
|     }; | ||||
| 
 | ||||
|     /**
 | ||||
|      * Returns the number of bytes per pixel. | ||||
|      */ | ||||
|     static int BytesPerPixel(PixelFormat format) { | ||||
|         switch (format) { | ||||
|         case PixelFormat::RGBA8: | ||||
|             return 4; | ||||
|         case PixelFormat::RGB8: | ||||
|             return 3; | ||||
|         case PixelFormat::RGB565: | ||||
|         case PixelFormat::RGB5A1: | ||||
|         case PixelFormat::RGBA4: | ||||
|             return 2; | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
| 
 | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x4); | ||||
| 
 | ||||
|     struct MemoryFillConfig { | ||||
|         u32 address_start; | ||||
|         u32 address_end; | ||||
| 
 | ||||
|         union { | ||||
|             u32 value_32bit; | ||||
| 
 | ||||
|             BitField<0, 16, u32> value_16bit; | ||||
| 
 | ||||
|             // TODO: Verify component order
 | ||||
|             BitField<0, 8, u32> value_24bit_r; | ||||
|             BitField<8, 8, u32> value_24bit_g; | ||||
|             BitField<16, 8, u32> value_24bit_b; | ||||
|         }; | ||||
| 
 | ||||
|         union { | ||||
|             u32 control; | ||||
| 
 | ||||
|             // Setting this field to 1 triggers the memory fill.
 | ||||
|             // This field also acts as a status flag, and gets reset to 0 upon completion.
 | ||||
|             BitField<0, 1, u32> trigger; | ||||
| 
 | ||||
|             // Set to 1 upon completion.
 | ||||
|             BitField<1, 1, u32> finished; | ||||
| 
 | ||||
|             // If both of these bits are unset, then it will fill the memory with a 16 bit value
 | ||||
|             // 1: fill with 24-bit wide values
 | ||||
|             BitField<8, 1, u32> fill_24bit; | ||||
|             // 1: fill with 32-bit wide values
 | ||||
|             BitField<9, 1, u32> fill_32bit; | ||||
|         }; | ||||
| 
 | ||||
|         inline u32 GetStartAddress() const { | ||||
|             return DecodeAddressRegister(address_start); | ||||
|         } | ||||
| 
 | ||||
|         inline u32 GetEndAddress() const { | ||||
|             return DecodeAddressRegister(address_end); | ||||
|         } | ||||
| 
 | ||||
|         inline std::string DebugName() const { | ||||
|             return fmt::format("from {:#X} to {:#X} with {}-bit value {:#X}", GetStartAddress(), | ||||
|                                GetEndAddress(), fill_32bit ? "32" : (fill_24bit ? "24" : "16"), | ||||
|                                value_32bit); | ||||
|         } | ||||
|     } memory_fill_config[2]; | ||||
|     ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10); | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x10b); | ||||
| 
 | ||||
|     struct FramebufferConfig { | ||||
|         union { | ||||
|             u32 size; | ||||
| 
 | ||||
|             BitField<0, 16, u32> width; | ||||
|             BitField<16, 16, u32> height; | ||||
|         }; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x2); | ||||
| 
 | ||||
|         u32 address_left1; | ||||
|         u32 address_left2; | ||||
| 
 | ||||
|         union { | ||||
|             u32 format; | ||||
| 
 | ||||
|             BitField<0, 3, PixelFormat> color_format; | ||||
|         }; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
| 
 | ||||
|         union { | ||||
|             u32 active_fb; | ||||
| 
 | ||||
|             // 0: Use parameters ending with "1"
 | ||||
|             // 1: Use parameters ending with "2"
 | ||||
|             BitField<0, 1, u32> second_fb_active; | ||||
|         }; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x5); | ||||
| 
 | ||||
|         // Distance between two pixel rows, in bytes
 | ||||
|         u32 stride; | ||||
| 
 | ||||
|         u32 address_right1; | ||||
|         u32 address_right2; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x30); | ||||
|     } framebuffer_config[2]; | ||||
|     ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100); | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x169); | ||||
| 
 | ||||
|     struct DisplayTransferConfig { | ||||
          // Registers configuring a display transfer or texture copy. Member order and padding
          // define the register layout, so nothing here may be reordered.
|         u32 input_address; | ||||
|         u32 output_address; | ||||
| 
 | ||||
          // Both getters run the raw register value through DecodeAddressRegister.
|         inline u32 GetPhysicalInputAddress() const { | ||||
|             return DecodeAddressRegister(input_address); | ||||
|         } | ||||
| 
 | ||||
|         inline u32 GetPhysicalOutputAddress() const { | ||||
|             return DecodeAddressRegister(output_address); | ||||
|         } | ||||
| 
 | ||||
          // Builds a human-readable summary of the transfer.
          // NOTE(review): the text labelled "stride" is fed input_width and the text labelled
          // "width" is fed output_width -- confirm the labels are intended.
          // NOTE(review): fmt::format may throw (e.g. on allocation failure); inside a noexcept
          // function that would terminate the program -- confirm this is acceptable.
|         inline std::string DebugName() const noexcept { | ||||
|             return fmt::format("from {:#x} to {:#x} with {} scaling and stride {}, width {}", | ||||
|                                GetPhysicalInputAddress(), GetPhysicalOutputAddress(), | ||||
|                                scaling == NoScale ? "no" : (scaling == ScaleX ? "X" : "XY"), | ||||
|                                input_width.Value(), output_width.Value()); | ||||
|         } | ||||
| 
 | ||||
|         union { | ||||
|             u32 output_size; | ||||
| 
 | ||||
|             BitField<0, 16, u32> output_width; | ||||
|             BitField<16, 16, u32> output_height; | ||||
|         }; | ||||
| 
 | ||||
|         union { | ||||
|             u32 input_size; | ||||
| 
 | ||||
|             BitField<0, 16, u32> input_width; | ||||
|             BitField<16, 16, u32> input_height; | ||||
|         }; | ||||
| 
 | ||||
|         enum ScalingMode : u32 { | ||||
|             NoScale = 0, // Does not scale the image
 | ||||
|             ScaleX = 1,  // Halves the image along the X axis and applies a box filter
 | ||||
|             ScaleXY = | ||||
|                 2, // Halves the image along both the X and Y axes and applies a box filter
 | ||||
|         }; | ||||
| 
 | ||||
|         union { | ||||
|             u32 flags; | ||||
| 
 | ||||
|             BitField<0, 1, u32> flip_vertically; // Flips input data vertically
 | ||||
|             BitField<1, 1, u32> input_linear;    // Converts from linear to tiled format
 | ||||
|             BitField<2, 1, u32> crop_input_lines; | ||||
|             BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any
 | ||||
|                                                  // processing, honouring the texture_copy fields
 | ||||
|             BitField<5, 1, u32> dont_swizzle; | ||||
|             BitField<8, 3, PixelFormat> input_format; | ||||
|             BitField<12, 3, PixelFormat> output_format; | ||||
|             /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
 | ||||
|             BitField<16, 1, u32> block_32;        // TODO(yuriks): unimplemented
 | ||||
|             BitField<24, 2, ScalingMode> scaling; // Selects the scaling mode of the transfer
 | ||||
|         }; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
| 
 | ||||
|         // Writing to this field appears to trigger the display transfer
 | ||||
|         u32 trigger; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
| 
 | ||||
|         struct { | ||||
|             u32 size; // The lower 4 bits are ignored
 | ||||
| 
 | ||||
|             union { | ||||
|                 u32 input_size; | ||||
| 
 | ||||
|                 BitField<0, 16, u32> input_width; | ||||
|                 BitField<16, 16, u32> input_gap; | ||||
|             }; | ||||
| 
 | ||||
|             union { | ||||
|                 u32 output_size; | ||||
| 
 | ||||
|                 BitField<0, 16, u32> output_width; | ||||
|                 BitField<16, 16, u32> output_gap; | ||||
|             }; | ||||
|         } texture_copy; | ||||
|     } display_transfer_config; | ||||
|     ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x32D); | ||||
| 
 | ||||
|     struct { | ||||
|         // Size of the command list, in bytes
 | ||||
|         u32 size; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
| 
 | ||||
|         // Raw command list address register; decode it with GetPhysicalAddress()
 | ||||
|         u32 address; | ||||
| 
 | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
| 
 | ||||
|         // Writing to this field appears to trigger command list processing
 | ||||
|         u32 trigger; | ||||
| 
 | ||||
          // Returns `address` passed through DecodeAddressRegister.
|         inline u32 GetPhysicalAddress() const { | ||||
|             return DecodeAddressRegister(address); | ||||
|         } | ||||
|     } command_processor_config; | ||||
|     ASSERT_MEMBER_SIZE(command_processor_config, 0x14); | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x9c3); | ||||
| 
 | ||||
|     static constexpr std::size_t NumIds() { | ||||
          // Number of u32-sized words that make up the whole Regs structure.
|         return sizeof(Regs) / sizeof(u32); | ||||
|     } | ||||
| 
 | ||||
|     const u32& operator[](int index) const { | ||||
          // Read-only access to the structure viewed as a flat array of u32 words.
          // No bounds check is performed; callers must keep index within [0, NumIds()).
|         const u32* content = reinterpret_cast<const u32*>(this); | ||||
|         return content[index]; | ||||
|     } | ||||
| 
 | ||||
|     u32& operator[](int index) { | ||||
          // Mutable access to the structure viewed as a flat array of u32 words.
          // No bounds check is performed; callers must keep index within [0, NumIds()).
|         u32* content = reinterpret_cast<u32*>(this); | ||||
|         return content[index]; | ||||
|     } | ||||
| 
 | ||||
| #undef ASSERT_MEMBER_SIZE | ||||
| 
 | ||||
| private: | ||||
|     /*
 | ||||
|      * Most physical addresses which GPU registers refer to are 8-byte aligned, | ||||
|      * so this helper turns a raw register value into an address by multiplying it by 8. | ||||
|      * Use it whenever an address has to be derived from a raw register value. | ||||
|      * NOTE(review): assuming u32 is a 32-bit unsigned type, the product wraps for | ||||
|      * register values of 0x20000000 and above -- confirm such values cannot occur. | ||||
|      */ | ||||
|     static inline u32 DecodeAddressRegister(u32 register_value) { | ||||
|         return register_value * 8; | ||||
|     } | ||||
| 
 | ||||
|     template <class Archive> | ||||
|     void serialize(Archive& ar, const unsigned int) { | ||||
          // Boost.Serialization hook: the whole structure (padding included) is archived as one
          // opaque binary blob of sizeof(Regs) bytes. The version argument is unused.
|         ar& boost::serialization::make_binary_object(this, sizeof(Regs)); | ||||
|     } | ||||
|     friend class boost::serialization::access; | ||||
| }; | ||||
| static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); | ||||
| 
 | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|     static_assert(offsetof(Regs, field_name) == position * 4,                                      \ | ||||
|                   "Field " #field_name " has invalid position") | ||||
| 
 | ||||
| ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); | ||||
| ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); | ||||
| ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); | ||||
| ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); | ||||
| ASSERT_REG_POSITION(display_transfer_config, 0x00300); | ||||
| ASSERT_REG_POSITION(command_processor_config, 0x00638); | ||||
| 
 | ||||
| #undef ASSERT_REG_POSITION | ||||
| 
 | ||||
| // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
 | ||||
| // anyway.
 | ||||
| static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); | ||||
| 
 | ||||
| extern Regs g_regs; | ||||
| 
 | ||||
| template <typename T> | ||||
| void Read(T& var, const u32 addr); | ||||
| 
 | ||||
| template <typename T> | ||||
| void Write(u32 addr, const T data); | ||||
| 
 | ||||
| /// Initialize hardware
 | ||||
| void Init(Memory::MemorySystem& memory); | ||||
| 
 | ||||
| /// Shutdown hardware
 | ||||
| void Shutdown(); | ||||
| 
 | ||||
| } // namespace GPU
 | ||||
|  | @ -1,102 +0,0 @@ | |||
| // Copyright 2014 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "core/hw/aes/key.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/hw/hw.h" | ||||
| #include "core/hw/lcd.h" | ||||
| 
 | ||||
| namespace HW { | ||||
| 
 | ||||
| template <typename T> | ||||
| inline void Read(T& var, const u32 addr) { | ||||
|     switch (addr & 0xFFFFF000) { | ||||
|     case VADDR_GPU: | ||||
|     case VADDR_GPU + 0x1000: | ||||
|     case VADDR_GPU + 0x2000: | ||||
|     case VADDR_GPU + 0x3000: | ||||
|     case VADDR_GPU + 0x4000: | ||||
|     case VADDR_GPU + 0x5000: | ||||
|     case VADDR_GPU + 0x6000: | ||||
|     case VADDR_GPU + 0x7000: | ||||
|     case VADDR_GPU + 0x8000: | ||||
|     case VADDR_GPU + 0x9000: | ||||
|     case VADDR_GPU + 0xA000: | ||||
|     case VADDR_GPU + 0xB000: | ||||
|     case VADDR_GPU + 0xC000: | ||||
|     case VADDR_GPU + 0xD000: | ||||
|     case VADDR_GPU + 0xE000: | ||||
|     case VADDR_GPU + 0xF000: | ||||
|         GPU::Read(var, addr); | ||||
|         break; | ||||
|     case VADDR_LCD: | ||||
|         LCD::Read(var, addr); | ||||
|         break; | ||||
|     default: | ||||
|         LOG_ERROR(HW_Memory, "unknown Read{} @ {:#010X}", sizeof(var) * 8, addr); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| inline void Write(u32 addr, const T data) { | ||||
|     switch (addr & 0xFFFFF000) { | ||||
|     case VADDR_GPU: | ||||
|     case VADDR_GPU + 0x1000: | ||||
|     case VADDR_GPU + 0x2000: | ||||
|     case VADDR_GPU + 0x3000: | ||||
|     case VADDR_GPU + 0x4000: | ||||
|     case VADDR_GPU + 0x5000: | ||||
|     case VADDR_GPU + 0x6000: | ||||
|     case VADDR_GPU + 0x7000: | ||||
|     case VADDR_GPU + 0x8000: | ||||
|     case VADDR_GPU + 0x9000: | ||||
|     case VADDR_GPU + 0xA000: | ||||
|     case VADDR_GPU + 0xB000: | ||||
|     case VADDR_GPU + 0xC000: | ||||
|     case VADDR_GPU + 0xD000: | ||||
|     case VADDR_GPU + 0xE000: | ||||
|     case VADDR_GPU + 0xF000: | ||||
|         GPU::Write(addr, data); | ||||
|         break; | ||||
|     case VADDR_LCD: | ||||
|         LCD::Write(addr, data); | ||||
|         break; | ||||
|     default: | ||||
|         LOG_ERROR(HW_Memory, "unknown Write{} {:#010X} @ {:#010X}", sizeof(data) * 8, (u32)data, | ||||
|                   addr); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // The templates above are defined in this source file rather than in the header, so every
 | ||||
| 
 | ||||
    // supported access width (u8, u16, u32, u64) is explicitly instantiated here.
| template void Read<u64>(u64& var, const u32 addr); | ||||
| template void Read<u32>(u32& var, const u32 addr); | ||||
| template void Read<u16>(u16& var, const u32 addr); | ||||
| template void Read<u8>(u8& var, const u32 addr); | ||||
| 
 | ||||
| template void Write<u64>(u32 addr, const u64 data); | ||||
| template void Write<u32>(u32 addr, const u32 data); | ||||
| template void Write<u16>(u32 addr, const u16 data); | ||||
| template void Write<u8>(u32 addr, const u8 data); | ||||
| 
 | ||||
| /// Update hardware (currently a no-op: the body is empty)
 | ||||
| void Update() {} | ||||
| 
 | ||||
| /// Initialize hardware: AES keys first, then the GPU (which receives `memory`), then the LCD
 | ||||
| void Init(Memory::MemorySystem& memory) { | ||||
|     AES::InitKeys(); | ||||
|     GPU::Init(memory); | ||||
|     LCD::Init(); | ||||
|     LOG_DEBUG(HW, "initialized OK"); | ||||
| } | ||||
| 
 | ||||
| /// Shutdown hardware: shuts down the GPU and then the LCD (nothing is done here for AES)
 | ||||
| void Shutdown() { | ||||
|     GPU::Shutdown(); | ||||
|     LCD::Shutdown(); | ||||
|     LOG_DEBUG(HW, "shutdown OK"); | ||||
| } | ||||
| } // namespace HW
 | ||||
|  | @ -1,54 +0,0 @@ | |||
| // Copyright 2014 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace Memory { | ||||
| class MemorySystem; | ||||
| } | ||||
| 
 | ||||
| namespace HW { | ||||
| 
 | ||||
| /// Start addresses of the IO register regions, in the user virtual address space.
 | ||||
| enum : u32 { | ||||
|     VADDR_HASH = 0x1EC01000, | ||||
|     VADDR_CSND = 0x1EC03000, | ||||
|     VADDR_DSP = 0x1EC40000, | ||||
      // NOTE(review): VADDR_PDN and VADDR_CODEC below share the value 0x1EC41000 -- confirm that
      // this is intentional and not a typo in one of them.
|     VADDR_PDN = 0x1EC41000, | ||||
|     VADDR_CODEC = 0x1EC41000, | ||||
|     VADDR_SPI = 0x1EC42000, | ||||
|     VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM?
 | ||||
|     VADDR_I2C = 0x1EC44000, | ||||
|     VADDR_CODEC_2 = 0x1EC45000, | ||||
|     VADDR_HID = 0x1EC46000, | ||||
|     VADDR_GPIO = 0x1EC47000, | ||||
|     VADDR_I2C_2 = 0x1EC48000, | ||||
|     VADDR_SPI_3 = 0x1EC60000, | ||||
|     VADDR_I2C_3 = 0x1EC61000, | ||||
|     VADDR_MIC = 0x1EC62000, | ||||
|     VADDR_PXI = 0x1EC63000, | ||||
|     VADDR_LCD = 0x1ED02000, | ||||
|     VADDR_DSP_2 = 0x1ED03000, | ||||
|     VADDR_HASH_2 = 0x1EE01000, | ||||
|     VADDR_GPU = 0x1EF00000, | ||||
| }; | ||||
| 
 | ||||
| template <typename T> | ||||
| void Read(T& var, const u32 addr); | ||||
| 
 | ||||
| template <typename T> | ||||
| void Write(u32 addr, const T data); | ||||
| 
 | ||||
| /// Update hardware
 | ||||
| void Update(); | ||||
| 
 | ||||
| /// Initialize hardware
 | ||||
| void Init(Memory::MemorySystem& memory); | ||||
| 
 | ||||
| /// Shutdown hardware
 | ||||
| void Shutdown(); | ||||
| 
 | ||||
| } // namespace HW
 | ||||
|  | @ -1,76 +0,0 @@ | |||
| // Copyright 2015 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <cstring> | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "core/hw/hw.h" | ||||
| #include "core/hw/lcd.h" | ||||
| #include "core/tracer/recorder.h" | ||||
| #include "video_core/debug_utils/debug_utils.h" | ||||
| 
 | ||||
| namespace LCD { | ||||
| 
 | ||||
| Regs g_regs; | ||||
| 
 | ||||
| template <typename T> | ||||
| inline void Read(T& var, const u32 raw_addr) { | ||||
|     u32 addr = raw_addr - HW::VADDR_LCD; | ||||
|     u32 index = addr / 4; | ||||
| 
 | ||||
|     // Reads other than u32 are untested, so I'd rather have them abort than silently fail
 | ||||
|     if (index >= 0x400 || !std::is_same<T, u32>::value) { | ||||
|         LOG_ERROR(HW_LCD, "unknown Read{} @ {:#010X}", sizeof(var) * 8, addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     var = g_regs[index]; | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| inline void Write(u32 addr, const T data) { | ||||
|     addr -= HW::VADDR_LCD; | ||||
|     u32 index = addr / 4; | ||||
| 
 | ||||
|     // Writes other than u32 are untested, so I'd rather have them abort than silently fail
 | ||||
|     if (index >= 0x400 || !std::is_same<T, u32>::value) { | ||||
|         LOG_ERROR(HW_LCD, "unknown Write{} {:#010X} @ {:#010X}", sizeof(data) * 8, (u32)data, addr); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     g_regs[index] = static_cast<u32>(data); | ||||
| 
 | ||||
|     // Notify tracer about the register write
 | ||||
|     // This is happening *after* handling the write to make sure we properly catch all memory reads.
 | ||||
|     if (Pica::g_debug_context && Pica::g_debug_context->recorder) { | ||||
|         // addr + GPU VBase - IO VBase + IO PBase
 | ||||
|         Pica::g_debug_context->recorder->RegisterWritten<T>( | ||||
|             addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // The templates above are defined in this source file rather than in the header, so every
 | ||||
| 
 | ||||
    // access width (u8, u16, u32, u64) is explicitly instantiated here. Note that Read/Write
    // above only service the u32 variants; the other widths just log an error.
| template void Read<u64>(u64& var, const u32 addr); | ||||
| template void Read<u32>(u32& var, const u32 addr); | ||||
| template void Read<u16>(u16& var, const u32 addr); | ||||
| template void Read<u8>(u8& var, const u32 addr); | ||||
| 
 | ||||
| template void Write<u64>(u32 addr, const u64 data); | ||||
| template void Write<u32>(u32 addr, const u32 data); | ||||
| template void Write<u16>(u32 addr, const u16 data); | ||||
| template void Write<u8>(u32 addr, const u8 data); | ||||
| 
 | ||||
| /// Initialize hardware: zero-fills the global LCD register block g_regs
 | ||||
| void Init() { | ||||
|     std::memset(&g_regs, 0, sizeof(g_regs)); | ||||
|     LOG_DEBUG(HW_LCD, "initialized OK"); | ||||
| } | ||||
| 
 | ||||
| /// Shutdown hardware: nothing to release, only emits a debug log line
 | ||||
| void Shutdown() { | ||||
|     LOG_DEBUG(HW_LCD, "shutdown OK"); | ||||
| } | ||||
| 
 | ||||
| } // namespace LCD
 | ||||
|  | @ -1,89 +0,0 @@ | |||
| // Copyright 2015 Citra Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstddef> | ||||
| #include <type_traits> | ||||
| #include <boost/serialization/access.hpp> | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| #define LCD_REG_INDEX(field_name) (offsetof(LCD::Regs, field_name) / sizeof(u32)) | ||||
| 
 | ||||
| namespace LCD { | ||||
| 
 | ||||
| struct Regs { | ||||
      // LCD register file. Member order and padding define the register layout; the
      // ASSERT_REG_POSITION checks after the struct expect color_fill_top at word 0x81,
      // backlight_top at 0x90, color_fill_bottom at 0x281 and backlight_bottom at 0x290.
| 
 | ||||
|     union ColorFill { | ||||
|         u32 raw; | ||||
| 
 | ||||
          // Views into `raw`: one 8-bit channel each for R, G and B, plus an enable flag in bit 24.
|         BitField<0, 8, u32> color_r; | ||||
|         BitField<8, 8, u32> color_g; | ||||
|         BitField<16, 8, u32> color_b; | ||||
|         BitField<24, 1, u32> is_enabled; | ||||
|     }; | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x81); | ||||
|     ColorFill color_fill_top; | ||||
|     INSERT_PADDING_WORDS(0xE); | ||||
|     u32 backlight_top; | ||||
| 
 | ||||
|     INSERT_PADDING_WORDS(0x1F0); | ||||
| 
 | ||||
|     ColorFill color_fill_bottom; | ||||
|     INSERT_PADDING_WORDS(0xE); | ||||
|     u32 backlight_bottom; | ||||
|     INSERT_PADDING_WORDS(0x16F); | ||||
| 
 | ||||
|     static constexpr std::size_t NumIds() { | ||||
          // Number of u32-sized words that make up the whole structure.
|         return sizeof(Regs) / sizeof(u32); | ||||
|     } | ||||
| 
 | ||||
|     const u32& operator[](int index) const { | ||||
          // Read-only flat word access; no bounds check is performed here.
|         const u32* content = reinterpret_cast<const u32*>(this); | ||||
|         return content[index]; | ||||
|     } | ||||
| 
 | ||||
|     u32& operator[](int index) { | ||||
          // Mutable flat word access; no bounds check is performed here.
|         u32* content = reinterpret_cast<u32*>(this); | ||||
|         return content[index]; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     template <class Archive> | ||||
|     void serialize(Archive& ar, const unsigned int) { | ||||
          // Boost.Serialization hook: only the four named registers are archived; the padding
          // words are not. The version argument is unused.
|         ar& color_fill_top.raw; | ||||
|         ar& backlight_top; | ||||
|         ar& color_fill_bottom.raw; | ||||
|         ar& backlight_bottom; | ||||
|     } | ||||
|     friend class boost::serialization::access; | ||||
| }; | ||||
| static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); | ||||
| 
 | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|     static_assert(offsetof(Regs, field_name) == position * 4,                                      \ | ||||
|                   "Field " #field_name " has invalid position") | ||||
| ASSERT_REG_POSITION(color_fill_top, 0x81); | ||||
| ASSERT_REG_POSITION(backlight_top, 0x90); | ||||
| ASSERT_REG_POSITION(color_fill_bottom, 0x281); | ||||
| ASSERT_REG_POSITION(backlight_bottom, 0x290); | ||||
| 
 | ||||
| extern Regs g_regs; | ||||
| 
 | ||||
| template <typename T> | ||||
| void Read(T& var, const u32 addr); | ||||
| 
 | ||||
| template <typename T> | ||||
| void Write(u32 addr, const T data); | ||||
| 
 | ||||
| /// Initialize hardware
 | ||||
| void Init(); | ||||
| 
 | ||||
| /// Shutdown hardware
 | ||||
| void Shutdown(); | ||||
| 
 | ||||
| } // namespace LCD
 | ||||
|  | @ -9,7 +9,7 @@ | |||
| #include "common/assert.h" | ||||
| #include "common/color.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/microprofileui.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "core/core.h" | ||||
| #include "core/hle/service/cam/y2r_u.h" | ||||
|  |  | |||
|  | @ -19,10 +19,9 @@ | |||
| #include "core/global.h" | ||||
| #include "core/hle/kernel/process.h" | ||||
| #include "core/hle/service/plgldr/plgldr.h" | ||||
| #include "core/hw/hw.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/video_core.h" | ||||
| 
 | ||||
| SERIALIZE_EXPORT_IMPL(Memory::MemorySystem::BackingMemImpl<Memory::Region::FCRAM>) | ||||
| SERIALIZE_EXPORT_IMPL(Memory::MemorySystem::BackingMemImpl<Memory::Region::VRAM>) | ||||
|  | @ -346,13 +345,52 @@ std::shared_ptr<PageTable> MemorySystem::GetCurrentPageTable() const { | |||
|     return impl->current_page_table; | ||||
| } | ||||
| 
 | ||||
| void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { | ||||
|     const VAddr end = start + size; | ||||
| 
 | ||||
|     auto CheckRegion = [&](VAddr region_start, VAddr region_end, PAddr paddr_region_start) { | ||||
|         if (start >= region_end || end <= region_start) { | ||||
|             // No overlap with region
 | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         auto& renderer = Core::System::GetInstance().GPU().Renderer(); | ||||
|         VAddr overlap_start = std::max(start, region_start); | ||||
|         VAddr overlap_end = std::min(end, region_end); | ||||
|         PAddr physical_start = paddr_region_start + (overlap_start - region_start); | ||||
|         u32 overlap_size = overlap_end - overlap_start; | ||||
| 
 | ||||
|         auto* rasterizer = renderer.Rasterizer(); | ||||
|         switch (mode) { | ||||
|         case FlushMode::Flush: | ||||
|             rasterizer->FlushRegion(physical_start, overlap_size); | ||||
|             break; | ||||
|         case FlushMode::Invalidate: | ||||
|             rasterizer->InvalidateRegion(physical_start, overlap_size); | ||||
|             break; | ||||
|         case FlushMode::FlushAndInvalidate: | ||||
|             rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); | ||||
|             break; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END, FCRAM_PADDR); | ||||
|     CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END, FCRAM_PADDR); | ||||
|     CheckRegion(VRAM_VADDR, VRAM_VADDR_END, VRAM_PADDR); | ||||
|     if (Service::PLGLDR::PLG_LDR::GetPluginFBAddr()) | ||||
|         CheckRegion(PLUGIN_3GX_FB_VADDR, PLUGIN_3GX_FB_VADDR_END, | ||||
|                     Service::PLGLDR::PLG_LDR::GetPluginFBAddr()); | ||||
| } | ||||
| 
 | ||||
| void MemorySystem::MapPages(PageTable& page_table, u32 base, u32 size, MemoryRef memory, | ||||
|                             PageType type) { | ||||
|     LOG_DEBUG(HW_Memory, "Mapping {} onto {:08X}-{:08X}", (void*)memory.GetPtr(), | ||||
|               base * CITRA_PAGE_SIZE, (base + size) * CITRA_PAGE_SIZE); | ||||
| 
 | ||||
|     RasterizerFlushVirtualRegion(base << CITRA_PAGE_BITS, size * CITRA_PAGE_SIZE, | ||||
|                                  FlushMode::FlushAndInvalidate); | ||||
|     if (impl->system.IsPoweredOn()) { | ||||
|         RasterizerFlushVirtualRegion(base << CITRA_PAGE_BITS, size * CITRA_PAGE_SIZE, | ||||
|                                      FlushMode::FlushAndInvalidate); | ||||
|     } | ||||
| 
 | ||||
|     u32 end = base + size; | ||||
|     while (base != end) { | ||||
|  | @ -421,9 +459,8 @@ T MemorySystem::Read(const VAddr vaddr) { | |||
|             return value; | ||||
|         } else if ((paddr & 0xF0000000) == 0x10000000 && | ||||
|                    paddr >= Memory::IO_AREA_PADDR) { // Check MMIO region
 | ||||
|             T ret; | ||||
|             HW::Read<T>(ret, static_cast<VAddr>(paddr) - Memory::IO_AREA_PADDR + 0x1EC00000); | ||||
|             return ret; | ||||
|             return impl->system.GPU().ReadReg(static_cast<VAddr>(paddr) - Memory::IO_AREA_PADDR + | ||||
|                                               0x1EC00000); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -468,7 +505,10 @@ void MemorySystem::Write(const VAddr vaddr, const T data) { | |||
|             return; | ||||
|         } else if ((paddr & 0xF0000000) == 0x10000000 && | ||||
|                    paddr >= Memory::IO_AREA_PADDR) { // Check MMIO region
 | ||||
|             HW::Write<T>(static_cast<VAddr>(paddr) - Memory::IO_AREA_PADDR + 0x1EC00000, data); | ||||
|             ASSERT(sizeof(data) == sizeof(u32)); | ||||
|             impl->system.GPU().WriteReg(static_cast<VAddr>(paddr) - Memory::IO_AREA_PADDR + | ||||
|                                             0x1EC00000, | ||||
|                                         static_cast<u32>(data)); | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
|  | @ -713,84 +753,6 @@ void MemorySystem::RasterizerMarkRegionCached(PAddr start, u32 size, bool cached | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerFlushRegion(PAddr start, u32 size) { | ||||
|     if (VideoCore::g_renderer == nullptr) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerInvalidateRegion(PAddr start, u32 size) { | ||||
|     if (VideoCore::g_renderer == nullptr) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { | ||||
|     // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
 | ||||
|     // null here
 | ||||
|     if (VideoCore::g_renderer == nullptr) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerClearAll(bool flush) { | ||||
|     // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
 | ||||
|     // null here
 | ||||
|     if (VideoCore::g_renderer == nullptr) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::g_renderer->Rasterizer()->ClearAll(flush); | ||||
| } | ||||
| 
 | ||||
| void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { | ||||
|     // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
 | ||||
|     // null here
 | ||||
|     if (VideoCore::g_renderer == nullptr) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     VAddr end = start + size; | ||||
| 
 | ||||
|     auto CheckRegion = [&](VAddr region_start, VAddr region_end, PAddr paddr_region_start) { | ||||
|         if (start >= region_end || end <= region_start) { | ||||
|             // No overlap with region
 | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         VAddr overlap_start = std::max(start, region_start); | ||||
|         VAddr overlap_end = std::min(end, region_end); | ||||
|         PAddr physical_start = paddr_region_start + (overlap_start - region_start); | ||||
|         u32 overlap_size = overlap_end - overlap_start; | ||||
| 
 | ||||
|         auto* rasterizer = VideoCore::g_renderer->Rasterizer(); | ||||
|         switch (mode) { | ||||
|         case FlushMode::Flush: | ||||
|             rasterizer->FlushRegion(physical_start, overlap_size); | ||||
|             break; | ||||
|         case FlushMode::Invalidate: | ||||
|             rasterizer->InvalidateRegion(physical_start, overlap_size); | ||||
|             break; | ||||
|         case FlushMode::FlushAndInvalidate: | ||||
|             rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); | ||||
|             break; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END, FCRAM_PADDR); | ||||
|     CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END, FCRAM_PADDR); | ||||
|     CheckRegion(VRAM_VADDR, VRAM_VADDR_END, VRAM_PADDR); | ||||
|     if (Service::PLGLDR::PLG_LDR::GetPluginFBAddr()) | ||||
|         CheckRegion(PLUGIN_3GX_FB_VADDR, PLUGIN_3GX_FB_VADDR_END, | ||||
|                     Service::PLGLDR::PLG_LDR::GetPluginFBAddr()); | ||||
| } | ||||
| 
 | ||||
| u8 MemorySystem::Read8(const VAddr addr) { | ||||
|     return Read<u8>(addr); | ||||
| } | ||||
|  |  | |||
|  | @ -226,21 +226,6 @@ enum : VAddr { | |||
|     PLUGIN_3GX_FB_VADDR_END = PLUGIN_3GX_FB_VADDR + PLUGIN_3GX_FB_SIZE | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * Flushes any externally cached rasterizer resources touching the given region. | ||||
|  */ | ||||
| void RasterizerFlushRegion(PAddr start, u32 size); | ||||
| 
 | ||||
| /**
 | ||||
|  * Invalidates any externally cached rasterizer resources touching the given region. | ||||
|  */ | ||||
| void RasterizerInvalidateRegion(PAddr start, u32 size); | ||||
| 
 | ||||
| /**
 | ||||
|  * Flushes and invalidates any externally cached rasterizer resources touching the given region. | ||||
|  */ | ||||
| void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); | ||||
| 
 | ||||
| enum class FlushMode { | ||||
|     /// Write back modified surfaces to RAM
 | ||||
|     Flush, | ||||
|  | @ -250,16 +235,6 @@ enum class FlushMode { | |||
|     FlushAndInvalidate, | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * Flushes and invalidates all memory in the rasterizer cache and removes any leftover state | ||||
|  * If flush is true, the rasterizer should flush any cached resources to RAM before clearing | ||||
|  */ | ||||
| void RasterizerClearAll(bool flush); | ||||
| 
 | ||||
| /**
 | ||||
|  * Flushes and invalidates any externally cached rasterizer resources touching the given virtual | ||||
|  * address region. | ||||
|  */ | ||||
| void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode); | ||||
| 
 | ||||
| class MemorySystem { | ||||
|  |  | |||
|  | @ -21,7 +21,6 @@ | |||
| #include "core/hle/service/hid/hid.h" | ||||
| #include "core/hle/service/ir/extra_hid.h" | ||||
| #include "core/hle/service/ir/ir_rst.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/loader/loader.h" | ||||
| #include "core/movie.h" | ||||
| 
 | ||||
|  | @ -218,10 +217,10 @@ Movie::PlayMode Movie::GetPlayMode() const { | |||
| } | ||||
| 
 | ||||
| u64 Movie::GetCurrentInputIndex() const { | ||||
|     return static_cast<u64>(std::nearbyint(current_input / 234.0 * GPU::SCREEN_REFRESH_RATE)); | ||||
|     return static_cast<u64>(std::nearbyint(current_input / 234.0 * SCREEN_REFRESH_RATE)); | ||||
| } | ||||
| u64 Movie::GetTotalInputCount() const { | ||||
|     return static_cast<u64>(std::nearbyint(total_input / 234.0 * GPU::SCREEN_REFRESH_RATE)); | ||||
|     return static_cast<u64>(std::nearbyint(total_input / 234.0 * SCREEN_REFRESH_RATE)); | ||||
| } | ||||
| 
 | ||||
| void Movie::CheckInputEnd() { | ||||
|  |  | |||
|  | @ -13,8 +13,9 @@ | |||
| #include <fmt/format.h> | ||||
| #include "common/file_util.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/perf_stats.h" | ||||
| #include "video_core/gpu.h" | ||||
| 
 | ||||
| using namespace std::chrono_literals; | ||||
| using DoubleSecs = std::chrono::duration<double, std::chrono::seconds::period>; | ||||
|  | @ -120,7 +121,7 @@ PerfStats::Results PerfStats::GetLastStats() { | |||
| double PerfStats::GetLastFrameTimeScale() const { | ||||
|     std::scoped_lock lock{object_mutex}; | ||||
| 
 | ||||
|     constexpr double FRAME_LENGTH = 1.0 / GPU::SCREEN_REFRESH_RATE; | ||||
|     constexpr double FRAME_LENGTH = 1.0 / SCREEN_REFRESH_RATE; | ||||
|     return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -75,16 +75,7 @@ struct CTMemoryLoad { | |||
| 
 | ||||
| struct CTRegisterWrite { | ||||
|     u32 physical_address; | ||||
| 
 | ||||
|     enum : u32 { | ||||
|         SIZE_8 = 0xD1, | ||||
|         SIZE_16 = 0xD2, | ||||
|         SIZE_32 = 0xD3, | ||||
|         SIZE_64 = 0xD4, | ||||
|     } size; | ||||
| 
 | ||||
|     // TODO: Make it clearer which bits of this member are used for sizes other than 32 bits
 | ||||
|     u64 value; | ||||
|     u32 value; | ||||
| }; | ||||
| 
 | ||||
| struct CTStreamElement { | ||||
|  |  | |||
|  | @ -22,9 +22,8 @@ void Recorder::Finish(const std::string& filename) { | |||
|     // Calculate file offsets
 | ||||
|     auto& initial = header.initial_state_offsets; | ||||
| 
 | ||||
|     initial.gpu_registers_size = static_cast<u32>(initial_state.gpu_registers.size()); | ||||
|     initial.lcd_registers_size = static_cast<u32>(initial_state.lcd_registers.size()); | ||||
|     initial.pica_registers_size = static_cast<u32>(initial_state.pica_registers.size()); | ||||
|     initial.lcd_registers_size = static_cast<u32>(initial_state.lcd_registers.size()); | ||||
|     initial.default_attributes_size = static_cast<u32>(initial_state.default_attributes.size()); | ||||
|     initial.vs_program_binary_size = static_cast<u32>(initial_state.vs_program_binary.size()); | ||||
|     initial.vs_swizzle_data_size = static_cast<u32>(initial_state.vs_swizzle_data.size()); | ||||
|  | @ -81,22 +80,17 @@ void Recorder::Finish(const std::string& filename) { | |||
|             throw "Failed to write header"; | ||||
| 
 | ||||
|         // Write initial state
 | ||||
|         written = | ||||
|             file.WriteArray(initial_state.gpu_registers.data(), initial_state.gpu_registers.size()); | ||||
|         if (written != initial_state.gpu_registers.size() || file.Tell() != initial.lcd_registers) | ||||
|             throw "Failed to write GPU registers"; | ||||
| 
 | ||||
|         written = | ||||
|             file.WriteArray(initial_state.lcd_registers.data(), initial_state.lcd_registers.size()); | ||||
|         if (written != initial_state.lcd_registers.size() || file.Tell() != initial.pica_registers) | ||||
|             throw "Failed to write LCD registers"; | ||||
| 
 | ||||
|         written = file.WriteArray(initial_state.pica_registers.data(), | ||||
|                                   initial_state.pica_registers.size()); | ||||
|         if (written != initial_state.pica_registers.size() || | ||||
|             file.Tell() != initial.default_attributes) | ||||
|             throw "Failed to write Pica registers"; | ||||
| 
 | ||||
|         written = | ||||
|             file.WriteArray(initial_state.lcd_registers.data(), initial_state.lcd_registers.size()); | ||||
|         if (written != initial_state.lcd_registers.size() || file.Tell() != initial.pica_registers) | ||||
|             throw "Failed to write LCD registers"; | ||||
| 
 | ||||
|         written = file.WriteArray(initial_state.default_attributes.data(), | ||||
|                                   initial_state.default_attributes.size()); | ||||
|         if (written != initial_state.default_attributes.size() || | ||||
|  | @ -187,21 +181,12 @@ void Recorder::MemoryAccessed(const u8* data, u32 size, u32 physical_address) { | |||
|     stream.push_back(element); | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| void Recorder::RegisterWritten(u32 physical_address, T value) { | ||||
| void Recorder::RegisterWritten(u32 physical_address, u32 value) { | ||||
|     StreamElement element = {{RegisterWrite}}; | ||||
|     element.data.register_write.size = (sizeof(T) == 1)   ? CTRegisterWrite::SIZE_8 | ||||
|                                        : (sizeof(T) == 2) ? CTRegisterWrite::SIZE_16 | ||||
|                                        : (sizeof(T) == 4) ? CTRegisterWrite::SIZE_32 | ||||
|                                                           : CTRegisterWrite::SIZE_64; | ||||
|     element.data.register_write.physical_address = physical_address; | ||||
|     element.data.register_write.value = value; | ||||
| 
 | ||||
|     stream.push_back(element); | ||||
| } | ||||
| 
 | ||||
| template void Recorder::RegisterWritten(u32, u8); | ||||
| template void Recorder::RegisterWritten(u32, u16); | ||||
| template void Recorder::RegisterWritten(u32, u32); | ||||
| template void Recorder::RegisterWritten(u32, u64); | ||||
| } // namespace CiTrace
 | ||||
|  |  | |||
|  | @ -4,7 +4,6 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <span> | ||||
| #include <string> | ||||
| #include <unordered_map> | ||||
| #include <vector> | ||||
|  | @ -17,7 +16,6 @@ namespace CiTrace { | |||
| class Recorder { | ||||
| public: | ||||
|     struct InitialState { | ||||
|         std::vector<u32> gpu_registers; | ||||
|         std::vector<u32> lcd_registers; | ||||
|         std::vector<u32> pica_registers; | ||||
|         std::vector<u32> default_attributes; | ||||
|  | @ -52,8 +50,7 @@ public: | |||
|      * Record a register write. | ||||
|      * @note Use this whenever a GPU-related MMIO register has been written to. | ||||
|      */ | ||||
|     template <typename T> | ||||
|     void RegisterWritten(u32 physical_address, T value); | ||||
|     void RegisterWritten(u32 physical_address, u32 value); | ||||
| 
 | ||||
| private: | ||||
|     // Initial state of recording start
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue