video_core: Refactor GPU interface (#7272)

* video_core: Refactor GPU interface
* citra_qt: Better debug widget lifetime
2025-11-01 14:20:04 +00:00 · 2023-12-28 12:46:57 +02:00 · 2023-12-28 12:46:57 +02:00 · 2bb7f89c30
commit 2bb7f89c30
parent 602f4f60d8
167 changed files with 4172 additions and 4866 deletions
--- a/src/core/hle/service/gsp/gsp.cpp
+++ b/src/core/hle/service/gsp/gsp.cpp
@ -2,33 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <vector>
 #include "core/core.h"
-#include "core/hle/kernel/event.h"
-#include "core/hle/kernel/shared_memory.h"
 #include "core/hle/service/gsp/gsp.h"
+#include "core/hle/service/gsp/gsp_gpu.h"
+#include "core/hle/service/gsp/gsp_lcd.h"

 namespace Service::GSP {

-static std::weak_ptr<GSP_GPU> gsp_gpu;
-
-void SignalInterrupt(InterruptId interrupt_id) {
-    auto gpu = gsp_gpu.lock();
-    ASSERT(gpu != nullptr);
-    return gpu->SignalInterrupt(interrupt_id);
-}
-
 void InstallInterfaces(Core::System& system) {
    auto& service_manager = system.ServiceManager();
-    auto gpu = std::make_shared<GSP_GPU>(system);
-    gpu->InstallAsService(service_manager);
-    gsp_gpu = gpu;
-
+    std::make_shared<GSP_GPU>(system)->InstallAsService(service_manager);
    std::make_shared<GSP_LCD>()->InstallAsService(service_manager);
 }

-void SetGlobalModule(Core::System& system) {
-    gsp_gpu = system.ServiceManager().GetService<GSP_GPU>("gsp::Gpu");
-}
-
 } // namespace Service::GSP
--- a/src/core/hle/service/gsp/gsp.h
+++ b/src/core/hle/service/gsp/gsp.h
@ -4,25 +4,12 @@

 #pragma once

-#include <cstddef>
-#include <string>
-#include "common/common_types.h"
-#include "core/hle/result.h"
-#include "core/hle/service/gsp/gsp_gpu.h"
-#include "core/hle/service/gsp/gsp_lcd.h"
-
 namespace Core {
 class System;
 }

 namespace Service::GSP {
-/**
- * Signals that the specified interrupt type has occurred to userland code
- * @param interrupt_id ID of interrupt that is being signalled
- */
-void SignalInterrupt(InterruptId interrupt_id);

 void InstallInterfaces(Core::System& system);

-void SetGlobalModule(Core::System& system);
 } // namespace Service::GSP
--- a/src/core/hle/service/gsp/gsp_command.h
+++ b/src/core/hle/service/gsp/gsp_command.h
@ -0,0 +1,124 @
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include "common/bit_field.h"
+
+namespace Service::GSP {
+
+/// GSP command ID
+enum class CommandId : u32 {
+    /// Requests a DMA copy - typically used for copying memory from GSP heap to VRAM.
+    RequestDma = 0x00,
+    /// Submits a command list for execution by the GPU.
+    SubmitCmdList = 0x01,
+    /// Fills a given memory range with a particular value.
+    MemoryFill = 0x02,
+    /// Copies an image and optionally performs color-conversion or scaling.
+    DisplayTransfer = 0x03,
+    /// Conceptually similar to DisplayTransfer and presumably uses the same hardware path.
+    TextureCopy = 0x04,
+    /// Flushes up to 3 cache regions in a single command.
+    CacheFlush = 0x05,
+};
+
+/// Payload of a CommandId::RequestDma command.
+struct DmaCommand {
+    u32 source_address;
+    u32 dest_address;
+    u32 size;
+};
+
+/// Payload of a CommandId::SubmitCmdList command.
+struct SubmitCmdListCommand {
+    u32 address;
+    u32 size;
+    u32 flags; ///< Meaning not yet figured out
+    u32 unused[3];
+    u32 do_flush; ///< This flag flushes the command list (address, size) from the cache
+};
+
+/// Payload of a CommandId::MemoryFill command. It's assumed that the two "blocks" behave
+/// equivalently; presumably there are two so that two memory fills can run in parallel.
+struct MemoryFillCommand {
+    u32 start1;
+    u32 value1;
+    u32 end1;
+
+    u32 start2;
+    u32 value2;
+    u32 end2;
+
+    u16 control1;
+    u16 control2;
+};
+
+/// Payload of a CommandId::DisplayTransfer command.
+struct DisplayTransferCommand {
+    u32 in_buffer_address;
+    u32 out_buffer_address;
+    u32 in_buffer_size;
+    u32 out_buffer_size;
+    u32 flags;
+};
+
+/// Payload of a CommandId::TextureCopy command.
+struct TextureCopyCommand {
+    u32 in_buffer_address;
+    u32 out_buffer_address;
+    u32 size;
+    u32 in_width_gap;
+    u32 out_width_gap;
+    u32 flags;
+};
+
+/// Payload of a CommandId::CacheFlush command: up to 3 regions to flush.
+struct CacheFlushCommand {
+    struct {
+        u32 address;
+        u32 size;
+    } regions[3];
+};
+
+/// GSP command: an 8-bit command ID followed by a command-specific 0x1C-byte payload
+struct Command {
+    BitField<0, 8, CommandId> id;
+    union {
+        DmaCommand dma_request;
+        SubmitCmdListCommand submit_gpu_cmdlist;
+        MemoryFillCommand memory_fill;
+        DisplayTransferCommand display_transfer;
+        TextureCopyCommand texture_copy;
+        CacheFlushCommand cache_flush;
+        std::array<u8, 0x1C> raw_data;
+    };
+};
+static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size");
+
+/// GSP shared memory GX command buffer header
+struct CommandBuffer {
+    union {
+        u32 hex;
+
+        // Current command index. This index is updated by GSP module after loading the command
+        // data, right before the command is processed. When this index is updated by GSP module,
+        // the total commands field is decreased by one as well.
+        BitField<0, 8, u32> index;
+
+        // Total commands to process, must not be value 0 when GSP module handles commands. This
+        // must be <=15 when writing a command to shared memory. This is incremented by the
+        // application when writing a command to shared memory; after increasing this value,
+        // TriggerCmdReqQueue is only used if this field is value 1.
+        BitField<8, 8, u32> number_commands;
+    };
+
+    u32 unk[7];
+
+    Command commands[0xF];
+};
+static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size");
+
+} // namespace Service::GSP
--- a/src/core/hle/service/gsp/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp/gsp_gpu.cpp
@ -9,30 +9,21 @@
 #include <boost/serialization/shared_ptr.hpp>
 #include "common/archives.h"
 #include "common/bit_field.h"
-#include "common/microprofile.h"
-#include "common/swap.h"
 #include "core/core.h"
-#include "core/file_sys/plugin_3gx.h"
-#include "core/hle/ipc.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/shared_page.h"
 #include "core/hle/result.h"
 #include "core/hle/service/gsp/gsp_gpu.h"
-#include "core/hw/gpu.h"
-#include "core/hw/hw.h"
-#include "core/hw/lcd.h"
 #include "core/memory.h"
-#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/gpu.h"
 #include "video_core/gpu_debugger.h"
+#include "video_core/pica/regs_lcd.h"

 SERIALIZE_EXPORT_IMPL(Service::GSP::SessionData)
 SERIALIZE_EXPORT_IMPL(Service::GSP::GSP_GPU)
 SERVICE_CONSTRUCT_IMPL(Service::GSP::GSP_GPU)

-// Main graphics debugger object - TODO: Here is probably not the best place for this
-GraphicsDebugger g_debugger;
-
 namespace Service::GSP {

 // Beginning address of HW regs
@ -59,60 +50,32 @@ constexpr ResultCode ERR_REGS_INVALID_SIZE(ErrorDescription::InvalidSize, ErrorM
                                           ErrorSummary::InvalidArgument,
                                           ErrorLevel::Usage); // 0xE0E02BEC

-static PAddr VirtualToPhysicalAddress(VAddr addr) {
-    if (addr == 0) {
-        return 0;
-    }
-
-    // Note: the region end check is inclusive because the game can pass in an address that
-    // represents an open right boundary
-    if (addr >= Memory::VRAM_VADDR && addr <= Memory::VRAM_VADDR_END) {
-        return addr - Memory::VRAM_VADDR + Memory::VRAM_PADDR;
-    }
-    if (addr >= Memory::LINEAR_HEAP_VADDR && addr <= Memory::LINEAR_HEAP_VADDR_END) {
-        return addr - Memory::LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR;
-    }
-    if (addr >= Memory::NEW_LINEAR_HEAP_VADDR && addr <= Memory::NEW_LINEAR_HEAP_VADDR_END) {
-        return addr - Memory::NEW_LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR;
-    }
-    if (addr >= Memory::PLUGIN_3GX_FB_VADDR && addr <= Memory::PLUGIN_3GX_FB_VADDR_END) {
-        return addr - Memory::PLUGIN_3GX_FB_VADDR + Service::PLGLDR::PLG_LDR::GetPluginFBAddr();
-    }
-
-    LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x{:08X}", addr);
-    // To help with debugging, set bit on address so that it's obviously invalid.
-    // TODO: find the correct way to handle this error
-    return addr | 0x80000000;
-}
-
 u32 GSP_GPU::GetUnusedThreadId() const {
    for (u32 id = 0; id < MaxGSPThreads; ++id) {
-        if (!used_thread_ids[id])
+        if (!used_thread_ids[id]) {
            return id;
+        }
    }

    UNREACHABLE_MSG("All GSP threads are in use");
    return 0;
 }

-/// Gets a pointer to a thread command buffer in GSP shared memory
-static inline u8* GetCommandBuffer(std::shared_ptr<Kernel::SharedMemory> shared_memory,
-                                   u32 thread_id) {
-    return shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
+CommandBuffer* GSP_GPU::GetCommandBuffer(u32 thread_id) {
+    auto* ptr = shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
+    return reinterpret_cast<CommandBuffer*>(ptr);
 }

 FrameBufferUpdate* GSP_GPU::GetFrameBufferInfo(u32 thread_id, u32 screen_index) {
    DEBUG_ASSERT_MSG(screen_index < 2, "Invalid screen index");

    // For each thread there are two FrameBufferUpdate fields
-    u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate);
+    const u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate);
    u8* ptr = shared_memory->GetPointer(offset);
    return reinterpret_cast<FrameBufferUpdate*>(ptr);
 }

-/// Gets a pointer to the interrupt relay queue for a given thread index
-static inline InterruptRelayQueue* GetInterruptRelayQueue(
-    std::shared_ptr<Kernel::SharedMemory> shared_memory, u32 thread_id) {
+InterruptRelayQueue* GSP_GPU::GetInterruptRelayQueue(u32 thread_id) {
    u8* ptr = shared_memory->GetPointer(sizeof(InterruptRelayQueue) * thread_id);
    return reinterpret_cast<InterruptRelayQueue*>(ptr);
 }
@ -125,19 +88,6 @@ void GSP_GPU::ClientDisconnected(std::shared_ptr<Kernel::ServerSession> server_s
    SessionRequestHandler::ClientDisconnected(server_session);
 }

-/**
- * Writes a single GSP GPU hardware registers with a single u32 value
- * (For internal use.)
- *
- * @param base_address The address of the register in question
- * @param data Data to be written
- */
-static void WriteSingleHWReg(u32 base_address, u32 data) {
-    DEBUG_ASSERT_MSG((base_address & 3) == 0 && base_address < 0x420000,
-                     "Write address out of range or misaligned");
-    HW::Write<u32>(base_address + REGS_BEGIN, data);
-}
-
 /**
 * Writes sequential GSP GPU hardware registers using an array of source data
 *
@ -146,7 +96,8 @@ static void WriteSingleHWReg(u32 base_address, u32 data) {
 * @param data A vector containing the source data
 * @return RESULT_SUCCESS if the parameters are valid, error code otherwise
 */
-static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<const u8> data) {
+static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<const u8> data,
+                              VideoCore::GPU& gpu) {
    // This magic number is verified to be done by the gsp module
    const u32 max_size_in_bytes = 0x80;

@ -155,28 +106,30 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<con
                  "Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})",
                  base_address, size_in_bytes);
        return ERR_REGS_OUTOFRANGE_OR_MISALIGNED;
-    } else if (size_in_bytes <= max_size_in_bytes) {
-        if (size_in_bytes & 3) {
-            LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
-            return ERR_REGS_MISALIGNED;
-        } else {
-            std::size_t offset = 0;
-            while (size_in_bytes > 0) {
-                u32 value;
-                std::memcpy(&value, &data[offset], sizeof(u32));
-                WriteSingleHWReg(base_address, value);
+    }

-                size_in_bytes -= 4;
-                offset += 4;
-                base_address += 4;
-            }
-            return RESULT_SUCCESS;
-        }
-
-    } else {
+    if (size_in_bytes > max_size_in_bytes) {
        LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes);
        return ERR_REGS_INVALID_SIZE;
    }
+
+    if (size_in_bytes & 3) {
+        LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
+        return ERR_REGS_MISALIGNED;
+    }
+
+    std::size_t offset = 0;
+    while (size_in_bytes > 0) {
+        u32 value;
+        std::memcpy(&value, &data[offset], sizeof(u32));
+        gpu.WriteReg(REGS_BEGIN + base_address, value);
+
+        size_in_bytes -= 4;
+        offset += 4;
+        base_address += 4;
+    }
+
+    return RESULT_SUCCESS;
 }

 /**
@ -190,7 +143,7 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<con
 * @return RESULT_SUCCESS if the parameters are valid, error code otherwise
 */
 static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std::span<const u8> data,
-                                      std::span<const u8> masks) {
+                                      std::span<const u8> masks, VideoCore::GPU& gpu) {
    // This magic number is verified to be done by the gsp module
    const u32 max_size_in_bytes = 0x80;

@ -199,60 +152,58 @@ static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std::
                  "Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})",
                  base_address, size_in_bytes);
        return ERR_REGS_OUTOFRANGE_OR_MISALIGNED;
-    } else if (size_in_bytes <= max_size_in_bytes) {
-        if (size_in_bytes & 3) {
-            LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
-            return ERR_REGS_MISALIGNED;
-        } else {
-            std::size_t offset = 0;
-            while (size_in_bytes > 0) {
-                const u32 reg_address = base_address + REGS_BEGIN;
+    }

-                u32 reg_value;
-                HW::Read<u32>(reg_value, reg_address);
-
-                u32 value, mask;
-                std::memcpy(&value, &data[offset], sizeof(u32));
-                std::memcpy(&mask, &masks[offset], sizeof(u32));
-
-                // Update the current value of the register only for set mask bits
-                reg_value = (reg_value & ~mask) | (value & mask);
-
-                WriteSingleHWReg(base_address, reg_value);
-
-                size_in_bytes -= 4;
-                offset += 4;
-                base_address += 4;
-            }
-            return RESULT_SUCCESS;
-        }
-
-    } else {
+    if (size_in_bytes > max_size_in_bytes) {
        LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes);
        return ERR_REGS_INVALID_SIZE;
    }
+
+    if (size_in_bytes & 3) {
+        LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
+        return ERR_REGS_MISALIGNED;
+    }
+
+    std::size_t offset = 0;
+    while (size_in_bytes > 0) {
+        const u32 reg_address = base_address + REGS_BEGIN;
+        u32 reg_value = gpu.ReadReg(reg_address);
+
+        u32 value, mask;
+        std::memcpy(&value, &data[offset], sizeof(u32));
+        std::memcpy(&mask, &masks[offset], sizeof(u32));
+
+        // Update the current value of the register only for set mask bits
+        reg_value = (reg_value & ~mask) | (value & mask);
+        gpu.WriteReg(reg_address, reg_value);
+
+        size_in_bytes -= 4;
+        offset += 4;
+        base_address += 4;
+    }
+
+    return RESULT_SUCCESS;
 }

 void GSP_GPU::WriteHWRegs(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp(ctx);
-    u32 reg_addr = rp.Pop<u32>();
-    u32 size = rp.Pop<u32>();
-    std::vector<u8> src_data = rp.PopStaticBuffer();
+    const u32 reg_addr = rp.Pop<u32>();
+    const u32 size = rp.Pop<u32>();
+    const auto src_data = rp.PopStaticBuffer();

    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
-    rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data));
+    rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data, system.GPU()));
 }

 void GSP_GPU::WriteHWRegsWithMask(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp(ctx);
-    u32 reg_addr = rp.Pop<u32>();
-    u32 size = rp.Pop<u32>();
-
-    std::vector<u8> src_data = rp.PopStaticBuffer();
-    std::vector<u8> mask_data = rp.PopStaticBuffer();
+    const u32 reg_addr = rp.Pop<u32>();
+    const u32 size = rp.Pop<u32>();
+    const auto src_data = rp.PopStaticBuffer();
+    const auto mask_data = rp.PopStaticBuffer();

    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
-    rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data));
+    rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data, system.GPU()));
 }

 void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
@ -270,7 +221,7 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
        return;
    }

-    // size should be word-aligned
+    // Size should be word-aligned
    if ((size % 4) != 0) {
        IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
        rb.Push(ERR_REGS_MISALIGNED);
@ -279,8 +230,9 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
    }

    std::vector<u8> buffer(size);
-    for (u32 offset = 0; offset < size; ++offset) {
-        HW::Read<u8>(buffer[offset], REGS_BEGIN + reg_addr + offset);
+    for (u32 word = 0; word < size / sizeof(u32); ++word) {
+        const u32 data = system.GPU().ReadReg(REGS_BEGIN + reg_addr + word * sizeof(u32));
+        std::memcpy(buffer.data() + word * sizeof(u32), &data, sizeof(u32));
    }

    IPC::RequestBuilder rb = rp.MakeBuilder(1, 2);
@ -288,53 +240,15 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
    rb.PushStaticBuffer(std::move(buffer), 0);
 }

-ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
-    u32 base_address = 0x400000;
-    PAddr phys_address_left = VirtualToPhysicalAddress(info.address_left);
-    PAddr phys_address_right = VirtualToPhysicalAddress(info.address_right);
-    if (info.active_fb == 0) {
-        WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(
-                                                screen_id, address_left1)),
-                         phys_address_left);
-        WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(
-                                                screen_id, address_right1)),
-                         phys_address_right);
-    } else {
-        WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(
-                                                screen_id, address_left2)),
-                         phys_address_left);
-        WriteSingleHWReg(base_address + 4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(
-                                                screen_id, address_right2)),
-                         phys_address_right);
-    }
-    WriteSingleHWReg(base_address +
-                         4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(screen_id, stride)),
-                     info.stride);
-    WriteSingleHWReg(base_address +
-                         4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(screen_id, color_format)),
-                     info.format);
-    WriteSingleHWReg(base_address +
-                         4 * static_cast<u32>(GPU_FRAMEBUFFER_REG_INDEX(screen_id, active_fb)),
-                     info.shown_fb);
-
-    if (Pica::g_debug_context)
-        Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr);
-
-    if (screen_id == 0) {
-        MicroProfileFlip();
-        Core::System::GetInstance().perf_stats->EndGameFrame();
-    }
-
-    return RESULT_SUCCESS;
-}
-
 void GSP_GPU::SetBufferSwap(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp(ctx);
    u32 screen_id = rp.Pop<u32>();
    auto fb_info = rp.PopRaw<FrameBufferInfo>();

+    system.GPU().SetBufferSwap(screen_id, fb_info);
+
    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
-    rb.Push(GSP::SetBufferSwap(screen_id, fb_info));
+    rb.Push(RESULT_SUCCESS);
 }

 void GSP_GPU::FlushDataCache(Kernel::HLERequestContext& ctx) {
@ -382,10 +296,9 @@ void GSP_GPU::RegisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) {
    u32 flags = rp.Pop<u32>();

    auto interrupt_event = rp.PopObject<Kernel::Event>();
-    // TODO(mailwl): return right error code instead assert
-    ASSERT_MSG((interrupt_event != nullptr), "handle is not valid!");
+    ASSERT_MSG(interrupt_event, "handle is not valid!");

-    interrupt_event->SetName("GSP_GSP_GPU::interrupt_event");
+    interrupt_event->SetName("GSP_GPU::interrupt_event");

    SessionData* session_data = GetSessionData(ctx.Session());
    session_data->interrupt_event = std::move(interrupt_event);
@ -422,15 +335,17 @@ void GSP_GPU::UnregisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) {

 void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id) {
    SessionData* session_data = FindRegisteredThreadData(thread_id);
-    if (session_data == nullptr)
+    if (!session_data) {
        return;
+    }

    auto interrupt_event = session_data->interrupt_event;
    if (interrupt_event == nullptr) {
        LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
        return;
    }
-    InterruptRelayQueue* interrupt_relay_queue = GetInterruptRelayQueue(shared_memory, thread_id);
+
+    auto* interrupt_relay_queue = GetInterruptRelayQueue(thread_id);
    u8 next = interrupt_relay_queue->index;
    next += interrupt_relay_queue->number_interrupts;
    next = next % 0x34; // 0x34 is the number of interrupt slots
@ -441,29 +356,20 @@ void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id)
    interrupt_relay_queue->error_code = 0x0; // No error

    // Update framebuffer information if requested
-    // TODO(yuriks): Confirm where this code should be called. It is definitely updated without
-    //               executing any GSP commands, only waiting on the event.
-    // TODO(Subv): The real GSP module triggers PDC0 after updating both the top and bottom
-    // screen, it is currently unknown what PDC1 does.
-    int screen_id = (interrupt_id == InterruptId::PDC0)   ? 0
-                    : (interrupt_id == InterruptId::PDC1) ? 1
-                                                          : -1;
+    const s32 screen_id = (interrupt_id == InterruptId::PDC0)   ? 0
+                          : (interrupt_id == InterruptId::PDC1) ? 1
+                                                                : -1;
    if (screen_id != -1) {
-        FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id);
+        auto* info = GetFrameBufferInfo(thread_id, screen_id);
        if (info->is_dirty) {
-            GSP::SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
+            system.GPU().SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
            info->is_dirty.Assign(false);
        }
    }
+
    interrupt_event->Signal();
 }

-/**
- * Signals that the specified interrupt type has occurred to userland code
- * @param interrupt_id ID of interrupt that is being signalled
- * @todo This should probably take a thread_id parameter and only signal this thread?
- * @todo This probably does not belong in the GSP module, instead move to video_core
- */
 void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) {
    if (nullptr == shared_memory) {
        LOG_WARNING(Service_GSP, "cannot synchronize until GSP shared memory has been created!");
@ -488,154 +394,13 @@ void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) {
    SignalInterruptForThread(interrupt_id, active_thread_id);
 }

-MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
-
-/// Executes the next GSP command
-static void ExecuteCommand(const Command& command, u32 thread_id) {
-    // Utility function to convert register ID to address
-    static auto WriteGPURegister = [](u32 id, u32 data) {
-        GPU::Write<u32>(0x1EF00000 + 4 * id, data);
-    };
-
-    switch (command.id) {
-
-    // GX request DMA - typically used for copying memory from GSP heap to VRAM
-    case CommandId::REQUEST_DMA: {
-        MICROPROFILE_SCOPE(GPU_GSP_DMA);
-        Memory::MemorySystem& memory = Core::System::GetInstance().Memory();
-
-        // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever
-        // possible/likely
-        Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address,
-                                             command.dma_request.size, Memory::FlushMode::Flush);
-        Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
-                                             command.dma_request.size,
-                                             Memory::FlushMode::Invalidate);
-
-        // TODO(Subv): These memory accesses should not go through the application's memory mapping.
-        // They should go through the GSP module's memory mapping.
-        memory.CopyBlock(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
-                         command.dma_request.dest_address, command.dma_request.source_address,
-                         command.dma_request.size);
-        SignalInterrupt(InterruptId::DMA);
-        break;
-    }
-    // TODO: This will need some rework in the future. (why?)
-    case CommandId::SUBMIT_GPU_CMDLIST: {
-        auto& params = command.submit_gpu_cmdlist;
-
-        if (params.do_flush) {
-            // This flag flushes the command list (params.address, params.size) from the cache.
-            // Command lists are not processed by the hardware renderer, so we don't need to
-            // actually flush them in Citra.
-        }
-
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.address)),
-                         VirtualToPhysicalAddress(params.address) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.size)),
-                         params.size);
-
-        // TODO: Not sure if we are supposed to always write this .. seems to trigger processing
-        // though
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.trigger)), 1);
-
-        // TODO(yuriks): Figure out the meaning of the `flags` field.
-
-        break;
-    }
-
-    // It's assumed that the two "blocks" behave equivalently.
-    // Presumably this is done simply to allow two memory fills to run in parallel.
-    case CommandId::SET_MEMORY_FILL: {
-        auto& params = command.memory_fill;
-
-        if (params.start1 != 0) {
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)),
-                             VirtualToPhysicalAddress(params.start1) >> 3);
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)),
-                             VirtualToPhysicalAddress(params.end1) >> 3);
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value_32bit)),
-                             params.value1);
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].control)),
-                             params.control1);
-        }
-
-        if (params.start2 != 0) {
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)),
-                             VirtualToPhysicalAddress(params.start2) >> 3);
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)),
-                             VirtualToPhysicalAddress(params.end2) >> 3);
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value_32bit)),
-                             params.value2);
-            WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].control)),
-                             params.control2);
-        }
-        break;
-    }
-
-    case CommandId::SET_DISPLAY_TRANSFER: {
-        auto& params = command.display_transfer;
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
-                         VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
-                         VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)),
-                         params.in_buffer_size);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)),
-                         params.out_buffer_size);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)),
-                         params.flags);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1);
-        break;
-    }
-
-    case CommandId::SET_TEXTURE_COPY: {
-        auto& params = command.texture_copy;
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
-                         VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
-                         VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
-                         params.size);
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
-                         params.in_width_gap);
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
-                         params.out_width_gap);
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), params.flags);
-
-        // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to
-        // matter.
-        WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
-        break;
-    }
-
-    case CommandId::CACHE_FLUSH: {
-        // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
-        // Use command.cache_flush.regions to implement this handler
-        break;
-    }
-
-    default:
-        LOG_ERROR(Service_GSP, "unknown command 0x{:08X}", (int)command.id.Value());
-    }
-
-    if (Pica::g_debug_context)
-        Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::GSPCommandProcessed,
-                                       (void*)&command);
-}
-
 void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp(ctx);
+    const bool enable_black = rp.Pop<bool>();

-    bool enable_black = rp.Pop<bool>();
-    LCD::Regs::ColorFill data = {0};
-
-    // Since data is already zeroed, there is no need to explicitly set
-    // the color to black (all zero).
+    Pica::ColorFill data{};
    data.is_enabled.Assign(enable_black);
-
-    LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw);    // Top LCD
-    LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD
+    system.GPU().SetColorFill(data);

    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
    rb.Push(RESULT_SUCCESS);
@ -644,20 +409,21 @ void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) {
 void GSP_GPU::TriggerCmdReqQueue(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp(ctx);

-    // Iterate through each thread's command queue...
-    for (unsigned thread_id = 0; thread_id < 0x4; ++thread_id) {
-        CommandBuffer* command_buffer = (CommandBuffer*)GetCommandBuffer(shared_memory, thread_id);
+    // Iterate through each command.
+    auto* command_buffer = GetCommandBuffer(active_thread_id);
+    auto& gpu = system.GPU();
+
+    // Snapshot the pending count up front: number_commands is decremented after every command,
+    // so testing the index against the live field would stop after about half of the queue.
+    const u32 num_commands = command_buffer->number_commands;
+    for (u32 i = 0; i < num_commands; i++) {
+        gpu.Debugger().GXCommandProcessed(command_buffer->commands[i]);

-        // Iterate through each command...
-        for (unsigned i = 0; i < command_buffer->number_commands; ++i) {
-            g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]);
+        // Decode and execute command
+        gpu.Execute(command_buffer->commands[i]);

-            // Decode and execute command
-            ExecuteCommand(command_buffer->commands[i], thread_id);
-
-            // Indicates that command has completed
-            command_buffer->number_commands.Assign(command_buffer->number_commands - 1);
-        }
+        // Indicates that command has completed
+        command_buffer->number_commands.Assign(command_buffer->number_commands - 1);
    }

    IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
--- a/src/core/hle/service/gsp/gsp_gpu.h
+++ b/src/core/hle/service/gsp/gsp_gpu.h
@ -13,7 +13,8 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/kernel/hle_ipc.h"
-#include "core/hle/result.h"
+#include "core/hle/service/gsp/gsp_command.h"
+#include "core/hle/service/gsp/gsp_interrupt.h"
 #include "core/hle/service/service.h"

 namespace Core {
@ -28,53 +29,6 @@ class SharedMemory;

 namespace Service::GSP {

-/// GSP interrupt ID
-enum class InterruptId : u8 {
-    PSC0 = 0x00,
-    PSC1 = 0x01,
-    PDC0 = 0x02, // Seems called every vertical screen line
-    PDC1 = 0x03, // Seems called every frame
-    PPF = 0x04,
-    P3D = 0x05,
-    DMA = 0x06,
-};
-
-/// GSP command ID
-enum class CommandId : u32 {
-    REQUEST_DMA = 0x00,
-    /// Submits a commandlist for execution by the GPU.
-    SUBMIT_GPU_CMDLIST = 0x01,
-
-    // Fills a given memory range with a particular value
-    SET_MEMORY_FILL = 0x02,
-
-    // Copies an image and optionally performs color-conversion or scaling.
-    // This is highly similar to the GameCube's EFB copy feature
-    SET_DISPLAY_TRANSFER = 0x03,
-
-    // Conceptionally similar to SET_DISPLAY_TRANSFER and presumable uses the same hardware path
-    SET_TEXTURE_COPY = 0x04,
-    /// Flushes up to 3 cache regions in a single command.
-    CACHE_FLUSH = 0x05,
-};
-
-/// GSP thread interrupt relay queue
-struct InterruptRelayQueue {
-    // Index of last interrupt in the queue
-    u8 index;
-    // Number of interrupts remaining to be processed by the userland code
-    u8 number_interrupts;
-    // Error code - zero on success, otherwise an error has occurred
-    u8 error_code;
-    u8 padding1;
-
-    u32 missed_PDC0;
-    u32 missed_PDC1;
-
-    InterruptId slot[0x34]; ///< Interrupt ID slots
-};
-static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size");
-
 struct FrameBufferInfo {
    u32 active_fb; // 0 = first, 1 = second
    u32 address_left;
@ -96,95 +50,9 @@ struct FrameBufferUpdate {
    u32 pad2;
 };
 static_assert(sizeof(FrameBufferUpdate) == 0x40, "Struct has incorrect size");
-// TODO: Not sure if this padding is correct.
-// Chances are the second block is stored at offset 0x24 rather than 0x20.
 static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20,
              "FrameBufferInfo element has incorrect alignment");

-/// GSP command
-struct Command {
-    BitField<0, 8, CommandId> id;
-
-    union {
-        struct {
-            u32 source_address;
-            u32 dest_address;
-            u32 size;
-        } dma_request;
-
-        struct {
-            u32 address;
-            u32 size;
-            u32 flags;
-            u32 unused[3];
-            u32 do_flush;
-        } submit_gpu_cmdlist;
-
-        struct {
-            u32 start1;
-            u32 value1;
-            u32 end1;
-
-            u32 start2;
-            u32 value2;
-            u32 end2;
-
-            u16 control1;
-            u16 control2;
-        } memory_fill;
-
-        struct {
-            u32 in_buffer_address;
-            u32 out_buffer_address;
-            u32 in_buffer_size;
-            u32 out_buffer_size;
-            u32 flags;
-        } display_transfer;
-
-        struct {
-            u32 in_buffer_address;
-            u32 out_buffer_address;
-            u32 size;
-            u32 in_width_gap;
-            u32 out_width_gap;
-            u32 flags;
-        } texture_copy;
-
-        struct {
-            struct {
-                u32 address;
-                u32 size;
-            } regions[3];
-        } cache_flush;
-
-        u8 raw_data[0x1C];
-    };
-};
-static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size");
-
-/// GSP shared memory GX command buffer header
-struct CommandBuffer {
-    union {
-        u32 hex;
-
-        // Current command index. This index is updated by GSP module after loading the command
-        // data, right before the command is processed. When this index is updated by GSP module,
-        // the total commands field is decreased by one as well.
-        BitField<0, 8, u32> index;
-
-        // Total commands to process, must not be value 0 when GSP module handles commands. This
-        // must be <=15 when writing a command to shared memory. This is incremented by the
-        // application when writing a command to shared memory, after increasing this value
-        // TriggerCmdReqQueue is only used if this field is value 1.
-        BitField<8, 8, u32> number_commands;
-    };
-
-    u32 unk[7];
-
-    Command commands[0xF];
-};
-static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size");
-
 constexpr u32 FRAMEBUFFER_WIDTH = 240;
 constexpr u32 FRAMEBUFFER_WIDTH_POW2 = 256;
 constexpr u32 TOP_FRAMEBUFFER_HEIGHT = 400;
@ -242,6 +110,12 @@ public:
     */
    FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index);

+    /// Gets a pointer to a thread command buffer in GSP shared memory
+    CommandBuffer* GetCommandBuffer(u32 thread_id);
+
+    /// Gets a pointer to the interrupt relay queue for a given thread index
+    InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id);
+
    /**
     * Retreives the ID of the thread with GPU rights.
     */
@ -513,7 +387,7 @@ private:
    static constexpr u32 MaxGSPThreads = 4;

    /// Thread ids currently in use by the sessions connected to the GSPGPU service.
-    std::array<bool, MaxGSPThreads> used_thread_ids = {false, false, false, false};
+    std::array<bool, MaxGSPThreads> used_thread_ids{};

    friend class SessionData;

@ -522,8 +396,6 @@ private:
    friend class boost::serialization::access;
 };

-ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info);
-
 } // namespace Service::GSP

 BOOST_CLASS_EXPORT_KEY(Service::GSP::SessionData)
--- a/src/core/hle/service/gsp/gsp_interrupt.h
+++ b/src/core/hle/service/gsp/gsp_interrupt.h
@ -0,0 +1,42 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include "common/common_types.h"
+
+namespace Service::GSP {
+
+/// GSP interrupt ID. Per-value notes are unverified assumptions (cf. 3dbrew) - TODO confirm.
+enum class InterruptId : u8 {
+    PSC0 = 0x00, // presumably memory fill unit 0 finished
+    PSC1 = 0x01, // presumably memory fill unit 1 finished
+    PDC0 = 0x02, // presumably top-screen VBlank; see missed_PDC0 in InterruptRelayQueue
+    PDC1 = 0x03, // presumably bottom-screen VBlank; see missed_PDC1 in InterruptRelayQueue
+    PPF = 0x04, // presumably display transfer / texture copy finished
+    P3D = 0x05, // presumably GPU command list finished
+    DMA = 0x06, // presumably DMA request finished
+};
+
+/// GSP thread interrupt relay queue; the static_assert below pins its size to 0x40 bytes.
+struct InterruptRelayQueue {
+    // Index of last interrupt in the queue
+    u8 index;
+    // Number of interrupts remaining to be processed by the userland code
+    u8 number_interrupts;
+    // Error code - zero on success, otherwise an error has occurred
+    u8 error_code;
+    u8 padding1; // Fourth byte of the header, so the u32 fields below start at offset 4
+
+    u32 missed_PDC0; // NOTE(review): presumably counts PDC0 interrupts that were dropped - confirm
+    u32 missed_PDC1; // NOTE(review): presumably counts PDC1 interrupts that were dropped - confirm
+
+    InterruptId slot[0x34]; ///< Interrupt ID slots (0x34 entries, one byte each)
+};
+static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size");
+
+using InterruptHandler = std::function<void(InterruptId)>; ///< Callable taking an InterruptId
+
+} // namespace Service::GSP
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@ -22,7 +22,6 @@
 #include "core/hle/service/hid/hid_user.h"
 #include "core/hle/service/service.h"
 #include "core/movie.h"
-#include "video_core/video_core.h"

 SERVICE_CONSTRUCT_IMPL(Service::HID::Module)
 SERIALIZE_EXPORT_IMPL(Service::HID::Module)