video_core: Refactor GPU interface (#7272)

* video_core: Refactor GPU interface

* citra_qt: Better debug widget lifetime
This commit is contained in:
GPUCode 2023-12-28 12:46:57 +02:00 committed by GitHub
parent 602f4f60d8
commit 2bb7f89c30
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
167 changed files with 4172 additions and 4866 deletions

View file

@ -2,33 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "core/core.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/service/gsp/gsp.h"
#include "core/hle/service/gsp/gsp_gpu.h"
#include "core/hle/service/gsp/gsp_lcd.h"
namespace Service::GSP {
static std::weak_ptr<GSP_GPU> gsp_gpu;
/// Relays an interrupt to the GSP_GPU service tracked by the file-local `gsp_gpu` handle.
/// Asserts when that service instance is no longer alive.
void SignalInterrupt(InterruptId interrupt_id) {
    const auto service = gsp_gpu.lock();
    ASSERT(service != nullptr);
    service->SignalInterrupt(interrupt_id);
}
void InstallInterfaces(Core::System& system) {
auto& service_manager = system.ServiceManager();
auto gpu = std::make_shared<GSP_GPU>(system);
gpu->InstallAsService(service_manager);
gsp_gpu = gpu;
std::make_shared<GSP_GPU>(system)->InstallAsService(service_manager);
std::make_shared<GSP_LCD>()->InstallAsService(service_manager);
}
/// Points the file-local `gsp_gpu` handle at the service registered under the name "gsp::Gpu"
/// in the service manager of `system`.
void SetGlobalModule(Core::System& system) {
gsp_gpu = system.ServiceManager().GetService<GSP_GPU>("gsp::Gpu");
}
} // namespace Service::GSP

View file

@ -4,25 +4,12 @@
#pragma once
#include <cstddef>
#include <string>
#include "common/common_types.h"
#include "core/hle/result.h"
#include "core/hle/service/gsp/gsp_gpu.h"
#include "core/hle/service/gsp/gsp_lcd.h"
namespace Core {
class System;
}
namespace Service::GSP {
/**
* Signals that the specified interrupt type has occurred to userland code
* @param interrupt_id ID of interrupt that is being signalled
*/
void SignalInterrupt(InterruptId interrupt_id);
void InstallInterfaces(Core::System& system);
void SetGlobalModule(Core::System& system);
} // namespace Service::GSP

View file

@ -0,0 +1,110 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Service::GSP {
/// GSP command ID. Identifies which member of the parameter union of a Command is meaningful.
enum class CommandId : u32 {
/// Requests a DMA copy between two memory regions.
RequestDma = 0x00,
/// Submits a command list for execution by the GPU.
SubmitCmdList = 0x01,
/// Fills a given memory range with a particular value.
MemoryFill = 0x02,
/// Copies an image and optionally performs color-conversion or scaling.
DisplayTransfer = 0x03,
/// Conceptually similar to DisplayTransfer and presumably uses the same hardware path.
TextureCopy = 0x04,
/// Flushes up to 3 cache regions in a single command.
CacheFlush = 0x05,
};
/// Parameters of a CommandId::RequestDma command.
struct DmaCommand {
u32 source_address; ///< Address the data is copied from
u32 dest_address;   ///< Address the data is copied to
u32 size;           ///< Amount of data to copy (presumably in bytes - confirm against the handler)
};
/// Parameters of a CommandId::SubmitCmdList command.
struct SubmitCmdListCommand {
u32 address;   ///< Address of the command list
u32 size;      ///< Size of the command list
u32 flags;     ///< NOTE(review): meaning of this field is not established by this file
u32 unused[3]; ///< Padding words, keeps do_flush at its fixed position
u32 do_flush;  ///< Non-zero requests that the command list be flushed from the cache
};
/// Parameters of a CommandId::MemoryFill command. Describes two fill operations, distinguished
/// by the suffixes 1 and 2; both are assumed to behave equivalently.
struct MemoryFillCommand {
u32 start1;   ///< Start address of the first fill
u32 value1;   ///< Fill value of the first fill
u32 end1;     ///< End address of the first fill
u32 start2;   ///< Start address of the second fill
u32 value2;   ///< Fill value of the second fill
u32 end2;     ///< End address of the second fill
u16 control1; ///< Control value of the first fill
u16 control2; ///< Control value of the second fill
};
/// Parameters of a CommandId::DisplayTransfer command.
struct DisplayTransferCommand {
u32 in_buffer_address;  ///< Address of the input buffer
u32 out_buffer_address; ///< Address of the output buffer
u32 in_buffer_size;     ///< Input size setting, passed on as-is
u32 out_buffer_size;    ///< Output size setting, passed on as-is
u32 flags;              ///< Transfer flags, passed on as-is
};
/// Parameters of a CommandId::TextureCopy command.
struct TextureCopyCommand {
u32 in_buffer_address;  ///< Address of the input buffer
u32 out_buffer_address; ///< Address of the output buffer
u32 size;               ///< Size of the copy
u32 in_width_gap;       ///< Input width/gap setting (meaning derived from the name - confirm)
u32 out_width_gap;      ///< Output width/gap setting (meaning derived from the name - confirm)
u32 flags;              ///< Transfer flags, passed on as-is
};
/// Parameters of a CommandId::CacheFlush command: up to three regions to flush.
struct CacheFlushCommand {
struct {
u32 address; ///< Start address of the region
u32 size;    ///< Size of the region
} regions[3];
};
/// GSP command: a single 0x20-byte entry of a CommandBuffer.
/// The low 8 bits of the leading field hold the CommandId; the remaining 0x1C bytes are the
/// command-specific parameters, interpreted according to that ID.
struct Command {
BitField<0, 8, CommandId> id;
union {
DmaCommand dma_request;                  // CommandId::RequestDma
SubmitCmdListCommand submit_gpu_cmdlist; // CommandId::SubmitCmdList
MemoryFillCommand memory_fill;           // CommandId::MemoryFill
DisplayTransferCommand display_transfer; // CommandId::DisplayTransfer
TextureCopyCommand texture_copy;         // CommandId::TextureCopy
CacheFlushCommand cache_flush;           // CommandId::CacheFlush
std::array<u8, 0x1C> raw_data;           // Untyped view of the parameter bytes
};
};
// The entry size is fixed; CommandBuffer relies on it for its own 0x200-byte layout.
static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size");
/// GSP shared memory GX command buffer: a 0x20-byte header followed by 15 Command entries.
struct CommandBuffer {
union {
// Raw view of the first header word
u32 hex;
// Current command index. This index is updated by GSP module after loading the command
// data, right before the command is processed. When this index is updated by GSP module,
// the total commands field is decreased by one as well.
BitField<0, 8, u32> index;
// Total commands to process, must not be value 0 when GSP module handles commands. This
// must be <=15 when writing a command to shared memory. This is incremented by the
// application when writing a command to shared memory; after increasing this value,
// TriggerCmdReqQueue is only used if this field is value 1.
BitField<8, 8, u32> number_commands;
};
// Remaining header words; their meaning is not established by this file
u32 unk[7];
// Command slots (0xF entries of 0x20 bytes each)
Command commands[0xF];
};
static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size");
} // namespace Service::GSP

View file

@ -9,30 +9,21 @@
#include <boost/serialization/shared_ptr.hpp>
#include "common/archives.h"
#include "common/bit_field.h"
#include "common/microprofile.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/file_sys/plugin_3gx.h"
#include "core/hle/ipc.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/shared_page.h"
#include "core/hle/result.h"
#include "core/hle/service/gsp/gsp_gpu.h"
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h"
#include "video_core/gpu_debugger.h"
#include "video_core/pica/regs_lcd.h"
SERIALIZE_EXPORT_IMPL(Service::GSP::SessionData)
SERIALIZE_EXPORT_IMPL(Service::GSP::GSP_GPU)
SERVICE_CONSTRUCT_IMPL(Service::GSP::GSP_GPU)
// Main graphics debugger object - TODO: Here is probably not the best place for this
GraphicsDebugger g_debugger;
namespace Service::GSP {
// Beginning address of HW regs
@ -59,60 +50,32 @@ constexpr ResultCode ERR_REGS_INVALID_SIZE(ErrorDescription::InvalidSize, ErrorM
ErrorSummary::InvalidArgument,
ErrorLevel::Usage); // 0xE0E02BEC
/**
 * Translates a virtual address into the matching physical address.
 *
 * Recognised regions are VRAM, the linear heap, the new linear heap and the 3GX plugin
 * framebuffer. A null address maps to null.
 *
 * @param addr Virtual address to translate
 * @return The physical address, or `addr` with bit 31 set when no region matches
 */
static PAddr VirtualToPhysicalAddress(VAddr addr) {
    if (addr == 0) {
        return 0;
    }

    // Both bounds are inclusive: the game can pass in an address that represents an open
    // right boundary, so the region end itself has to be accepted.
    const auto lies_within = [addr](VAddr first, VAddr last) {
        return addr >= first && addr <= last;
    };

    if (lies_within(Memory::VRAM_VADDR, Memory::VRAM_VADDR_END)) {
        return addr - Memory::VRAM_VADDR + Memory::VRAM_PADDR;
    }
    if (lies_within(Memory::LINEAR_HEAP_VADDR, Memory::LINEAR_HEAP_VADDR_END)) {
        return addr - Memory::LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR;
    }
    if (lies_within(Memory::NEW_LINEAR_HEAP_VADDR, Memory::NEW_LINEAR_HEAP_VADDR_END)) {
        return addr - Memory::NEW_LINEAR_HEAP_VADDR + Memory::FCRAM_PADDR;
    }
    if (lies_within(Memory::PLUGIN_3GX_FB_VADDR, Memory::PLUGIN_3GX_FB_VADDR_END)) {
        return addr - Memory::PLUGIN_3GX_FB_VADDR + Service::PLGLDR::PLG_LDR::GetPluginFBAddr();
    }

    LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x{:08X}", addr);
    // Tag the address with the top bit so that it is obviously invalid while debugging.
    // TODO: find the correct way to handle this error
    return addr | 0x80000000;
}
/// Returns the lowest thread id whose entry in `used_thread_ids` is not set.
/// Hitting the end of the table is treated as unreachable; 0 is returned in that case.
u32 GSP_GPU::GetUnusedThreadId() const {
    u32 candidate = 0;
    while (candidate < MaxGSPThreads) {
        if (!used_thread_ids[candidate]) {
            return candidate;
        }
        ++candidate;
    }

    UNREACHABLE_MSG("All GSP threads are in use");
    return 0;
}
/// Gets a pointer to a thread command buffer in GSP shared memory
static inline u8* GetCommandBuffer(std::shared_ptr<Kernel::SharedMemory> shared_memory,
u32 thread_id) {
return shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
CommandBuffer* GSP_GPU::GetCommandBuffer(u32 thread_id) {
auto* ptr = shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
return reinterpret_cast<CommandBuffer*>(ptr);
}
FrameBufferUpdate* GSP_GPU::GetFrameBufferInfo(u32 thread_id, u32 screen_index) {
DEBUG_ASSERT_MSG(screen_index < 2, "Invalid screen index");
// For each thread there are two FrameBufferUpdate fields
u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate);
const u32 offset = 0x200 + (2 * thread_id + screen_index) * sizeof(FrameBufferUpdate);
u8* ptr = shared_memory->GetPointer(offset);
return reinterpret_cast<FrameBufferUpdate*>(ptr);
}
/// Gets a pointer to the interrupt relay queue for a given thread index
static inline InterruptRelayQueue* GetInterruptRelayQueue(
std::shared_ptr<Kernel::SharedMemory> shared_memory, u32 thread_id) {
InterruptRelayQueue* GSP_GPU::GetInterruptRelayQueue(u32 thread_id) {
u8* ptr = shared_memory->GetPointer(sizeof(InterruptRelayQueue) * thread_id);
return reinterpret_cast<InterruptRelayQueue*>(ptr);
}
@ -125,19 +88,6 @@ void GSP_GPU::ClientDisconnected(std::shared_ptr<Kernel::ServerSession> server_s
SessionRequestHandler::ClientDisconnected(server_session);
}
/**
 * Writes one u32 value to a single GSP GPU hardware register.
 * (For internal use.)
 *
 * @param base_address Offset of the register relative to REGS_BEGIN; expected to be
 *                     word-aligned and below 0x420000 (checked by a debug assertion only)
 * @param data Value to be written
 */
static void WriteSingleHWReg(u32 base_address, u32 data) {
    DEBUG_ASSERT_MSG(base_address % 4 == 0 && base_address < 0x420000,
                     "Write address out of range or misaligned");

    const u32 target_address = base_address + REGS_BEGIN;
    HW::Write<u32>(target_address, data);
}
/**
* Writes sequential GSP GPU hardware registers using an array of source data
*
@ -146,7 +96,8 @@ static void WriteSingleHWReg(u32 base_address, u32 data) {
* @param data A vector containing the source data
* @return RESULT_SUCCESS if the parameters are valid, error code otherwise
*/
static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<const u8> data) {
static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<const u8> data,
VideoCore::GPU& gpu) {
// This magic number is verified to be done by the gsp module
const u32 max_size_in_bytes = 0x80;
@ -155,28 +106,30 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<con
"Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})",
base_address, size_in_bytes);
return ERR_REGS_OUTOFRANGE_OR_MISALIGNED;
} else if (size_in_bytes <= max_size_in_bytes) {
if (size_in_bytes & 3) {
LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
return ERR_REGS_MISALIGNED;
} else {
std::size_t offset = 0;
while (size_in_bytes > 0) {
u32 value;
std::memcpy(&value, &data[offset], sizeof(u32));
WriteSingleHWReg(base_address, value);
}
size_in_bytes -= 4;
offset += 4;
base_address += 4;
}
return RESULT_SUCCESS;
}
} else {
if (size_in_bytes > max_size_in_bytes) {
LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes);
return ERR_REGS_INVALID_SIZE;
}
if (size_in_bytes & 3) {
LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
return ERR_REGS_MISALIGNED;
}
std::size_t offset = 0;
while (size_in_bytes > 0) {
u32 value;
std::memcpy(&value, &data[offset], sizeof(u32));
gpu.WriteReg(REGS_BEGIN + base_address, value);
size_in_bytes -= 4;
offset += 4;
base_address += 4;
}
return RESULT_SUCCESS;
}
/**
@ -190,7 +143,7 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, std::span<con
* @return RESULT_SUCCESS if the parameters are valid, error code otherwise
*/
static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std::span<const u8> data,
std::span<const u8> masks) {
std::span<const u8> masks, VideoCore::GPU& gpu) {
// This magic number is verified to be done by the gsp module
const u32 max_size_in_bytes = 0x80;
@ -199,60 +152,58 @@ static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, std::
"Write address was out of range or misaligned! (address=0x{:08x}, size=0x{:08x})",
base_address, size_in_bytes);
return ERR_REGS_OUTOFRANGE_OR_MISALIGNED;
} else if (size_in_bytes <= max_size_in_bytes) {
if (size_in_bytes & 3) {
LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
return ERR_REGS_MISALIGNED;
} else {
std::size_t offset = 0;
while (size_in_bytes > 0) {
const u32 reg_address = base_address + REGS_BEGIN;
}
u32 reg_value;
HW::Read<u32>(reg_value, reg_address);
u32 value, mask;
std::memcpy(&value, &data[offset], sizeof(u32));
std::memcpy(&mask, &masks[offset], sizeof(u32));
// Update the current value of the register only for set mask bits
reg_value = (reg_value & ~mask) | (value & mask);
WriteSingleHWReg(base_address, reg_value);
size_in_bytes -= 4;
offset += 4;
base_address += 4;
}
return RESULT_SUCCESS;
}
} else {
if (size_in_bytes > max_size_in_bytes) {
LOG_ERROR(Service_GSP, "Out of range size 0x{:08x}", size_in_bytes);
return ERR_REGS_INVALID_SIZE;
}
if (size_in_bytes & 3) {
LOG_ERROR(Service_GSP, "Misaligned size 0x{:08x}", size_in_bytes);
return ERR_REGS_MISALIGNED;
}
std::size_t offset = 0;
while (size_in_bytes > 0) {
const u32 reg_address = base_address + REGS_BEGIN;
u32 reg_value = gpu.ReadReg(reg_address);
u32 value, mask;
std::memcpy(&value, &data[offset], sizeof(u32));
std::memcpy(&mask, &masks[offset], sizeof(u32));
// Update the current value of the register only for set mask bits
reg_value = (reg_value & ~mask) | (value & mask);
gpu.WriteReg(reg_address, reg_value);
size_in_bytes -= 4;
offset += 4;
base_address += 4;
}
return RESULT_SUCCESS;
}
void GSP_GPU::WriteHWRegs(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx);
u32 reg_addr = rp.Pop<u32>();
u32 size = rp.Pop<u32>();
std::vector<u8> src_data = rp.PopStaticBuffer();
const u32 reg_addr = rp.Pop<u32>();
const u32 size = rp.Pop<u32>();
const auto src_data = rp.PopStaticBuffer();
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data));
rb.Push(GSP::WriteHWRegs(reg_addr, size, src_data, system.GPU()));
}
void GSP_GPU::WriteHWRegsWithMask(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx);
u32 reg_addr = rp.Pop<u32>();
u32 size = rp.Pop<u32>();
std::vector<u8> src_data = rp.PopStaticBuffer();
std::vector<u8> mask_data = rp.PopStaticBuffer();
const u32 reg_addr = rp.Pop<u32>();
const u32 size = rp.Pop<u32>();
const auto src_data = rp.PopStaticBuffer();
const auto mask_data = rp.PopStaticBuffer();
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data));
rb.Push(GSP::WriteHWRegsWithMask(reg_addr, size, src_data, mask_data, system.GPU()));
}
void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
@ -270,7 +221,7 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
return;
}
// size should be word-aligned
// Size should be word-aligned
if ((size % 4) != 0) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_REGS_MISALIGNED);
@ -279,8 +230,9 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
}
std::vector<u8> buffer(size);
for (u32 offset = 0; offset < size; ++offset) {
HW::Read<u8>(buffer[offset], REGS_BEGIN + reg_addr + offset);
for (u32 word = 0; word < size / sizeof(u32); ++word) {
const u32 data = system.GPU().ReadReg(REGS_BEGIN + reg_addr + word * sizeof(u32));
std::memcpy(buffer.data() + word * sizeof(u32), &data, sizeof(u32));
}
IPC::RequestBuilder rb = rp.MakeBuilder(1, 2);
@ -288,53 +240,15 @@ void GSP_GPU::ReadHWRegs(Kernel::HLERequestContext& ctx) {
rb.PushStaticBuffer(std::move(buffer), 0);
}
/**
 * Programs the framebuffer registers of one screen from a FrameBufferInfo.
 *
 * The left/right addresses are translated to physical addresses and written to the first or
 * second address register pair depending on `info.active_fb`; stride, color format and the
 * shown framebuffer index follow. Afterwards a BufferSwapped debug event is raised and, for
 * screen 0, the profiler frame is flipped and the game frame is ended.
 *
 * @param screen_id Index of the screen whose registers are written
 * @param info Framebuffer description to apply
 * @return Always RESULT_SUCCESS
 */
ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
    // Turns a framebuffer register index into the offset expected by WriteSingleHWReg
    const auto reg_offset = [](auto reg_index) -> u32 {
        constexpr u32 base_address = 0x400000;
        return base_address + 4 * static_cast<u32>(reg_index);
    };

    const PAddr left_paddr = VirtualToPhysicalAddress(info.address_left);
    const PAddr right_paddr = VirtualToPhysicalAddress(info.address_right);

    if (info.active_fb != 0) {
        WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, address_left2)),
                         left_paddr);
        WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, address_right2)),
                         right_paddr);
    } else {
        WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, address_left1)),
                         left_paddr);
        WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, address_right1)),
                         right_paddr);
    }

    WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, stride)), info.stride);
    WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, color_format)),
                     info.format);
    WriteSingleHWReg(reg_offset(GPU_FRAMEBUFFER_REG_INDEX(screen_id, active_fb)),
                     info.shown_fb);

    if (Pica::g_debug_context) {
        Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr);
    }

    // Only the swap of screen 0 marks the end of a frame
    if (screen_id == 0) {
        MicroProfileFlip();
        Core::System::GetInstance().perf_stats->EndGameFrame();
    }

    return RESULT_SUCCESS;
}
void GSP_GPU::SetBufferSwap(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx);
u32 screen_id = rp.Pop<u32>();
auto fb_info = rp.PopRaw<FrameBufferInfo>();
system.GPU().SetBufferSwap(screen_id, fb_info);
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(GSP::SetBufferSwap(screen_id, fb_info));
rb.Push(RESULT_SUCCESS);
}
void GSP_GPU::FlushDataCache(Kernel::HLERequestContext& ctx) {
@ -382,10 +296,9 @@ void GSP_GPU::RegisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) {
u32 flags = rp.Pop<u32>();
auto interrupt_event = rp.PopObject<Kernel::Event>();
// TODO(mailwl): return right error code instead assert
ASSERT_MSG((interrupt_event != nullptr), "handle is not valid!");
ASSERT_MSG(interrupt_event, "handle is not valid!");
interrupt_event->SetName("GSP_GSP_GPU::interrupt_event");
interrupt_event->SetName("GSP_GPU::interrupt_event");
SessionData* session_data = GetSessionData(ctx.Session());
session_data->interrupt_event = std::move(interrupt_event);
@ -422,15 +335,17 @@ void GSP_GPU::UnregisterInterruptRelayQueue(Kernel::HLERequestContext& ctx) {
void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id) {
SessionData* session_data = FindRegisteredThreadData(thread_id);
if (session_data == nullptr)
if (!session_data) {
return;
}
auto interrupt_event = session_data->interrupt_event;
if (interrupt_event == nullptr) {
LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
return;
}
InterruptRelayQueue* interrupt_relay_queue = GetInterruptRelayQueue(shared_memory, thread_id);
auto* interrupt_relay_queue = GetInterruptRelayQueue(thread_id);
u8 next = interrupt_relay_queue->index;
next += interrupt_relay_queue->number_interrupts;
next = next % 0x34; // 0x34 is the number of interrupt slots
@ -441,29 +356,20 @@ void GSP_GPU::SignalInterruptForThread(InterruptId interrupt_id, u32 thread_id)
interrupt_relay_queue->error_code = 0x0; // No error
// Update framebuffer information if requested
// TODO(yuriks): Confirm where this code should be called. It is definitely updated without
// executing any GSP commands, only waiting on the event.
// TODO(Subv): The real GSP module triggers PDC0 after updating both the top and bottom
// screen, it is currently unknown what PDC1 does.
int screen_id = (interrupt_id == InterruptId::PDC0) ? 0
: (interrupt_id == InterruptId::PDC1) ? 1
: -1;
const s32 screen_id = (interrupt_id == InterruptId::PDC0) ? 0
: (interrupt_id == InterruptId::PDC1) ? 1
: -1;
if (screen_id != -1) {
FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id);
auto* info = GetFrameBufferInfo(thread_id, screen_id);
if (info->is_dirty) {
GSP::SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
system.GPU().SetBufferSwap(screen_id, info->framebuffer_info[info->index]);
info->is_dirty.Assign(false);
}
}
interrupt_event->Signal();
}
/**
* Signals that the specified interrupt type has occurred to userland code
* @param interrupt_id ID of interrupt that is being signalled
* @todo This should probably take a thread_id parameter and only signal this thread?
* @todo This probably does not belong in the GSP module, instead move to video_core
*/
void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) {
if (nullptr == shared_memory) {
LOG_WARNING(Service_GSP, "cannot synchronize until GSP shared memory has been created!");
@ -488,154 +394,13 @@ void GSP_GPU::SignalInterrupt(InterruptId interrupt_id) {
SignalInterruptForThread(interrupt_id, active_thread_id);
}
MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
/**
 * Executes a single GSP command by translating it into GPU register writes (or, for DMA, a
 * memory copy), then reports the command to the Pica debug context if one is installed.
 *
 * @param command Command to decode and execute
 * @param thread_id GSP thread the command came from (not read by this function body)
 */
static void ExecuteCommand(const Command& command, u32 thread_id) {
// Utility function to convert register ID to address
static auto WriteGPURegister = [](u32 id, u32 data) {
GPU::Write<u32>(0x1EF00000 + 4 * id, data);
};
switch (command.id) {
// GX request DMA - typically used for copying memory from GSP heap to VRAM
case CommandId::REQUEST_DMA: {
MICROPROFILE_SCOPE(GPU_GSP_DMA);
Memory::MemorySystem& memory = Core::System::GetInstance().Memory();
// TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever
// possible/likely
// Flush the source region and invalidate the destination region before the copy
Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address,
command.dma_request.size, Memory::FlushMode::Flush);
Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
command.dma_request.size,
Memory::FlushMode::Invalidate);
// TODO(Subv): These memory accesses should not go through the application's memory mapping.
// They should go through the GSP module's memory mapping.
memory.CopyBlock(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
command.dma_request.dest_address, command.dma_request.source_address,
command.dma_request.size);
// The copy is performed synchronously, so completion is signalled right away
SignalInterrupt(InterruptId::DMA);
break;
}
// TODO: This will need some rework in the future. (why?)
case CommandId::SUBMIT_GPU_CMDLIST: {
auto& params = command.submit_gpu_cmdlist;
if (params.do_flush) {
// This flag flushes the command list (params.address, params.size) from the cache.
// Command lists are not processed by the hardware renderer, so we don't need to
// actually flush them in Citra.
}
// The address register receives the physical address shifted right by 3
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.address)),
VirtualToPhysicalAddress(params.address) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.size)),
params.size);
// TODO: Not sure if we are supposed to always write this .. seems to trigger processing
// though
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(command_processor_config.trigger)), 1);
// TODO(yuriks): Figure out the meaning of the `flags` field.
break;
}
// It's assumed that the two "blocks" behave equivalently.
// Presumably this is done simply to allow two memory fills to run in parallel.
case CommandId::SET_MEMORY_FILL: {
auto& params = command.memory_fill;
// A fill unit is only programmed when its start address is non-zero
if (params.start1 != 0) {
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)),
VirtualToPhysicalAddress(params.start1) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)),
VirtualToPhysicalAddress(params.end1) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value_32bit)),
params.value1);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].control)),
params.control1);
}
if (params.start2 != 0) {
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)),
VirtualToPhysicalAddress(params.start2) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)),
VirtualToPhysicalAddress(params.end2) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value_32bit)),
params.value2);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].control)),
params.control2);
}
break;
}
case CommandId::SET_DISPLAY_TRANSFER: {
auto& params = command.display_transfer;
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)),
params.in_buffer_size);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)),
params.out_buffer_size);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)),
params.flags);
// The trigger register is written last, after all parameters are in place
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1);
break;
}
// Texture copies are issued through the display transfer register block
case CommandId::SET_TEXTURE_COPY: {
auto& params = command.texture_copy;
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
params.size);
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
params.in_width_gap);
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
params.out_width_gap);
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), params.flags);
// NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to
// matter.
WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
break;
}
case CommandId::CACHE_FLUSH: {
// NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
// Use command.cache_flush.regions to implement this handler
break;
}
// Unknown IDs are only logged; the debug event below is still raised for them
default:
LOG_ERROR(Service_GSP, "unknown command 0x{:08X}", (int)command.id.Value());
}
if (Pica::g_debug_context)
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::GSPCommandProcessed,
(void*)&command);
}
void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx);
const bool enable_black = rp.Pop<bool>();
bool enable_black = rp.Pop<bool>();
LCD::Regs::ColorFill data = {0};
// Since data is already zeroed, there is no need to explicitly set
// the color to black (all zero).
Pica::ColorFill data{};
data.is_enabled.Assign(enable_black);
LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD
LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD
system.GPU().SetColorFill(data);
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(RESULT_SUCCESS);
@ -644,20 +409,17 @@ void GSP_GPU::SetLcdForceBlack(Kernel::HLERequestContext& ctx) {
void GSP_GPU::TriggerCmdReqQueue(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx);
// Iterate through each thread's command queue...
for (unsigned thread_id = 0; thread_id < 0x4; ++thread_id) {
CommandBuffer* command_buffer = (CommandBuffer*)GetCommandBuffer(shared_memory, thread_id);
// Iterate through each command.
auto* command_buffer = GetCommandBuffer(active_thread_id);
auto& gpu = system.GPU();
for (u32 i = 0; i < command_buffer->number_commands; i++) {
gpu.Debugger().GXCommandProcessed(command_buffer->commands[i]);
// Iterate through each command...
for (unsigned i = 0; i < command_buffer->number_commands; ++i) {
g_debugger.GXCommandProcessed((u8*)&command_buffer->commands[i]);
// Decode and execute command
gpu.Execute(command_buffer->commands[i]);
// Decode and execute command
ExecuteCommand(command_buffer->commands[i], thread_id);
// Indicates that command has completed
command_buffer->number_commands.Assign(command_buffer->number_commands - 1);
}
// Indicates that command has completed
command_buffer->number_commands.Assign(command_buffer->number_commands - 1);
}
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);

View file

@ -13,7 +13,8 @@
#include "common/common_types.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/result.h"
#include "core/hle/service/gsp/gsp_command.h"
#include "core/hle/service/gsp/gsp_interrupt.h"
#include "core/hle/service/service.h"
namespace Core {
@ -28,53 +29,6 @@ class SharedMemory;
namespace Service::GSP {
/// GSP interrupt ID. Each value fits in one byte so it can be stored directly in an
/// InterruptRelayQueue slot.
enum class InterruptId : u8 {
PSC0 = 0x00,
PSC1 = 0x01,
PDC0 = 0x02, // Seems called every vertical screen line
PDC1 = 0x03, // Seems called every frame
PPF = 0x04,
P3D = 0x05,
DMA = 0x06,
};
/// GSP command ID
enum class CommandId : u32 {
/// Requests a DMA copy between two memory regions.
REQUEST_DMA = 0x00,
/// Submits a command list for execution by the GPU.
SUBMIT_GPU_CMDLIST = 0x01,
/// Fills a given memory range with a particular value.
SET_MEMORY_FILL = 0x02,
/// Copies an image and optionally performs color-conversion or scaling.
/// This is highly similar to the GameCube's EFB copy feature.
SET_DISPLAY_TRANSFER = 0x03,
/// Conceptually similar to SET_DISPLAY_TRANSFER and presumably uses the same hardware path.
SET_TEXTURE_COPY = 0x04,
/// Flushes up to 3 cache regions in a single command.
CACHE_FLUSH = 0x05,
};
/// GSP thread interrupt relay queue: a 0x40-byte record made of a 12-byte header followed by
/// 0x34 one-byte interrupt slots.
struct InterruptRelayQueue {
// Index of last interrupt in the queue
u8 index;
// Number of interrupts remaining to be processed by the userland code
u8 number_interrupts;
// Error code - zero on success, otherwise an error has occurred
u8 error_code;
// Keeps the following 32-bit counters aligned
u8 padding1;
// NOTE(review): presumably counts PDC0/PDC1 interrupts that could not be queued - confirm
u32 missed_PDC0;
u32 missed_PDC1;
InterruptId slot[0x34]; ///< Interrupt ID slots
};
static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size");
struct FrameBufferInfo {
u32 active_fb; // 0 = first, 1 = second
u32 address_left;
@ -96,95 +50,9 @@ struct FrameBufferUpdate {
u32 pad2;
};
static_assert(sizeof(FrameBufferUpdate) == 0x40, "Struct has incorrect size");
// TODO: Not sure if this padding is correct.
// Chances are the second block is stored at offset 0x24 rather than 0x20.
static_assert(offsetof(FrameBufferUpdate, framebuffer_info[1]) == 0x20,
"FrameBufferInfo element has incorrect alignment");
/// GSP command: a single 0x20-byte entry of a CommandBuffer.
/// The low 8 bits of the leading field hold the CommandId; the remaining 0x1C bytes are the
/// command-specific parameters, interpreted according to that ID.
struct Command {
BitField<0, 8, CommandId> id;
union {
// CommandId::REQUEST_DMA
struct {
u32 source_address;
u32 dest_address;
u32 size;
} dma_request;
// CommandId::SUBMIT_GPU_CMDLIST
struct {
u32 address;
u32 size;
u32 flags;
u32 unused[3];
u32 do_flush;
} submit_gpu_cmdlist;
// CommandId::SET_MEMORY_FILL - two fill operations, suffixed 1 and 2
struct {
u32 start1;
u32 value1;
u32 end1;
u32 start2;
u32 value2;
u32 end2;
u16 control1;
u16 control2;
} memory_fill;
// CommandId::SET_DISPLAY_TRANSFER
struct {
u32 in_buffer_address;
u32 out_buffer_address;
u32 in_buffer_size;
u32 out_buffer_size;
u32 flags;
} display_transfer;
// CommandId::SET_TEXTURE_COPY
struct {
u32 in_buffer_address;
u32 out_buffer_address;
u32 size;
u32 in_width_gap;
u32 out_width_gap;
u32 flags;
} texture_copy;
// CommandId::CACHE_FLUSH - up to three regions
struct {
struct {
u32 address;
u32 size;
} regions[3];
} cache_flush;
// Untyped view of the parameter bytes
u8 raw_data[0x1C];
};
};
static_assert(sizeof(Command) == 0x20, "Command struct has incorrect size");
/// GSP shared memory GX command buffer: a 0x20-byte header followed by 15 Command entries.
struct CommandBuffer {
union {
// Raw view of the first header word
u32 hex;
// Current command index. This index is updated by GSP module after loading the command
// data, right before the command is processed. When this index is updated by GSP module,
// the total commands field is decreased by one as well.
BitField<0, 8, u32> index;
// Total commands to process, must not be value 0 when GSP module handles commands. This
// must be <=15 when writing a command to shared memory. This is incremented by the
// application when writing a command to shared memory; after increasing this value,
// TriggerCmdReqQueue is only used if this field is value 1.
BitField<8, 8, u32> number_commands;
};
// Remaining header words; their meaning is not established by this file
u32 unk[7];
// Command slots (0xF entries of 0x20 bytes each)
Command commands[0xF];
};
static_assert(sizeof(CommandBuffer) == 0x200, "CommandBuffer struct has incorrect size");
constexpr u32 FRAMEBUFFER_WIDTH = 240;
constexpr u32 FRAMEBUFFER_WIDTH_POW2 = 256;
constexpr u32 TOP_FRAMEBUFFER_HEIGHT = 400;
@ -242,6 +110,12 @@ public:
*/
FrameBufferUpdate* GetFrameBufferInfo(u32 thread_id, u32 screen_index);
/// Gets a pointer to a thread command buffer in GSP shared memory
CommandBuffer* GetCommandBuffer(u32 thread_id);
/// Gets a pointer to the interrupt relay queue for a given thread index
InterruptRelayQueue* GetInterruptRelayQueue(u32 thread_id);
/**
* Retrieves the ID of the thread with GPU rights.
*/
@ -513,7 +387,7 @@ private:
static constexpr u32 MaxGSPThreads = 4;
/// Thread ids currently in use by the sessions connected to the GSPGPU service.
std::array<bool, MaxGSPThreads> used_thread_ids = {false, false, false, false};
std::array<bool, MaxGSPThreads> used_thread_ids{};
friend class SessionData;
@ -522,8 +396,6 @@ private:
friend class boost::serialization::access;
};
ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info);
} // namespace Service::GSP
BOOST_CLASS_EXPORT_KEY(Service::GSP::SessionData)

View file

@ -0,0 +1,42 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "common/common_types.h"
namespace Service::GSP {
/// GSP interrupt ID. Each value fits in one byte so it can be stored directly in an
/// InterruptRelayQueue slot.
enum class InterruptId : u8 {
PSC0 = 0x00,
PSC1 = 0x01,
PDC0 = 0x02,
PDC1 = 0x03,
PPF = 0x04,
P3D = 0x05,
DMA = 0x06,
};
/// GSP thread interrupt relay queue: a 0x40-byte record made of a 12-byte header followed by
/// 0x34 one-byte interrupt slots.
struct InterruptRelayQueue {
// Index of last interrupt in the queue
u8 index;
// Number of interrupts remaining to be processed by the userland code
u8 number_interrupts;
// Error code - zero on success, otherwise an error has occurred
u8 error_code;
// Keeps the following 32-bit counters aligned
u8 padding1;
// NOTE(review): presumably counts PDC0/PDC1 interrupts that could not be queued - confirm
u32 missed_PDC0;
u32 missed_PDC1;
InterruptId slot[0x34]; ///< Interrupt ID slots
};
static_assert(sizeof(InterruptRelayQueue) == 0x40, "InterruptRelayQueue struct has incorrect size");
/// Callable that accepts the ID of an interrupt and returns nothing.
/// NOTE(review): presumably installed by the GSP service so other components can raise GSP
/// interrupts - confirm against the users of this alias.
using InterruptHandler = std::function<void(InterruptId)>;
} // namespace Service::GSP

View file

@ -22,7 +22,6 @@
#include "core/hle/service/hid/hid_user.h"
#include "core/hle/service/service.h"
#include "core/movie.h"
#include "video_core/video_core.h"
SERVICE_CONSTRUCT_IMPL(Service::HID::Module)
SERIALIZE_EXPORT_IMPL(Service::HID::Module)