mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Merge pull request #37 from neobrain/pica
Initial work on Pica rendering.
This commit is contained in:
		
						commit
						36cabe35cc
					
				
					 24 changed files with 2367 additions and 260 deletions
				
			
		|  | @ -78,7 +78,7 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const | ||||||
|         // index refers to a specific command
 |         // index refers to a specific command
 | ||||||
|         const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second; |         const GraphicsDebugger::PicaCommandList& cmdlist = command_lists[item->parent->index].second; | ||||||
|         const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index]; |         const GraphicsDebugger::PicaCommand& cmd = cmdlist[item->index]; | ||||||
|         const Pica::CommandHeader& header = cmd.GetHeader(); |         const Pica::CommandProcessor::CommandHeader& header = cmd.GetHeader(); | ||||||
| 
 | 
 | ||||||
|         if (role == Qt::DisplayRole) { |         if (role == Qt::DisplayRole) { | ||||||
|             QString content; |             QString content; | ||||||
|  |  | ||||||
|  | @ -173,7 +173,7 @@ void ExecuteCommand(const Command& command) { | ||||||
|     case CommandId::SET_COMMAND_LIST_LAST: |     case CommandId::SET_COMMAND_LIST_LAST: | ||||||
|     { |     { | ||||||
|         auto& params = command.set_command_list_last; |         auto& params = command.set_command_list_last; | ||||||
|         WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), params.address >> 3); |         WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), Memory::VirtualToPhysicalAddress(params.address) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3); |         WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3); | ||||||
| 
 | 
 | ||||||
|         // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
 |         // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
 | ||||||
|  | @ -193,20 +193,28 @@ void ExecuteCommand(const Command& command) { | ||||||
|     case CommandId::SET_MEMORY_FILL: |     case CommandId::SET_MEMORY_FILL: | ||||||
|     { |     { | ||||||
|         auto& params = command.memory_fill; |         auto& params = command.memory_fill; | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), params.start1 >> 3); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), Memory::VirtualToPhysicalAddress(params.start1) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), params.end1 >> 3); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), Memory::VirtualToPhysicalAddress(params.end1) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1); | ||||||
| 
 | 
 | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), params.start2 >> 3); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), Memory::VirtualToPhysicalAddress(params.start2) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), params.end2 >> 3); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), Memory::VirtualToPhysicalAddress(params.end2) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2); |         WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // TODO: Check if texture copies are implemented correctly..
 |  | ||||||
|     case CommandId::SET_DISPLAY_TRANSFER: |     case CommandId::SET_DISPLAY_TRANSFER: | ||||||
|  |     { | ||||||
|  |         auto& params = command.image_copy; | ||||||
|  |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||||
|  |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||||||
|  |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size); | ||||||
|  |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); | ||||||
|  |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); | ||||||
|  |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||||||
|  | 
 | ||||||
|         // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to
 |         // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to
 | ||||||
|         // work well enough for running demos. Need to figure out how these all work and trigger
 |         // work well enough for running demos. Need to figure out how these all work and trigger
 | ||||||
|         // them correctly.
 |         // them correctly.
 | ||||||
|  | @ -216,18 +224,19 @@ void ExecuteCommand(const Command& command) { | ||||||
|         SignalInterrupt(InterruptId::P3D); |         SignalInterrupt(InterruptId::P3D); | ||||||
|         SignalInterrupt(InterruptId::DMA); |         SignalInterrupt(InterruptId::DMA); | ||||||
|         break; |         break; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|  |     // TODO: Check if texture copies are implemented correctly..
 | ||||||
|     case CommandId::SET_TEXTURE_COPY: |     case CommandId::SET_TEXTURE_COPY: | ||||||
|     { |     { | ||||||
|         auto& params = command.image_copy; |         auto& params = command.image_copy; | ||||||
|         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), params.in_buffer_address >> 3); |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), params.out_buffer_address >> 3); |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size); |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); | ||||||
|         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); | ||||||
| 
 | 
 | ||||||
|         // TODO: Should this only be ORed with 1 for texture copies?
 |         // TODO: Should this register be set to 1, or should its value instead be OR-ed with 1?
 | ||||||
|         // trigger transfer
 |  | ||||||
|         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); |         WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -14,6 +14,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "core/hw/gpu.h" | #include "core/hw/gpu.h" | ||||||
| 
 | 
 | ||||||
|  | #include "video_core/command_processor.h" | ||||||
| #include "video_core/video_core.h" | #include "video_core/video_core.h" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -24,83 +25,6 @@ Regs g_regs; | ||||||
| u32 g_cur_line = 0;         ///< Current vertical screen line
 | u32 g_cur_line = 0;         ///< Current vertical screen line
 | ||||||
| u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
 | u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
 | ||||||
| 
 | 
 | ||||||
| /**
 |  | ||||||
|  * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM |  | ||||||
|  * @param |  | ||||||
|  */ |  | ||||||
| void SetFramebufferLocation(const FramebufferLocation mode) { |  | ||||||
|     switch (mode) { |  | ||||||
|     case FRAMEBUFFER_LOCATION_FCRAM: |  | ||||||
|     { |  | ||||||
|         auto& framebuffer_top = g_regs.framebuffer_config[0]; |  | ||||||
|         auto& framebuffer_sub = g_regs.framebuffer_config[1]; |  | ||||||
| 
 |  | ||||||
|         framebuffer_top.address_left1  = PADDR_TOP_LEFT_FRAME1; |  | ||||||
|         framebuffer_top.address_left2  = PADDR_TOP_LEFT_FRAME2; |  | ||||||
|         framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1; |  | ||||||
|         framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2; |  | ||||||
|         framebuffer_sub.address_left1  = PADDR_SUB_FRAME1; |  | ||||||
|         //framebuffer_sub.address_left2  = unknown;
 |  | ||||||
|         framebuffer_sub.address_right1 = PADDR_SUB_FRAME2; |  | ||||||
|         //framebuffer_sub.address_right2 = unknown;
 |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     case FRAMEBUFFER_LOCATION_VRAM: |  | ||||||
|     { |  | ||||||
|         auto& framebuffer_top = g_regs.framebuffer_config[0]; |  | ||||||
|         auto& framebuffer_sub = g_regs.framebuffer_config[1]; |  | ||||||
| 
 |  | ||||||
|         framebuffer_top.address_left1  = PADDR_VRAM_TOP_LEFT_FRAME1; |  | ||||||
|         framebuffer_top.address_left2  = PADDR_VRAM_TOP_LEFT_FRAME2; |  | ||||||
|         framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1; |  | ||||||
|         framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2; |  | ||||||
|         framebuffer_sub.address_left1  = PADDR_VRAM_SUB_FRAME1; |  | ||||||
|         //framebuffer_sub.address_left2  = unknown;
 |  | ||||||
|         framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2; |  | ||||||
|         //framebuffer_sub.address_right2 = unknown;
 |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Gets the location of the framebuffers |  | ||||||
|  * @return Location of framebuffers as FramebufferLocation enum |  | ||||||
|  */ |  | ||||||
| FramebufferLocation GetFramebufferLocation(u32 address) { |  | ||||||
|     if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) { |  | ||||||
|         return FRAMEBUFFER_LOCATION_VRAM; |  | ||||||
|     } else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) { |  | ||||||
|         return FRAMEBUFFER_LOCATION_FCRAM; |  | ||||||
|     } else { |  | ||||||
|         ERROR_LOG(GPU, "unknown framebuffer location!"); |  | ||||||
|     } |  | ||||||
|     return FRAMEBUFFER_LOCATION_UNKNOWN; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 GetFramebufferAddr(const u32 address) { |  | ||||||
|     switch (GetFramebufferLocation(address)) { |  | ||||||
|     case FRAMEBUFFER_LOCATION_FCRAM: |  | ||||||
|         return Memory::VirtualAddressFromPhysical_FCRAM(address); |  | ||||||
|     case FRAMEBUFFER_LOCATION_VRAM: |  | ||||||
|         return Memory::VirtualAddressFromPhysical_VRAM(address); |  | ||||||
|     default: |  | ||||||
|         ERROR_LOG(GPU, "unknown framebuffer location"); |  | ||||||
|     } |  | ||||||
|     return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Gets a read-only pointer to a framebuffer in memory |  | ||||||
|  * @param address Physical address of framebuffer |  | ||||||
|  * @return Returns const pointer to raw framebuffer |  | ||||||
|  */ |  | ||||||
| const u8* GetFramebufferPointer(const u32 address) { |  | ||||||
|     u32 addr = GetFramebufferAddr(address); |  | ||||||
|     return (addr != 0) ? Memory::GetPointer(addr) : nullptr; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Read(T &var, const u32 raw_addr) { | inline void Read(T &var, const u32 raw_addr) { | ||||||
|     u32 addr = raw_addr - 0x1EF00000; |     u32 addr = raw_addr - 0x1EF00000; | ||||||
|  | @ -141,8 +65,8 @@ inline void Write(u32 addr, const T data) { | ||||||
|         // TODO: Not sure if this check should be done at GSP level instead
 |         // TODO: Not sure if this check should be done at GSP level instead
 | ||||||
|         if (config.address_start) { |         if (config.address_start) { | ||||||
|             // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
 |             // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
 | ||||||
|             u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); |             u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress())); | ||||||
|             u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); |             u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress())); | ||||||
|             for (u32* ptr = start; ptr < end; ++ptr) |             for (u32* ptr = start; ptr < end; ++ptr) | ||||||
|                 *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation
 |                 *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation
 | ||||||
| 
 | 
 | ||||||
|  | @ -155,8 +79,8 @@ inline void Write(u32 addr, const T data) { | ||||||
|     { |     { | ||||||
|         const auto& config = g_regs.display_transfer_config; |         const auto& config = g_regs.display_transfer_config; | ||||||
|         if (config.trigger & 1) { |         if (config.trigger & 1) { | ||||||
|             u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress()); |             u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); | ||||||
|             u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress()); |             u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); | ||||||
| 
 | 
 | ||||||
|             for (int y = 0; y < config.output_height; ++y) { |             for (int y = 0; y < config.output_height; ++y) { | ||||||
|                 // TODO: Why does the register seem to hold twice the framebuffer width?
 |                 // TODO: Why does the register seem to hold twice the framebuffer width?
 | ||||||
|  | @ -220,14 +144,15 @@ inline void Write(u32 addr, const T data) { | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     // Seems like writing to this register triggers processing
 | ||||||
|     case GPU_REG_INDEX(command_processor_config.trigger): |     case GPU_REG_INDEX(command_processor_config.trigger): | ||||||
|     { |     { | ||||||
|         const auto& config = g_regs.command_processor_config; |         const auto& config = g_regs.command_processor_config; | ||||||
|         if (config.trigger & 1) |         if (config.trigger & 1) | ||||||
|         { |         { | ||||||
|             // u32* buffer = (u32*)Memory::GetPointer(config.GetPhysicalAddress());
 |             u32* buffer = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalAddress())); | ||||||
|             ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.GetPhysicalAddress()); |             u32 size = config.size << 3; | ||||||
|             // TODO: Process command list!
 |             Pica::CommandProcessor::ProcessCommandList(buffer, size); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  | @ -276,11 +201,22 @@ void Init() { | ||||||
|     g_cur_line = 0; |     g_cur_line = 0; | ||||||
|     g_last_line_ticks = Core::g_app_core->GetTicks(); |     g_last_line_ticks = Core::g_app_core->GetTicks(); | ||||||
| 
 | 
 | ||||||
| //    SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
 |  | ||||||
|     SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); |  | ||||||
| 
 |  | ||||||
|     auto& framebuffer_top = g_regs.framebuffer_config[0]; |     auto& framebuffer_top = g_regs.framebuffer_config[0]; | ||||||
|     auto& framebuffer_sub = g_regs.framebuffer_config[1]; |     auto& framebuffer_sub = g_regs.framebuffer_config[1]; | ||||||
|  | 
 | ||||||
|  |     // Setup default framebuffer addresses (located in VRAM)
 | ||||||
|  |     // .. or at least these are the ones used by system applets.
 | ||||||
|  |     // There's probably a smarter way to come up with addresses
 | ||||||
|  |     // like this which does not require hardcoding.
 | ||||||
|  |     framebuffer_top.address_left1  = 0x181E6000; | ||||||
|  |     framebuffer_top.address_left2  = 0x1822C800; | ||||||
|  |     framebuffer_top.address_right1 = 0x18273000; | ||||||
|  |     framebuffer_top.address_right2 = 0x182B9800; | ||||||
|  |     framebuffer_sub.address_left1  = 0x1848F000; | ||||||
|  |     //framebuffer_sub.address_left2  = unknown;
 | ||||||
|  |     framebuffer_sub.address_right1 = 0x184C7800; | ||||||
|  |     //framebuffer_sub.address_right2 = unknown;
 | ||||||
|  | 
 | ||||||
|     // TODO: Width should be 240 instead?
 |     // TODO: Width should be 240 instead?
 | ||||||
|     framebuffer_top.width = 480; |     framebuffer_top.width = 480; | ||||||
|     framebuffer_top.height = 400; |     framebuffer_top.height = 400; | ||||||
|  |  | ||||||
|  | @ -249,72 +249,6 @@ static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of regis | ||||||
| 
 | 
 | ||||||
| extern Regs g_regs; | extern Regs g_regs; | ||||||
| 
 | 
 | ||||||
| enum { |  | ||||||
|     TOP_ASPECT_X        = 0x5, |  | ||||||
|     TOP_ASPECT_Y        = 0x3, |  | ||||||
| 
 |  | ||||||
|     TOP_HEIGHT          = 240, |  | ||||||
|     TOP_WIDTH           = 400, |  | ||||||
|     BOTTOM_WIDTH        = 320, |  | ||||||
| 
 |  | ||||||
|     // Physical addresses in FCRAM (chosen arbitrarily)
 |  | ||||||
|     PADDR_TOP_LEFT_FRAME1       = 0x201D4C00, |  | ||||||
|     PADDR_TOP_LEFT_FRAME2       = 0x202D4C00, |  | ||||||
|     PADDR_TOP_RIGHT_FRAME1      = 0x203D4C00, |  | ||||||
|     PADDR_TOP_RIGHT_FRAME2      = 0x204D4C00, |  | ||||||
|     PADDR_SUB_FRAME1            = 0x205D4C00, |  | ||||||
|     PADDR_SUB_FRAME2            = 0x206D4C00, |  | ||||||
|     // Physical addresses in FCRAM used by ARM9 applications
 |  | ||||||
| /*    PADDR_TOP_LEFT_FRAME1       = 0x20184E60,
 |  | ||||||
|     PADDR_TOP_LEFT_FRAME2       = 0x201CB370, |  | ||||||
|     PADDR_TOP_RIGHT_FRAME1      = 0x20282160, |  | ||||||
|     PADDR_TOP_RIGHT_FRAME2      = 0x202C8670, |  | ||||||
|     PADDR_SUB_FRAME1            = 0x202118E0, |  | ||||||
|     PADDR_SUB_FRAME2            = 0x20249CF0,*/ |  | ||||||
| 
 |  | ||||||
|     // Physical addresses in VRAM
 |  | ||||||
|     // TODO: These should just be deduced from the ones above
 |  | ||||||
|     PADDR_VRAM_TOP_LEFT_FRAME1  = 0x181D4C00, |  | ||||||
|     PADDR_VRAM_TOP_LEFT_FRAME2  = 0x182D4C00, |  | ||||||
|     PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x183D4C00, |  | ||||||
|     PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x184D4C00, |  | ||||||
|     PADDR_VRAM_SUB_FRAME1       = 0x185D4C00, |  | ||||||
|     PADDR_VRAM_SUB_FRAME2       = 0x186D4C00, |  | ||||||
|     // Physical addresses in VRAM used by ARM9 applications
 |  | ||||||
| /*    PADDR_VRAM_TOP_LEFT_FRAME2  = 0x181CB370,
 |  | ||||||
|     PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160, |  | ||||||
|     PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670, |  | ||||||
|     PADDR_VRAM_SUB_FRAME1       = 0x182118E0, |  | ||||||
|     PADDR_VRAM_SUB_FRAME2       = 0x18249CF0,*/ |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Framebuffer location
 |  | ||||||
| enum FramebufferLocation { |  | ||||||
|     FRAMEBUFFER_LOCATION_UNKNOWN,   ///< Framebuffer location is unknown
 |  | ||||||
|     FRAMEBUFFER_LOCATION_FCRAM,     ///< Framebuffer is in the GSP heap
 |  | ||||||
|     FRAMEBUFFER_LOCATION_VRAM,      ///< Framebuffer is in VRAM
 |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM |  | ||||||
|  * @param |  | ||||||
|  */ |  | ||||||
| void SetFramebufferLocation(const FramebufferLocation mode); |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Gets a read-only pointer to a framebuffer in memory |  | ||||||
|  * @param address Physical address of framebuffer |  | ||||||
|  * @return Returns const pointer to raw framebuffer |  | ||||||
|  */ |  | ||||||
| const u8* GetFramebufferPointer(const u32 address); |  | ||||||
| 
 |  | ||||||
| u32 GetFramebufferAddr(const u32 address); |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * Gets the location of the framebuffers |  | ||||||
|  */ |  | ||||||
| FramebufferLocation GetFramebufferLocation(u32 address); |  | ||||||
| 
 |  | ||||||
| template <typename T> | template <typename T> | ||||||
| void Read(T &var, const u32 addr); | void Read(T &var, const u32 addr); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -14,7 +14,6 @@ namespace Memory { | ||||||
| enum { | enum { | ||||||
|     BOOTROM_SIZE            = 0x00010000,   ///< Bootrom (super secret code/data @ 0x8000) size
 |     BOOTROM_SIZE            = 0x00010000,   ///< Bootrom (super secret code/data @ 0x8000) size
 | ||||||
|     MPCORE_PRIV_SIZE        = 0x00002000,   ///< MPCore private memory region size
 |     MPCORE_PRIV_SIZE        = 0x00002000,   ///< MPCore private memory region size
 | ||||||
|     VRAM_SIZE               = 0x00600000,   ///< VRAM size
 |  | ||||||
|     DSP_SIZE                = 0x00080000,   ///< DSP memory size
 |     DSP_SIZE                = 0x00080000,   ///< DSP memory size
 | ||||||
|     AXI_WRAM_SIZE           = 0x00080000,   ///< AXI WRAM size
 |     AXI_WRAM_SIZE           = 0x00080000,   ///< AXI WRAM size
 | ||||||
| 
 | 
 | ||||||
|  | @ -23,8 +22,6 @@ enum { | ||||||
|     FCRAM_PADDR_END         = (FCRAM_PADDR + FCRAM_SIZE),       ///< FCRAM end of physical space
 |     FCRAM_PADDR_END         = (FCRAM_PADDR + FCRAM_SIZE),       ///< FCRAM end of physical space
 | ||||||
|     FCRAM_VADDR             = 0x08000000,                       ///< FCRAM virtual address
 |     FCRAM_VADDR             = 0x08000000,                       ///< FCRAM virtual address
 | ||||||
|     FCRAM_VADDR_END         = (FCRAM_VADDR + FCRAM_SIZE),       ///< FCRAM end of virtual space
 |     FCRAM_VADDR_END         = (FCRAM_VADDR + FCRAM_SIZE),       ///< FCRAM end of virtual space
 | ||||||
|     FCRAM_VADDR_FW0B        = 0xF0000000,                       ///< FCRAM adress for firmare FW0B
 |  | ||||||
|     FCRAM_VADDR_FW0B_END    = (FCRAM_VADDR_FW0B + FCRAM_SIZE),  ///< FCRAM adress end for FW0B
 |  | ||||||
|     FCRAM_MASK              = (FCRAM_SIZE - 1),                 ///< FCRAM mask
 |     FCRAM_MASK              = (FCRAM_SIZE - 1),                 ///< FCRAM mask
 | ||||||
| 
 | 
 | ||||||
|     SHARED_MEMORY_SIZE      = 0x04000000,   ///< Shared memory size
 |     SHARED_MEMORY_SIZE      = 0x04000000,   ///< Shared memory size
 | ||||||
|  | @ -73,6 +70,7 @@ enum { | ||||||
|     HARDWARE_IO_PADDR_END   = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE), |     HARDWARE_IO_PADDR_END   = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE), | ||||||
|     HARDWARE_IO_VADDR_END   = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE), |     HARDWARE_IO_VADDR_END   = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE), | ||||||
| 
 | 
 | ||||||
|  |     VRAM_SIZE               = 0x00600000, | ||||||
|     VRAM_PADDR              = 0x18000000, |     VRAM_PADDR              = 0x18000000, | ||||||
|     VRAM_VADDR              = 0x1F000000, |     VRAM_VADDR              = 0x1F000000, | ||||||
|     VRAM_PADDR_END          = (VRAM_PADDR + VRAM_SIZE), |     VRAM_PADDR_END          = (VRAM_PADDR + VRAM_SIZE), | ||||||
|  | @ -147,7 +145,7 @@ void Write32(const u32 addr, const u32 data); | ||||||
| 
 | 
 | ||||||
| void WriteBlock(const u32 addr, const u8* data, const int size); | void WriteBlock(const u32 addr, const u8* data, const int size); | ||||||
| 
 | 
 | ||||||
| u8* GetPointer(const u32 Address); | u8* GetPointer(const u32 virtual_address); | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * Maps a block of memory on the heap |  * Maps a block of memory on the heap | ||||||
|  | @ -169,16 +167,10 @@ inline const char* GetCharPointer(const u32 address) { | ||||||
|     return (const char *)GetPointer(address); |     return (const char *)GetPointer(address); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline const u32 VirtualAddressFromPhysical_FCRAM(const u32 address) { | /// Converts a physical address to virtual address
 | ||||||
|     return ((address & FCRAM_MASK) | FCRAM_VADDR); | u32 PhysicalToVirtualAddress(const u32 addr); | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| inline const u32 VirtualAddressFromPhysical_IO(const u32 address) { | /// Converts a virtual address to physical address
 | ||||||
|     return (address + 0x0EB00000); | u32 VirtualToPhysicalAddress(const u32 addr); | ||||||
| } |  | ||||||
| 
 |  | ||||||
| inline const u32 VirtualAddressFromPhysical_VRAM(const u32 address) { |  | ||||||
|     return (address + 0x07000000); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| } // namespace
 | } // namespace
 | ||||||
|  |  | ||||||
|  | @ -17,37 +17,44 @@ std::map<u32, MemoryBlock> g_heap_map; | ||||||
| std::map<u32, MemoryBlock> g_heap_gsp_map; | std::map<u32, MemoryBlock> g_heap_gsp_map; | ||||||
| std::map<u32, MemoryBlock> g_shared_map; | std::map<u32, MemoryBlock> g_shared_map; | ||||||
| 
 | 
 | ||||||
| /// Convert a physical address (or firmware-specific virtual address) to primary virtual address
 | /// Convert a physical address to virtual address
 | ||||||
| u32 _VirtualAddress(const u32 addr) { | u32 PhysicalToVirtualAddress(const u32 addr) { | ||||||
|     // Our memory interface read/write functions assume virtual addresses. Put any physical address
 |     // Our memory interface read/write functions assume virtual addresses. Put any physical address
 | ||||||
|     // to virtual address translations here. This is obviously quite hacky... But we're not doing 
 |     // to virtual address translations here. This is quite hacky, but necessary until we implement
 | ||||||
|     // any MMU emulation yet or anything
 |     // proper MMU emulation.
 | ||||||
|     if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) { |     // TODO: Screw it, I'll let bunnei figure out how to do this properly.
 | ||||||
|         return VirtualAddressFromPhysical_FCRAM(addr); |     if ((addr >= VRAM_PADDR) && (addr < VRAM_PADDR_END)) { | ||||||
| 
 |         return addr - VRAM_PADDR + VRAM_VADDR; | ||||||
|     // Virtual address mapping FW0B
 |     }else if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) { | ||||||
|     } else if ((addr >= FCRAM_VADDR_FW0B) && (addr < FCRAM_VADDR_FW0B_END)) { |         return addr - FCRAM_PADDR + FCRAM_VADDR; | ||||||
|         return VirtualAddressFromPhysical_FCRAM(addr); |  | ||||||
| 
 |  | ||||||
|     // Hardware IO
 |  | ||||||
|     // TODO(bunnei): FixMe
 |  | ||||||
|     // This isn't going to work... The physical address of HARDWARE_IO conflicts with the virtual 
 |  | ||||||
|     // address of shared memory.
 |  | ||||||
|     //} else if ((addr >= HARDWARE_IO_PADDR) && (addr < HARDWARE_IO_PADDR_END)) {
 |  | ||||||
|     //    return (addr + 0x0EB00000);
 |  | ||||||
| 
 |  | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     ERROR_LOG(MEMMAP, "Unknown physical address @ 0x%08x", addr); | ||||||
|  |     return addr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /// Convert a virtual address to physical address
 | ||||||
|  | u32 VirtualToPhysicalAddress(const u32 addr) { | ||||||
|  |     // Our memory interface read/write functions assume virtual addresses. Put any virtual address
 | ||||||
|  |     // to physical address translations here. This is quite hacky, but necessary until we implement
 | ||||||
|  |     // proper MMU emulation.
 | ||||||
|  |     // TODO: Screw it, I'll let bunnei figure out how to do this properly.
 | ||||||
|  |     if ((addr >= VRAM_VADDR) && (addr < VRAM_VADDR_END)) { | ||||||
|  |         return addr - 0x07000000; | ||||||
|  |     } else if ((addr >= FCRAM_VADDR) && (addr < FCRAM_VADDR_END)) { | ||||||
|  |         return addr - FCRAM_VADDR + FCRAM_PADDR; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ERROR_LOG(MEMMAP, "Unknown virtual address @ 0x%08x", addr); | ||||||
|     return addr; |     return addr; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Read(T &var, const u32 addr) { | inline void Read(T &var, const u32 vaddr) { | ||||||
|     // TODO: Figure out the fastest order of tests for both read and write (they are probably different).
 |     // TODO: Figure out the fastest order of tests for both read and write (they are probably different).
 | ||||||
|     // TODO: Make sure this represents the mirrors in a correct way.
 |     // TODO: Make sure this represents the mirrors in a correct way.
 | ||||||
|     // Could just do a base-relative read, too.... TODO
 |     // Could just do a base-relative read, too.... TODO
 | ||||||
| 
 | 
 | ||||||
|     const u32 vaddr = _VirtualAddress(addr); |  | ||||||
| 
 |  | ||||||
|     // Kernel memory command buffer
 |     // Kernel memory command buffer
 | ||||||
|     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { |     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { | ||||||
|         var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]); |         var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]); | ||||||
|  | @ -91,8 +98,7 @@ inline void Read(T &var, const u32 addr) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Write(u32 addr, const T data) { | inline void Write(u32 vaddr, const T data) { | ||||||
|     u32 vaddr = _VirtualAddress(addr); |  | ||||||
| 
 | 
 | ||||||
|     // Kernel memory command buffer
 |     // Kernel memory command buffer
 | ||||||
|     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { |     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { | ||||||
|  | @ -140,9 +146,7 @@ inline void Write(u32 addr, const T data) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u8 *GetPointer(const u32 addr) { | u8 *GetPointer(const u32 vaddr) { | ||||||
|     const u32 vaddr = _VirtualAddress(addr); |  | ||||||
| 
 |  | ||||||
|     // Kernel memory command buffer
 |     // Kernel memory command buffer
 | ||||||
|     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { |     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) { | ||||||
|         return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK); |         return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK); | ||||||
|  |  | ||||||
|  | @ -1,10 +1,22 @@ | ||||||
# Translation units that make up the video_core static library.
set(SRCS    clipper.cpp
            command_processor.cpp
            primitive_assembly.cpp
            rasterizer.cpp
            utils.cpp
            vertex_shader.cpp
            video_core.cpp
            renderer_opengl/renderer_opengl.cpp)

# Headers are listed so that IDE generators show them; each header appears exactly once.
set(HEADERS clipper.h
            command_processor.h
            math.h
            primitive_assembly.h
            rasterizer.h
            utils.h
            video_core.h
            renderer_base.h
            vertex_shader.h
            renderer_opengl/renderer_opengl.h)

add_library(video_core STATIC ${SRCS} ${HEADERS})
|  |  | ||||||
							
								
								
									
										179
									
								
								src/video_core/clipper.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										179
									
								
								src/video_core/clipper.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,179 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include "clipper.h" | ||||||
|  | #include "pica.h" | ||||||
|  | #include "rasterizer.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace Clipper { | ||||||
|  | 
 | ||||||
|  | struct ClippingEdge { | ||||||
|  | public: | ||||||
|  |     enum Type { | ||||||
|  |         POS_X = 0, | ||||||
|  |         NEG_X = 1, | ||||||
|  |         POS_Y = 2, | ||||||
|  |         NEG_Y = 3, | ||||||
|  |         POS_Z = 4, | ||||||
|  |         NEG_Z = 5, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     ClippingEdge(Type type, float24 position) : type(type), pos(position) {} | ||||||
|  | 
 | ||||||
|  |     bool IsInside(const OutputVertex& vertex) const { | ||||||
|  |         switch (type) { | ||||||
|  |         case POS_X: return vertex.pos.x <= pos * vertex.pos.w; | ||||||
|  |         case NEG_X: return vertex.pos.x >= pos * vertex.pos.w; | ||||||
|  |         case POS_Y: return vertex.pos.y <= pos * vertex.pos.w; | ||||||
|  |         case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w; | ||||||
|  | 
 | ||||||
|  |         // TODO: Check z compares ... should be 0..1 instead?
 | ||||||
|  |         case POS_Z: return vertex.pos.z <= pos * vertex.pos.w; | ||||||
|  | 
 | ||||||
|  |         default: | ||||||
|  |         case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool IsOutSide(const OutputVertex& vertex) const { | ||||||
|  |         return !IsInside(vertex); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { | ||||||
|  |         auto dotpr = [this](const OutputVertex& vtx) { | ||||||
|  |             switch (type) { | ||||||
|  |             case POS_X: return vtx.pos.x - vtx.pos.w; | ||||||
|  |             case NEG_X: return -vtx.pos.x - vtx.pos.w; | ||||||
|  |             case POS_Y: return vtx.pos.y - vtx.pos.w; | ||||||
|  |             case NEG_Y: return -vtx.pos.y - vtx.pos.w; | ||||||
|  | 
 | ||||||
|  |             // TODO: Verify z clipping
 | ||||||
|  |             case POS_Z: return vtx.pos.z - vtx.pos.w; | ||||||
|  | 
 | ||||||
|  |             default: | ||||||
|  |             case NEG_Z: return -vtx.pos.w; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         float24 dp = dotpr(v0); | ||||||
|  |         float24 dp_prev = dotpr(v1); | ||||||
|  |         float24 factor = dp_prev / (dp_prev - dp); | ||||||
|  | 
 | ||||||
|  |         return OutputVertex::Lerp(factor, v0, v1); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     Type type; | ||||||
|  |     float24 pos; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void InitScreenCoordinates(OutputVertex& vtx) | ||||||
|  | { | ||||||
|  |     struct { | ||||||
|  |         float24 halfsize_x; | ||||||
|  |         float24 offset_x; | ||||||
|  |         float24 halfsize_y; | ||||||
|  |         float24 offset_y; | ||||||
|  |         float24 zscale; | ||||||
|  |         float24 offset_z; | ||||||
|  |     } viewport; | ||||||
|  | 
 | ||||||
|  |     viewport.halfsize_x = float24::FromRawFloat24(registers.viewport_size_x); | ||||||
|  |     viewport.halfsize_y = float24::FromRawFloat24(registers.viewport_size_y); | ||||||
|  |     viewport.offset_x   = float24::FromFloat32(registers.viewport_corner.x); | ||||||
|  |     viewport.offset_y   = float24::FromFloat32(registers.viewport_corner.y); | ||||||
|  |     viewport.zscale     = float24::FromRawFloat24(registers.viewport_depth_range); | ||||||
|  |     viewport.offset_z   = float24::FromRawFloat24(registers.viewport_depth_far_plane); | ||||||
|  | 
 | ||||||
|  |     // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
 | ||||||
|  |     vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x / float24::FromFloat32(2.0) + viewport.offset_x; | ||||||
|  |     vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | ||||||
|  |     vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | ||||||
|  | 
 | ||||||
|  |     // TODO (neobrain):
 | ||||||
|  |     // The list of output vertices has some fixed maximum size,
 | ||||||
|  |     // however I haven't taken the time to figure out what it is exactly.
 | ||||||
|  |     // For now, we hence just assume a maximal size of 1000 vertices.
 | ||||||
|  |     const size_t max_vertices = 1000; | ||||||
|  |     std::vector<OutputVertex> buffer_vertices; | ||||||
|  |     std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 }; | ||||||
|  | 
 | ||||||
|  |     // Make sure to reserve space for all vertices.
 | ||||||
|  |     // Without this, buffer reallocation would invalidate references.
 | ||||||
|  |     buffer_vertices.reserve(max_vertices); | ||||||
|  | 
 | ||||||
|  |     // Simple implementation of the Sutherland-Hodgman clipping algorithm.
 | ||||||
|  |     // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
 | ||||||
|  |     for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)), | ||||||
|  |                        ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)), | ||||||
|  |                        ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)), | ||||||
|  |                        ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)), | ||||||
|  |                        ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), | ||||||
|  |                        ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { | ||||||
|  | 
 | ||||||
|  |         const std::vector<OutputVertex*> input_list = output_list; | ||||||
|  |         output_list.clear(); | ||||||
|  | 
 | ||||||
|  |         const OutputVertex* reference_vertex = input_list.back(); | ||||||
|  | 
 | ||||||
|  |         for (const auto& vertex : input_list) { | ||||||
|  |             // NOTE: This algorithm changes vertex order in some cases!
 | ||||||
|  |             if (edge.IsInside(*vertex)) { | ||||||
|  |                 if (edge.IsOutSide(*reference_vertex)) { | ||||||
|  |                     buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | ||||||
|  |                     output_list.push_back(&(buffer_vertices.back())); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 output_list.push_back(vertex); | ||||||
|  |             } else if (edge.IsInside(*reference_vertex)) { | ||||||
|  |                 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | ||||||
|  |                 output_list.push_back(&(buffer_vertices.back())); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             reference_vertex = vertex; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Need to have at least a full triangle to continue...
 | ||||||
|  |         if (output_list.size() < 3) | ||||||
|  |             return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     InitScreenCoordinates(*(output_list[0])); | ||||||
|  |     InitScreenCoordinates(*(output_list[1])); | ||||||
|  | 
 | ||||||
|  |     for (int i = 0; i < output_list.size() - 2; i ++) { | ||||||
|  |         OutputVertex& vtx0 = *(output_list[0]); | ||||||
|  |         OutputVertex& vtx1 = *(output_list[i+1]); | ||||||
|  |         OutputVertex& vtx2 = *(output_list[i+2]); | ||||||
|  | 
 | ||||||
|  |         InitScreenCoordinates(vtx2); | ||||||
|  | 
 | ||||||
|  |         DEBUG_LOG(GPU, | ||||||
|  |                   "Triangle %d/%d (%d buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), " | ||||||
|  |                   "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " | ||||||
|  |                   "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", | ||||||
|  |                   i,output_list.size(), buffer_vertices.size(), | ||||||
|  |                   vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),output_list.size(), | ||||||
|  |                   vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | ||||||
|  |                   vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), | ||||||
|  |                   vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(), | ||||||
|  |                   vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(), | ||||||
|  |                   vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32()); | ||||||
|  | 
 | ||||||
|  |         Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
							
								
								
									
										21
									
								
								src/video_core/clipper.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								src/video_core/clipper.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
namespace Pica {

// Forward declaration only, so that this header does not need to include vertex_shader.h.
namespace VertexShader {
    struct OutputVertex;
}

namespace Clipper {

// Lets the clipper refer to the vertex shader's output type by its short name.
using VertexShader::OutputVertex;

/// Entry point of the clipper: takes the three vertices of one triangle.
void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2);

} // namespace Clipper

} // namespace Pica
 | ||||||
							
								
								
									
										238
									
								
								src/video_core/command_processor.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										238
									
								
								src/video_core/command_processor.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,238 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "command_processor.h" | ||||||
|  | #include "math.h" | ||||||
|  | #include "pica.h" | ||||||
|  | #include "primitive_assembly.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | Regs registers; | ||||||
|  | 
 | ||||||
|  | namespace CommandProcessor { | ||||||
|  | 
 | ||||||
|  | static int float_regs_counter = 0; | ||||||
|  | 
 | ||||||
|  | static u32 uniform_write_buffer[4]; | ||||||
|  | 
 | ||||||
|  | // Used for VSLoadProgramData and VSLoadSwizzleData
 | ||||||
|  | static u32 vs_binary_write_offset = 0; | ||||||
|  | static u32 vs_swizzle_write_offset = 0; | ||||||
|  | 
 | ||||||
|  | static inline void WritePicaReg(u32 id, u32 value) { | ||||||
|  |     u32 old_value = registers[id]; | ||||||
|  |     registers[id] = value; | ||||||
|  | 
 | ||||||
|  |     switch(id) { | ||||||
|  |         // It seems like these trigger vertex rendering
 | ||||||
|  |         case PICA_REG_INDEX(trigger_draw): | ||||||
|  |         case PICA_REG_INDEX(trigger_draw_indexed): | ||||||
|  |         { | ||||||
|  |             const auto& attribute_config = registers.vertex_attributes; | ||||||
|  |             const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); | ||||||
|  | 
 | ||||||
|  |             // Information about internal vertex attributes
 | ||||||
|  |             const u8* vertex_attribute_sources[16]; | ||||||
|  |             u32 vertex_attribute_strides[16]; | ||||||
|  |             u32 vertex_attribute_formats[16]; | ||||||
|  |             u32 vertex_attribute_elements[16]; | ||||||
|  |             u32 vertex_attribute_element_size[16]; | ||||||
|  | 
 | ||||||
|  |             // Setup attribute data from loaders
 | ||||||
|  |             for (int loader = 0; loader < 12; ++loader) { | ||||||
|  |                 const auto& loader_config = attribute_config.attribute_loaders[loader]; | ||||||
|  | 
 | ||||||
|  |                 const u8* load_address = base_address + loader_config.data_offset; | ||||||
|  | 
 | ||||||
|  |                 // TODO: What happens if a loader overwrites a previous one's data?
 | ||||||
|  |                 for (int component = 0; component < loader_config.component_count; ++component) { | ||||||
|  |                     u32 attribute_index = loader_config.GetComponent(component); | ||||||
|  |                     vertex_attribute_sources[attribute_index] = load_address; | ||||||
|  |                     vertex_attribute_strides[attribute_index] = loader_config.byte_count; | ||||||
|  |                     vertex_attribute_formats[attribute_index] = (u32)attribute_config.GetFormat(attribute_index); | ||||||
|  |                     vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||||||
|  |                     vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); | ||||||
|  |                     load_address += attribute_config.GetStride(attribute_index); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Load vertices
 | ||||||
|  |             bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | ||||||
|  | 
 | ||||||
|  |             const auto& index_info = registers.index_array; | ||||||
|  |             const u8* index_address_8 = (u8*)base_address + index_info.offset; | ||||||
|  |             const u16* index_address_16 = (u16*)index_address_8; | ||||||
|  |             bool index_u16 = (bool)index_info.format; | ||||||
|  | 
 | ||||||
|  |             for (int index = 0; index < registers.num_vertices; ++index) | ||||||
|  |             { | ||||||
|  |                 int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; | ||||||
|  | 
 | ||||||
|  |                 if (is_indexed) { | ||||||
|  |                     // TODO: Implement some sort of vertex cache!
 | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 // Initialize data for the current vertex
 | ||||||
|  |                 VertexShader::InputVertex input; | ||||||
|  | 
 | ||||||
|  |                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { | ||||||
|  |                     for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  |                         const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | ||||||
|  |                         const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : | ||||||
|  |                                              (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : | ||||||
|  |                                              (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata : | ||||||
|  |                                                                                   *(float*)srcdata; | ||||||
|  |                         input.attr[i][comp] = float24::FromFloat32(srcval); | ||||||
|  |                         DEBUG_LOG(GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", | ||||||
|  |                                   comp, i, vertex, index, | ||||||
|  |                                   attribute_config.GetBaseAddress(), | ||||||
|  |                                   vertex_attribute_sources[i] - base_address, | ||||||
|  |                                   srcdata - vertex_attribute_sources[i], | ||||||
|  |                                   input.attr[i][comp].ToFloat32()); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); | ||||||
|  | 
 | ||||||
|  |                 if (is_indexed) { | ||||||
|  |                     // TODO: Add processed vertex to vertex cache!
 | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 PrimitiveAssembly::SubmitVertex(output); | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): | ||||||
|  |         { | ||||||
|  |             auto& uniform_setup = registers.vs_uniform_setup; | ||||||
|  | 
 | ||||||
|  |             // TODO: Does actual hardware indeed keep an intermediate buffer or does
 | ||||||
|  |             //       it directly write the values?
 | ||||||
|  |             uniform_write_buffer[float_regs_counter++] = value; | ||||||
|  | 
 | ||||||
|  |             // Uniforms are written in a packed format such that 4 float24 values are encoded in
 | ||||||
|  |             // three 32-bit numbers. We write to internal memory once a full such vector is
 | ||||||
|  |             // written.
 | ||||||
|  |             if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || | ||||||
|  |                 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { | ||||||
|  |                 float_regs_counter = 0; | ||||||
|  | 
 | ||||||
|  |                 auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index); | ||||||
|  | 
 | ||||||
|  |                 if (uniform_setup.index > 95) { | ||||||
|  |                     ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 // NOTE: The destination component order indeed is "backwards"
 | ||||||
|  |                 if (uniform_setup.IsFloat32()) { | ||||||
|  |                     for (auto i : {0,1,2,3}) | ||||||
|  |                         uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); | ||||||
|  |                 } else { | ||||||
|  |                     // TODO: Untested
 | ||||||
|  |                     uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); | ||||||
|  |                     uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); | ||||||
|  |                     uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); | ||||||
|  |                     uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, | ||||||
|  |                           uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), | ||||||
|  |                           uniform.w.ToFloat32()); | ||||||
|  | 
 | ||||||
|  |                 // TODO: Verify that this actually modifies the register!
 | ||||||
|  |                 uniform_setup.index = uniform_setup.index + 1; | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Seems to be used to reset the write pointer for VSLoadProgramData
 | ||||||
|  |         case PICA_REG_INDEX(vs_program.begin_load): | ||||||
|  |             vs_binary_write_offset = 0; | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         // Load shader program code
 | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): | ||||||
|  |         { | ||||||
|  |             VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value); | ||||||
|  |             vs_binary_write_offset++; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Seems to be used to reset the write pointer for VSLoadSwizzleData
 | ||||||
|  |         case PICA_REG_INDEX(vs_swizzle_patterns.begin_load): | ||||||
|  |             vs_swizzle_write_offset = 0; | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         // Load swizzle pattern data
 | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): | ||||||
|  |         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): | ||||||
|  |         { | ||||||
|  |             VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value); | ||||||
|  |             vs_swizzle_write_offset++; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         default: | ||||||
|  |             break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { | ||||||
|  |     const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]); | ||||||
|  | 
 | ||||||
|  |     u32* read_pointer = (u32*)first_command_word; | ||||||
|  | 
 | ||||||
|  |     // TODO: Take parameter mask into consideration!
 | ||||||
|  | 
 | ||||||
|  |     WritePicaReg(header.cmd_id, *read_pointer); | ||||||
|  |     read_pointer += 2; | ||||||
|  | 
 | ||||||
|  |     for (int i = 1; i < 1+header.extra_data_length; ++i) { | ||||||
|  |         u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); | ||||||
|  |         WritePicaReg(cmd, *read_pointer); | ||||||
|  |         ++read_pointer; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // align read pointer to 8 bytes
 | ||||||
|  |     if ((first_command_word - read_pointer) % 2) | ||||||
|  |         ++read_pointer; | ||||||
|  | 
 | ||||||
|  |     return read_pointer - first_command_word; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ProcessCommandList(const u32* list, u32 size) { | ||||||
|  |     u32* read_pointer = (u32*)list; | ||||||
|  | 
 | ||||||
|  |     while (read_pointer < list + size) { | ||||||
|  |         read_pointer += ExecuteCommandBlock(read_pointer); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
							
								
								
									
										31
									
								
								src/video_core/command_processor.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								src/video_core/command_processor.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,31 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | #include "pica.h" | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace CommandProcessor { | ||||||
|  | 
 | ||||||
|  | union CommandHeader { | ||||||
|  |     u32 hex; | ||||||
|  | 
 | ||||||
|  |     BitField< 0, 16, u32> cmd_id; | ||||||
|  |     BitField<16,  4, u32> parameter_mask; | ||||||
|  |     BitField<20, 11, u32> extra_data_length; | ||||||
|  |     BitField<31,  1, u32> group_commands; | ||||||
|  | }; | ||||||
|  | static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout"); | ||||||
|  | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); | ||||||
|  | 
 | ||||||
|  | void ProcessCommandList(const u32* list, u32 size); | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | @ -11,6 +11,8 @@ | ||||||
| #include "common/log.h" | #include "common/log.h" | ||||||
| 
 | 
 | ||||||
| #include "core/hle/service/gsp.h" | #include "core/hle/service/gsp.h" | ||||||
|  | 
 | ||||||
|  | #include "command_processor.h" | ||||||
| #include "pica.h" | #include "pica.h" | ||||||
| 
 | 
 | ||||||
| class GraphicsDebugger | class GraphicsDebugger | ||||||
|  | @ -20,10 +22,10 @@ public: | ||||||
|     // A vector of commands represented by their raw byte sequence
 |     // A vector of commands represented by their raw byte sequence
 | ||||||
|     struct PicaCommand : public std::vector<u32> |     struct PicaCommand : public std::vector<u32> | ||||||
|     { |     { | ||||||
|         const Pica::CommandHeader& GetHeader() const |         const Pica::CommandProcessor::CommandHeader& GetHeader() const | ||||||
|         { |         { | ||||||
|             const u32& val = at(1); |             const u32& val = at(1); | ||||||
|             return *(Pica::CommandHeader*)&val; |             return *(Pica::CommandProcessor::CommandHeader*)&val; | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|  | @ -99,7 +101,7 @@ public: | ||||||
|         PicaCommandList cmdlist; |         PicaCommandList cmdlist; | ||||||
|         for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) |         for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) | ||||||
|         { |         { | ||||||
|             const Pica::CommandHeader header = static_cast<Pica::CommandHeader>(parse_pointer[1]); |             const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]); | ||||||
| 
 | 
 | ||||||
|             cmdlist.push_back(PicaCommand()); |             cmdlist.push_back(PicaCommand()); | ||||||
|             auto& cmd = cmdlist.back(); |             auto& cmd = cmdlist.back(); | ||||||
|  |  | ||||||
							
								
								
									
										578
									
								
								src/video_core/math.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										578
									
								
								src/video_core/math.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,578 @@ | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | // Copyright 2014 Tony Wasserka
 | ||||||
|  | // All rights reserved.
 | ||||||
|  | //
 | ||||||
|  | // Redistribution and use in source and binary forms, with or without
 | ||||||
|  | // modification, are permitted provided that the following conditions are met:
 | ||||||
|  | //
 | ||||||
|  | //     * Redistributions of source code must retain the above copyright
 | ||||||
|  | //       notice, this list of conditions and the following disclaimer.
 | ||||||
|  | //     * Redistributions in binary form must reproduce the above copyright
 | ||||||
|  | //       notice, this list of conditions and the following disclaimer in the
 | ||||||
|  | //       documentation and/or other materials provided with the distribution.
 | ||||||
|  | //     * Neither the name of the owner nor the names of its contributors may
 | ||||||
|  | //       be used to endorse or promote products derived from this software
 | ||||||
|  | //       without specific prior written permission.
 | ||||||
|  | //
 | ||||||
|  | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | ||||||
|  | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | ||||||
|  | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | ||||||
|  | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 | ||||||
|  | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | ||||||
|  | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | ||||||
|  | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | ||||||
|  | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | ||||||
|  | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | ||||||
|  | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | ||||||
|  | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <cmath> | ||||||
|  | 
 | ||||||
|  | namespace Math { | ||||||
|  | 
 | ||||||
// Forward declarations: the vector classes refer to one another (swizzlers
// return smaller vectors, inserters return larger ones).
template<typename T> class Vec2;
template<typename T> class Vec3;
template<typename T> class Vec4;

/**
 * Two-component vector with component-wise arithmetic.
 *
 * The default constructor is defaulted, so it does not assign x and y.
 * operator[] and AsArray() reach the components through a pointer to x and
 * therefore assume that y is stored directly after x.
 */
template<typename T>
class Vec2 {
public:
    struct {
        T x,y;
    };

    // Pointer to the first component
    T* AsArray() { return &x; }

    Vec2() = default;
    // Reads the components from the first two elements of a
    Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
    Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}

    // Converts each component to T2
    template<typename T2>
    Vec2<T2> Cast() const {
        return Vec2<T2>(static_cast<T2>(x), static_cast<T2>(y));
    }

    // Returns a vector with both components set to f
    static Vec2 AssignToAll(const T& f)
    {
        return Vec2<T>(f, f);
    }

    // Copies the components into a[0] and a[1]; does not modify the vector
    void Write(T a[2]) const
    {
        a[0] = x; a[1] = y;
    }

    Vec2 operator +(const Vec2& other) const
    {
        return Vec2(x+other.x, y+other.y);
    }
    void operator += (const Vec2 &other)
    {
        x+=other.x; y+=other.y;
    }
    Vec2 operator -(const Vec2& other) const
    {
        return Vec2(x-other.x, y-other.y);
    }
    void operator -= (const Vec2& other)
    {
        x-=other.x; y-=other.y;
    }
    Vec2 operator -() const
    {
        return Vec2(-x,-y);
    }
    // Component-wise product (not a dot product)
    Vec2 operator * (const Vec2& other) const
    {
        return Vec2(x*other.x, y*other.y);
    }
    // Multiplies both components by f
    template<typename V>
    Vec2 operator * (const V& f) const
    {
        return Vec2(x*f,y*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f;
    }
    // Divides both components by f
    template<typename V>
    Vec2 operator / (const V& f) const
    {
        return Vec2(x/f,y/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length: x*x + y*y
    T Length2() const
    {
        return x*x + y*y;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec2 WithLength(const float l) const;
    float Distance2To(Vec2 &other);
    Vec2 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    // Indexed component access, e.g. vector[1] = 3 is vector.y = 3.
    // i is not range-checked.
    T& operator [] (int i)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    void SetZero()
    {
        x=0; y=0;
    }

    // Common aliases: UV (texel coordinates), ST (texture coordinates)
    T& u() { return x; }
    T& v() { return y; }
    T& s() { return x; }
    T& t() { return y; }

    const T& u() const { return x; }
    const T& v() const { return y; }
    const T& s() const { return x; }
    const T& t() const { return y; }

    // swizzlers - create a subvector of specific components
    Vec2 yx() const { return Vec2(y, x); }
    Vec2 vu() const { return Vec2(y, x); }
    Vec2 ts() const { return Vec2(y, x); }

    // Inserters to add new elements to effectively create larger vectors containing this Vec2.
    // They only read the components, so they are usable on const vectors.
    Vec3<T> InsertBeforeX(const T& value) const {
        return Vec3<T>(value, x, y);
    }
    Vec3<T> InsertBeforeY(const T& value) const {
        return Vec3<T>(x, value, y);
    }
    Vec3<T> Append(const T& value) const {
        return Vec3<T>(x, y, value);
    }
};
|  | 
 | ||||||
|  | template<typename T, typename V> | ||||||
|  | Vec2<T> operator * (const V& f, const Vec2<T>& vec) | ||||||
|  | { | ||||||
|  |     return Vec2<T>(f*vec.x,f*vec.y); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | typedef Vec2<float> Vec2f; | ||||||
|  | 
 | ||||||
// Sibling vector templates used by the swizzlers and inserters below.
// Redeclaring them here is harmless and keeps this class self-contained.
template<typename T> class Vec2;
template<typename T> class Vec4;

/**
 * Three-component vector with component-wise arithmetic.
 *
 * The default constructor is defaulted, so it does not assign the components.
 * operator[] and AsArray() reach the components through a pointer to x and
 * therefore assume that x, y, z are stored back to back.
 */
template<typename T>
class Vec3
{
public:
    struct
    {
        T x,y,z;
    };

    // Pointer to the first component
    T* AsArray() { return &x; }

    Vec3() = default;
    // Reads the components from the first three elements of a
    Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
    Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}

    // Converts each component to T2
    template<typename T2>
    Vec3<T2> Cast() const {
        return Vec3<T2>(static_cast<T2>(x), static_cast<T2>(y), static_cast<T2>(z));
    }

    // Only implemented for T=int and T=float
    static Vec3 FromRGB(unsigned int rgb);
    unsigned int ToRGB() const; // alpha bits set to zero

    // Returns a vector with all components set to f
    static Vec3 AssignToAll(const T& f)
    {
        return Vec3<T>(f, f, f);
    }

    // Copies the components into a[0..2]; does not modify the vector
    void Write(T a[3]) const
    {
        a[0] = x; a[1] = y; a[2] = z;
    }

    Vec3 operator +(const Vec3 &other) const
    {
        return Vec3(x+other.x, y+other.y, z+other.z);
    }
    void operator += (const Vec3 &other)
    {
        x+=other.x; y+=other.y; z+=other.z;
    }
    Vec3 operator -(const Vec3 &other) const
    {
        return Vec3(x-other.x, y-other.y, z-other.z);
    }
    void operator -= (const Vec3 &other)
    {
        x-=other.x; y-=other.y; z-=other.z;
    }
    Vec3 operator -() const
    {
        return Vec3(-x,-y,-z);
    }
    // Component-wise product (not a dot or cross product)
    Vec3 operator * (const Vec3 &other) const
    {
        return Vec3(x*other.x, y*other.y, z*other.z);
    }
    // Multiplies every component by f
    template<typename V>
    Vec3 operator * (const V& f) const
    {
        return Vec3(x*f,y*f,z*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f; z*=f;
    }
    // Divides every component by f
    template<typename V>
    Vec3 operator / (const V& f) const
    {
        return Vec3(x/f,y/f,z/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length: x*x + y*y + z*z
    T Length2() const
    {
        return x*x + y*y + z*z;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec3 WithLength(const float l) const;
    float Distance2To(Vec3 &other);
    Vec3 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    // Indexed component access, e.g. vector[2] = 3 is vector.z = 3.
    // i is not range-checked.
    T& operator [] (int i)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    void SetZero()
    {
        x=0; y=0; z=0;
    }

    // Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
    T& u() { return x; }
    T& v() { return y; }
    T& w() { return z; }

    T& r() { return x; }
    T& g() { return y; }
    T& b() { return z; }

    T& s() { return x; }
    T& t() { return y; }
    T& q() { return z; }

    const T& u() const { return x; }
    const T& v() const { return y; }
    const T& w() const { return z; }

    const T& r() const { return x; }
    const T& g() const { return y; }
    const T& b() const { return z; }

    const T& s() const { return x; }
    const T& t() const { return y; }
    const T& q() const { return z; }

    // swizzlers - create a subvector of specific components
    // e.g. Vec2 uv() { return Vec2(x,y); }
    // DEFINE_SWIZZLER2_SINGLE defines a single such function, DEFINE_SWIZZLER2 defines all of
    // them for all component names (x<->r) and permutations (xy<->yx).
    // The names a2/b2, a3/b3, a4/b4 are only pasted into function names; the returned
    // components are always the members a and b.
#define DEFINE_SWIZZLER2_SINGLE(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
    DEFINE_SWIZZLER2_SINGLE(a, b, a##b) \
    DEFINE_SWIZZLER2_SINGLE(a, b, a2##b2) \
    DEFINE_SWIZZLER2_SINGLE(a, b, a3##b3) \
    DEFINE_SWIZZLER2_SINGLE(a, b, a4##b4) \
    DEFINE_SWIZZLER2_SINGLE(b, a, b##a) \
    DEFINE_SWIZZLER2_SINGLE(b, a, b2##a2) \
    DEFINE_SWIZZLER2_SINGLE(b, a, b3##a3) \
    DEFINE_SWIZZLER2_SINGLE(b, a, b4##a4)

    DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
    DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
    DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
#undef DEFINE_SWIZZLER2
#undef DEFINE_SWIZZLER2_SINGLE

    // Inserters to add new elements to effectively create larger vectors containing this Vec3.
    // They only read the components, so they are usable on const vectors.
    Vec4<T> InsertBeforeX(const T& value) const {
        return Vec4<T>(value, x, y, z);
    }
    Vec4<T> InsertBeforeY(const T& value) const {
        return Vec4<T>(x, value, y, z);
    }
    Vec4<T> InsertBeforeZ(const T& value) const {
        return Vec4<T>(x, y, value, z);
    }
    Vec4<T> Append(const T& value) const {
        return Vec4<T>(x, y, z, value);
    }
};
|  | 
 | ||||||
|  | template<typename T, typename V> | ||||||
|  | Vec3<T> operator * (const V& f, const Vec3<T>& vec) | ||||||
|  | { | ||||||
|  |     return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | typedef Vec3<float> Vec3f; | ||||||
|  | 
 | ||||||
// Sibling vector templates returned by the swizzlers below.
// Redeclaring them here is harmless and keeps this class self-contained.
template<typename T> class Vec2;
template<typename T> class Vec3;

/**
 * Four-component vector with component-wise arithmetic.
 *
 * The default constructor is defaulted, so it does not assign the components.
 * operator[] and AsArray() reach the components through a pointer to x and
 * therefore assume that x, y, z, w are stored back to back.
 */
template<typename T>
class Vec4
{
public:
    struct
    {
        T x,y,z,w;
    };

    // Pointer to the first component
    T* AsArray() { return &x; }

    Vec4() = default;
    // Reads the components from the first four elements of a
    Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
    Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {}

    // Converts each component to T2
    template<typename T2>
    Vec4<T2> Cast() const {
        return Vec4<T2>(static_cast<T2>(x), static_cast<T2>(y),
                        static_cast<T2>(z), static_cast<T2>(w));
    }

    // Only implemented for T=int and T=float
    static Vec4 FromRGBA(unsigned int rgba);
    unsigned int ToRGBA() const;

    // Returns a vector with all components set to f
    static Vec4 AssignToAll(const T& f) {
        return Vec4<T>(f, f, f, f);
    }

    // Copies the components into a[0..3]; does not modify the vector
    void Write(T a[4]) const
    {
        a[0] = x; a[1] = y; a[2] = z; a[3] = w;
    }

    Vec4 operator +(const Vec4& other) const
    {
        return Vec4(x+other.x, y+other.y, z+other.z, w+other.w);
    }
    void operator += (const Vec4& other)
    {
        x+=other.x; y+=other.y; z+=other.z; w+=other.w;
    }
    Vec4 operator -(const Vec4 &other) const
    {
        return Vec4(x-other.x, y-other.y, z-other.z, w-other.w);
    }
    void operator -= (const Vec4 &other)
    {
        x-=other.x; y-=other.y; z-=other.z; w-=other.w;
    }
    Vec4 operator -() const
    {
        return Vec4(-x,-y,-z,-w);
    }
    // Component-wise product (not a dot product)
    Vec4 operator * (const Vec4 &other) const
    {
        return Vec4(x*other.x, y*other.y, z*other.z, w*other.w);
    }
    // Multiplies every component by f
    template<typename V>
    Vec4 operator * (const V& f) const
    {
        return Vec4(x*f,y*f,z*f,w*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f; z*=f; w*=f;
    }
    // Divides every component by f
    template<typename V>
    Vec4 operator / (const V& f) const
    {
        return Vec4(x/f,y/f,z/f,w/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length: x*x + y*y + z*z + w*w
    T Length2() const
    {
        return x*x + y*y + z*z + w*w;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec4 WithLength(const float l) const;
    float Distance2To(Vec4 &other);
    Vec4 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    // Indexed component access, e.g. vector[2] = 3 is vector.z = 3.
    // i is not range-checked.
    T& operator [] (int i)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    // Clears all four components (w included)
    void SetZero()
    {
        x=0; y=0; z=0; w=0;
    }

    // Common alias: RGBA (colors)
    T& r() { return x; }
    T& g() { return y; }
    T& b() { return z; }
    T& a() { return w; }

    const T& r() const { return x; }
    const T& g() const { return y; }
    const T& b() const { return z; }
    const T& a() const { return w; }

    // swizzlers - create a subvector of specific components
    // e.g. Vec2 uv() { return Vec2(x,y); }
    // DEFINE_SWIZZLER2_SINGLE defines a single such function, DEFINE_SWIZZLER2 defines all of
    // them for all component names (x<->r) and permutations (xy<->yx).
    // The names a2/b2 are only pasted into function names; the returned components are
    // always the members a and b.
#define DEFINE_SWIZZLER2_SINGLE(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
#define DEFINE_SWIZZLER2(a, b, a2, b2) \
    DEFINE_SWIZZLER2_SINGLE(a, b, a##b) \
    DEFINE_SWIZZLER2_SINGLE(a, b, a2##b2) \
    DEFINE_SWIZZLER2_SINGLE(b, a, b##a) \
    DEFINE_SWIZZLER2_SINGLE(b, a, b2##a2)

    DEFINE_SWIZZLER2(x, y, r, g);
    DEFINE_SWIZZLER2(x, z, r, b);
    DEFINE_SWIZZLER2(x, w, r, a);
    DEFINE_SWIZZLER2(y, z, g, b);
    DEFINE_SWIZZLER2(y, w, g, a);
    DEFINE_SWIZZLER2(z, w, b, a);
#undef DEFINE_SWIZZLER2
#undef DEFINE_SWIZZLER2_SINGLE

    // Three-component swizzlers: every permutation of (a, b, c), once under the
    // xyzw names and once under the rgba names.
#define DEFINE_SWIZZLER3_SINGLE(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); }
#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \
    DEFINE_SWIZZLER3_SINGLE(a, b, c, a##b##c) \
    DEFINE_SWIZZLER3_SINGLE(a, c, b, a##c##b) \
    DEFINE_SWIZZLER3_SINGLE(b, a, c, b##a##c) \
    DEFINE_SWIZZLER3_SINGLE(b, c, a, b##c##a) \
    DEFINE_SWIZZLER3_SINGLE(c, a, b, c##a##b) \
    DEFINE_SWIZZLER3_SINGLE(c, b, a, c##b##a) \
    DEFINE_SWIZZLER3_SINGLE(a, b, c, a2##b2##c2) \
    DEFINE_SWIZZLER3_SINGLE(a, c, b, a2##c2##b2) \
    DEFINE_SWIZZLER3_SINGLE(b, a, c, b2##a2##c2) \
    DEFINE_SWIZZLER3_SINGLE(b, c, a, b2##c2##a2) \
    DEFINE_SWIZZLER3_SINGLE(c, a, b, c2##a2##b2) \
    DEFINE_SWIZZLER3_SINGLE(c, b, a, c2##b2##a2)

    DEFINE_SWIZZLER3(x, y, z, r, g, b);
    DEFINE_SWIZZLER3(x, y, w, r, g, a);
    DEFINE_SWIZZLER3(x, z, w, r, b, a);
    DEFINE_SWIZZLER3(y, z, w, g, b, a);
#undef DEFINE_SWIZZLER3
#undef DEFINE_SWIZZLER3_SINGLE
};
|  | 
 | ||||||
|  | 
 | ||||||
|  | template<typename T, typename V> | ||||||
|  | Vec4<T> operator * (const V& f, const Vec4<T>& vec) | ||||||
|  | { | ||||||
|  |     return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | typedef Vec4<float> Vec4f; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | static inline T Dot(const Vec2<T>& a, const Vec2<T>& b) | ||||||
|  | { | ||||||
|  |     return a.x*b.x + a.y*b.y; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | static inline T Dot(const Vec3<T>& a, const Vec3<T>& b) | ||||||
|  | { | ||||||
|  |     return a.x*b.x + a.y*b.y + a.z*b.z; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | static inline T Dot(const Vec4<T>& a, const Vec4<T>& b) | ||||||
|  | { | ||||||
|  |     return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b) | ||||||
|  | { | ||||||
|  |     return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); | ||||||
|  | } | ||||||
|  | 
 | ||||||
// Linear interpolation with a float weight: t=0.0 yields begin, t=1.0 yields end.
// X only needs to support multiplication by float and addition.
template<typename X>
static inline X Lerp(const X& begin, const X& end, const float t) {
    const float begin_weight = 1.f-t;
    return begin*begin_weight + end*t;
}
|  | 
 | ||||||
// Linear interpolation with an integer weight: t=0 yields begin, t=base yields end.
// The weighted sum is divided by base at the end.
template<typename X, int base>
static inline X LerpInt(const X& begin, const X& end, const int t) {
    const int begin_weight = base-t;
    return (begin*begin_weight + end*t) / base;
}
|  | 
 | ||||||
|  | // Utility vector factories
 | ||||||
|  | template<typename T> | ||||||
|  | static inline Vec2<T> MakeVec2(const T& x, const T& y) | ||||||
|  | { | ||||||
|  |     return Vec2<T>{x, y}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z) | ||||||
|  | { | ||||||
|  |     return Vec3<T>{x, y, z}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w) | ||||||
|  | { | ||||||
|  |     return Vec4<T>{x, y, z, w}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Math
 | ||||||
|  | @ -11,6 +11,8 @@ | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| 
 | 
 | ||||||
|  | #include "core/mem_map.h" | ||||||
|  | 
 | ||||||
| namespace Pica { | namespace Pica { | ||||||
| 
 | 
 | ||||||
| // Returns index corresponding to the Regs member labeled by field_name
 | // Returns index corresponding to the Regs member labeled by field_name
 | ||||||
|  | @ -45,12 +47,104 @@ struct Regs { | ||||||
|     INSERT_PADDING_WORDS(0x41); |     INSERT_PADDING_WORDS(0x41); | ||||||
| 
 | 
 | ||||||
|     BitField<0, 24, u32> viewport_size_x; |     BitField<0, 24, u32> viewport_size_x; | ||||||
|     INSERT_PADDING_WORDS(1); |     INSERT_PADDING_WORDS(0x1); | ||||||
|     BitField<0, 24, u32> viewport_size_y; |     BitField<0, 24, u32> viewport_size_y; | ||||||
| 
 | 
 | ||||||
|     INSERT_PADDING_WORDS(0x1bc); |     INSERT_PADDING_WORDS(0x9); | ||||||
|  | 
 | ||||||
|  |     BitField<0, 24, u32> viewport_depth_range; // float24
 | ||||||
|  |     BitField<0, 24, u32> viewport_depth_far_plane; // float24
 | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x1); | ||||||
| 
 | 
 | ||||||
|     union { |     union { | ||||||
|  |         // Maps components of output vertex attributes to semantics
 | ||||||
|  |         enum Semantic : u32 | ||||||
|  |         { | ||||||
|  |             POSITION_X   =  0, | ||||||
|  |             POSITION_Y   =  1, | ||||||
|  |             POSITION_Z   =  2, | ||||||
|  |             POSITION_W   =  3, | ||||||
|  | 
 | ||||||
|  |             COLOR_R      =  8, | ||||||
|  |             COLOR_G      =  9, | ||||||
|  |             COLOR_B      = 10, | ||||||
|  |             COLOR_A      = 11, | ||||||
|  | 
 | ||||||
|  |             TEXCOORD0_U  = 12, | ||||||
|  |             TEXCOORD0_V  = 13, | ||||||
|  |             TEXCOORD1_U  = 14, | ||||||
|  |             TEXCOORD1_V  = 15, | ||||||
|  |             TEXCOORD2_U  = 22, | ||||||
|  |             TEXCOORD2_V  = 23, | ||||||
|  | 
 | ||||||
|  |             INVALID      = 31, | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         BitField< 0, 5, Semantic> map_x; | ||||||
|  |         BitField< 8, 5, Semantic> map_y; | ||||||
|  |         BitField<16, 5, Semantic> map_z; | ||||||
|  |         BitField<24, 5, Semantic> map_w; | ||||||
|  |     } vs_output_attributes[7]; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x11); | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField< 0, 16, u32> x; | ||||||
|  |         BitField<16, 16, u32> y; | ||||||
|  |     } viewport_corner; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0xa7); | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|  |         enum ColorFormat : u32 { | ||||||
|  |             RGBA8    = 0, | ||||||
|  |             RGB8     = 1, | ||||||
|  |             RGBA5551 = 2, | ||||||
|  |             RGB565   = 3, | ||||||
|  |             RGBA4    = 4, | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         INSERT_PADDING_WORDS(0x6); | ||||||
|  | 
 | ||||||
|  |         u32 depth_format; | ||||||
|  |         u32 color_format; | ||||||
|  | 
 | ||||||
|  |         INSERT_PADDING_WORDS(0x4); | ||||||
|  | 
 | ||||||
|  |         u32 depth_buffer_address; | ||||||
|  |         u32 color_buffer_address; | ||||||
|  | 
 | ||||||
|  |         union { | ||||||
|  |             // Apparently, the framebuffer width is stored as expected,
 | ||||||
|  |             // while the height is stored as the actual height minus one.
 | ||||||
|  |             // Hence, don't access these fields directly but use the accessors
 | ||||||
|  |             // GetWidth() and GetHeight() instead.
 | ||||||
|  |             BitField< 0, 11, u32> width; | ||||||
|  |             BitField<12, 10, u32> height; | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         INSERT_PADDING_WORDS(0x1); | ||||||
|  | 
 | ||||||
|  |         inline u32 GetColorBufferAddress() const { | ||||||
|  |             return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address)); | ||||||
|  |         } | ||||||
|  |         inline u32 GetDepthBufferAddress() const { | ||||||
|  |             return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address)); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline u32 GetWidth() const { | ||||||
|  |             return width; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline u32 GetHeight() const { | ||||||
|  |             return height + 1; | ||||||
|  |         } | ||||||
|  |     } framebuffer; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0xe0); | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|         enum class Format : u64 { |         enum class Format : u64 { | ||||||
|             BYTE = 0, |             BYTE = 0, | ||||||
|             UBYTE = 1, |             UBYTE = 1, | ||||||
|  | @ -58,7 +152,16 @@ struct Regs { | ||||||
|             FLOAT = 3, |             FLOAT = 3, | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         BitField< 0,  2, Format> format0; |         BitField<0, 29, u32> base_address; | ||||||
|  | 
 | ||||||
|  |         inline u32 GetBaseAddress() const { | ||||||
|  |             // TODO: Ugly, should fix PhysicalToVirtualAddress instead
 | ||||||
|  |             return DecodeAddressRegister(base_address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Descriptor for internal vertex attributes
 | ||||||
|  |         union { | ||||||
|  |             BitField< 0,  2, Format> format0; // size of one element
 | ||||||
|             BitField< 2,  2, u64> size0;      // number of elements minus 1
 |             BitField< 2,  2, u64> size0;      // number of elements minus 1
 | ||||||
|             BitField< 4,  2, Format> format1; |             BitField< 4,  2, Format> format1; | ||||||
|             BitField< 6,  2, u64> size1; |             BitField< 6,  2, u64> size1; | ||||||
|  | @ -84,10 +187,195 @@ struct Regs { | ||||||
|             BitField<46,  2, u64> size11; |             BitField<46,  2, u64> size11; | ||||||
| 
 | 
 | ||||||
|             BitField<48, 12, u64> attribute_mask; |             BitField<48, 12, u64> attribute_mask; | ||||||
|         BitField<60,  4, u64> num_attributes; // number of total attributes minus 1
 |  | ||||||
|     } vertex_descriptor; |  | ||||||
| 
 | 
 | ||||||
|     INSERT_PADDING_WORDS(0xfe); |             // number of total attributes minus 1
 | ||||||
|  |             BitField<60,  4, u64> num_extra_attributes; | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         inline Format GetFormat(int n) const { | ||||||
|  |             Format formats[] = { | ||||||
|  |                 format0, format1, format2, format3, | ||||||
|  |                 format4, format5, format6, format7, | ||||||
|  |                 format8, format9, format10, format11 | ||||||
|  |             }; | ||||||
|  |             return formats[n]; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline int GetNumElements(int n) const { | ||||||
|  |             u64 sizes[] = { | ||||||
|  |                 size0, size1, size2, size3, | ||||||
|  |                 size4, size5, size6, size7, | ||||||
|  |                 size8, size9, size10, size11 | ||||||
|  |             }; | ||||||
|  |             return (int)sizes[n]+1; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline int GetElementSizeInBytes(int n) const { | ||||||
|  |             return (GetFormat(n) == Format::FLOAT) ? 4 : | ||||||
|  |                 (GetFormat(n) == Format::SHORT) ? 2 : 1; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline int GetStride(int n) const { | ||||||
|  |             return GetNumElements(n) * GetElementSizeInBytes(n); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         inline int GetNumTotalAttributes() const { | ||||||
|  |             return (int)num_extra_attributes+1; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Attribute loaders map the source vertex data to input attributes
 | ||||||
|  |         // This e.g. allows to load different attributes from different memory locations
 | ||||||
|  |         struct { | ||||||
|  |             // Source attribute data offset from the base address
 | ||||||
|  |             u32 data_offset; | ||||||
|  | 
 | ||||||
|  |             union { | ||||||
|  |                 BitField< 0, 4, u64> comp0; | ||||||
|  |                 BitField< 4, 4, u64> comp1; | ||||||
|  |                 BitField< 8, 4, u64> comp2; | ||||||
|  |                 BitField<12, 4, u64> comp3; | ||||||
|  |                 BitField<16, 4, u64> comp4; | ||||||
|  |                 BitField<20, 4, u64> comp5; | ||||||
|  |                 BitField<24, 4, u64> comp6; | ||||||
|  |                 BitField<28, 4, u64> comp7; | ||||||
|  |                 BitField<32, 4, u64> comp8; | ||||||
|  |                 BitField<36, 4, u64> comp9; | ||||||
|  |                 BitField<40, 4, u64> comp10; | ||||||
|  |                 BitField<44, 4, u64> comp11; | ||||||
|  | 
 | ||||||
|  |                 // bytes for a single vertex in this loader
 | ||||||
|  |                 BitField<48, 8, u64> byte_count; | ||||||
|  | 
 | ||||||
|  |                 BitField<60, 4, u64> component_count; | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             inline int GetComponent(int n) const { | ||||||
|  |                 u64 components[] = { | ||||||
|  |                     comp0, comp1, comp2, comp3, | ||||||
|  |                     comp4, comp5, comp6, comp7, | ||||||
|  |                     comp8, comp9, comp10, comp11 | ||||||
|  |                 }; | ||||||
|  |                 return (int)components[n]; | ||||||
|  |             } | ||||||
|  |         } attribute_loaders[12]; | ||||||
|  |     } vertex_attributes; | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|  |         enum IndexFormat : u32 { | ||||||
|  |             BYTE = 0, | ||||||
|  |             SHORT = 1, | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         union { | ||||||
|  |             BitField<0, 31, u32> offset; // relative to base attribute address
 | ||||||
|  |             BitField<31, 1, IndexFormat> format; | ||||||
|  |         }; | ||||||
|  |     } index_array; | ||||||
|  | 
 | ||||||
|  |     // Number of vertices to render
 | ||||||
|  |     u32 num_vertices; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x5); | ||||||
|  | 
 | ||||||
|  |     // These two trigger rendering of triangles
 | ||||||
|  |     u32 trigger_draw; | ||||||
|  |     u32 trigger_draw_indexed; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x2e); | ||||||
|  | 
 | ||||||
|  |     enum class TriangleTopology : u32 { | ||||||
|  |         List        = 0, | ||||||
|  |         Strip       = 1, | ||||||
|  |         Fan         = 2, | ||||||
|  |         ListIndexed = 3, // TODO: No idea if this is correct
 | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     BitField<8, 2, TriangleTopology> triangle_topology; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x5b); | ||||||
|  | 
 | ||||||
|  |     // Offset to shader program entry point (in words)
 | ||||||
|  |     BitField<0, 16, u32> vs_main_offset; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField< 0, 4, u64> attribute0_register; | ||||||
|  |         BitField< 4, 4, u64> attribute1_register; | ||||||
|  |         BitField< 8, 4, u64> attribute2_register; | ||||||
|  |         BitField<12, 4, u64> attribute3_register; | ||||||
|  |         BitField<16, 4, u64> attribute4_register; | ||||||
|  |         BitField<20, 4, u64> attribute5_register; | ||||||
|  |         BitField<24, 4, u64> attribute6_register; | ||||||
|  |         BitField<28, 4, u64> attribute7_register; | ||||||
|  |         BitField<32, 4, u64> attribute8_register; | ||||||
|  |         BitField<36, 4, u64> attribute9_register; | ||||||
|  |         BitField<40, 4, u64> attribute10_register; | ||||||
|  |         BitField<44, 4, u64> attribute11_register; | ||||||
|  |         BitField<48, 4, u64> attribute12_register; | ||||||
|  |         BitField<52, 4, u64> attribute13_register; | ||||||
|  |         BitField<56, 4, u64> attribute14_register; | ||||||
|  |         BitField<60, 4, u64> attribute15_register; | ||||||
|  | 
 | ||||||
|  |         int GetRegisterForAttribute(int attribute_index) { | ||||||
|  |             u64 fields[] = { | ||||||
|  |                 attribute0_register,  attribute1_register,  attribute2_register,  attribute3_register, | ||||||
|  |                 attribute4_register,  attribute5_register,  attribute6_register,  attribute7_register, | ||||||
|  |                 attribute8_register,  attribute9_register,  attribute10_register, attribute11_register, | ||||||
|  |                 attribute12_register, attribute13_register, attribute14_register, attribute15_register, | ||||||
|  |             }; | ||||||
|  |             return (int)fields[attribute_index]; | ||||||
|  |         } | ||||||
|  |     } vs_input_register_map; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x3); | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|  |         enum Format : u32 | ||||||
|  |         { | ||||||
|  |             FLOAT24 = 0, | ||||||
|  |             FLOAT32 = 1 | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         bool IsFloat32() const { | ||||||
|  |             return format == FLOAT32; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         union { | ||||||
|  |             // Index of the next uniform to write to
 | ||||||
|  |             // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
 | ||||||
|  |             BitField<0, 7, u32> index; | ||||||
|  | 
 | ||||||
|  |             BitField<31, 1, Format> format; | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         // Writing to these registers sets the "current" uniform.
 | ||||||
|  |         // TODO: It's not clear how the hardware stores what the "current" uniform is.
 | ||||||
|  |         u32 set_value[8]; | ||||||
|  | 
 | ||||||
|  |     } vs_uniform_setup; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x2); | ||||||
|  | 
 | ||||||
|  |     struct { | ||||||
|  |         u32 begin_load; | ||||||
|  | 
 | ||||||
|  |         // Writing to these registers sets the "current" word in the shader program.
 | ||||||
|  |         // TODO: It's not clear how the hardware stores what the "current" word is.
 | ||||||
|  |         u32 set_word[8]; | ||||||
|  |     } vs_program; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x1); | ||||||
|  | 
 | ||||||
|  |     // This register group is used to load an internal table of swizzling patterns,
 | ||||||
|  |     // which are indexed by each shader instruction to specify vector component swizzling.
 | ||||||
|  |     struct { | ||||||
|  |         u32 begin_load; | ||||||
|  | 
 | ||||||
|  |         // Writing to these registers sets the "current" swizzle pattern in the table.
 | ||||||
|  |         // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is.
 | ||||||
|  |         u32 set_word[8]; | ||||||
|  |     } vs_swizzle_patterns; | ||||||
|  | 
 | ||||||
|  |     INSERT_PADDING_WORDS(0x22); | ||||||
| 
 | 
 | ||||||
| #undef INSERT_PADDING_WORDS_HELPER1 | #undef INSERT_PADDING_WORDS_HELPER1 | ||||||
| #undef INSERT_PADDING_WORDS_HELPER2 | #undef INSERT_PADDING_WORDS_HELPER2 | ||||||
|  | @ -112,7 +400,21 @@ struct Regs { | ||||||
| 
 | 
 | ||||||
|         ADD_FIELD(viewport_size_x); |         ADD_FIELD(viewport_size_x); | ||||||
|         ADD_FIELD(viewport_size_y); |         ADD_FIELD(viewport_size_y); | ||||||
|         ADD_FIELD(vertex_descriptor); |         ADD_FIELD(viewport_depth_range); | ||||||
|  |         ADD_FIELD(viewport_depth_far_plane); | ||||||
|  |         ADD_FIELD(viewport_corner); | ||||||
|  |         ADD_FIELD(framebuffer); | ||||||
|  |         ADD_FIELD(vertex_attributes); | ||||||
|  |         ADD_FIELD(index_array); | ||||||
|  |         ADD_FIELD(num_vertices); | ||||||
|  |         ADD_FIELD(trigger_draw); | ||||||
|  |         ADD_FIELD(trigger_draw_indexed); | ||||||
|  |         ADD_FIELD(triangle_topology); | ||||||
|  |         ADD_FIELD(vs_main_offset); | ||||||
|  |         ADD_FIELD(vs_input_register_map); | ||||||
|  |         ADD_FIELD(vs_uniform_setup); | ||||||
|  |         ADD_FIELD(vs_program); | ||||||
|  |         ADD_FIELD(vs_swizzle_patterns); | ||||||
| 
 | 
 | ||||||
|         #undef ADD_FIELD |         #undef ADD_FIELD | ||||||
|         #endif // _MSC_VER
 |         #endif // _MSC_VER
 | ||||||
|  | @ -153,13 +455,106 @@ private: | ||||||
| 
 | 
 | ||||||
| ASSERT_REG_POSITION(viewport_size_x, 0x41); | ASSERT_REG_POSITION(viewport_size_x, 0x41); | ||||||
| ASSERT_REG_POSITION(viewport_size_y, 0x43); | ASSERT_REG_POSITION(viewport_size_y, 0x43); | ||||||
| ASSERT_REG_POSITION(vertex_descriptor, 0x200); | ASSERT_REG_POSITION(viewport_depth_range, 0x4d); | ||||||
|  | ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); | ||||||
|  | ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); | ||||||
|  | ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); | ||||||
|  | ASSERT_REG_POSITION(viewport_corner, 0x68); | ||||||
|  | ASSERT_REG_POSITION(framebuffer, 0x110); | ||||||
|  | ASSERT_REG_POSITION(vertex_attributes, 0x200); | ||||||
|  | ASSERT_REG_POSITION(index_array, 0x227); | ||||||
|  | ASSERT_REG_POSITION(num_vertices, 0x228); | ||||||
|  | ASSERT_REG_POSITION(trigger_draw, 0x22e); | ||||||
|  | ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); | ||||||
|  | ASSERT_REG_POSITION(triangle_topology, 0x25e); | ||||||
|  | ASSERT_REG_POSITION(vs_main_offset, 0x2ba); | ||||||
|  | ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); | ||||||
|  | ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); | ||||||
|  | ASSERT_REG_POSITION(vs_program, 0x2cb); | ||||||
|  | ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); | ||||||
| 
 | 
 | ||||||
| #undef ASSERT_REG_POSITION | #undef ASSERT_REG_POSITION | ||||||
| #endif // !defined(_MSC_VER)
 | #endif // !defined(_MSC_VER)
 | ||||||
| 
 | 
 | ||||||
| // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
 | ||||||
| static_assert(sizeof(Regs) == 0x300 * sizeof(u32), "Invalid total size of register set"); | static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); | ||||||
|  | static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); | ||||||
|  | 
 | ||||||
|  | extern Regs registers; // TODO: Not sure if we want to have one global instance for this
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
/**
 * Emulation-side stand-in for the PICA 24-bit floating point format
 * (16 bit mantissa, 7 bit exponent, 1 bit sign).
 *
 * The value is stored as a regular host float, so arithmetic is carried out with
 * 32-bit float precision rather than true 24-bit precision.
 * Instances can only be created through the static factory functions.
 */
struct float24 {
    /// Wraps a host float; no rounding to 24-bit precision is applied.
    static float24 FromFloat32(float val) {
        float24 ret;
        ret.value = val;
        return ret;
    }

    /**
     * Decodes a raw 24-bit float held in the low 24 bits of @p hex.
     * Bits 0-15 are the mantissa, bits 16-22 the exponent (bias 63) and bit 23 the sign.
     * Bits 24-31 are ignored. A value whose low 24 bits are all zero decodes to 0.
     * TODO: No idea if this works as intended
     */
    static float24 FromRawFloat24(u32 hex) {
        float24 ret;
        if ((hex & 0xFFFFFF) == 0) {
            ret.value = 0;
        } else {
            const u32 mantissa = hex & 0xFFFF;
            const u32 exponent = (hex >> 16) & 0x7F;
            // Mask down to the single sign bit: the zero test above already treats bits 24-31
            // as "don't care", so stray upper bits must not be able to negate the result.
            const u32 sign = (hex >> 23) & 1;
            ret.value = powf(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * powf(2.0f, -16.f));
            if (sign)
                ret.value = -ret.value;
        }
        return ret;
    }

    // Not recommended for anything but logging
    float ToFloat32() const {
        return value;
    }

    // Arithmetic is performed on the underlying host floats and re-wrapped.

    float24 operator * (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
    }

    float24 operator / (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
    }

    float24 operator + (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
    }

    float24 operator - (const float24& flt) const {
        return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
    }

    float24 operator - () const {
        return float24::FromFloat32(-ToFloat32());
    }

    // Comparisons likewise compare the underlying host floats.

    bool operator < (const float24& flt) const {
        return ToFloat32() < flt.ToFloat32();
    }

    bool operator > (const float24& flt) const {
        return ToFloat32() > flt.ToFloat32();
    }

    bool operator >= (const float24& flt) const {
        return ToFloat32() >= flt.ToFloat32();
    }

    bool operator <= (const float24& flt) const {
        return ToFloat32() <= flt.ToFloat32();
    }

private:
    // Default construction is restricted to the factory functions above.
    float24() = default;

    // Stored as a regular float, merely for convenience
    // TODO: Perform proper arithmetic on this!
    float value;
};
| 
 | 
 | ||||||
| union CommandHeader { | union CommandHeader { | ||||||
|     CommandHeader(u32 h) : hex(h) {} |     CommandHeader(u32 h) : hex(h) {} | ||||||
|  |  | ||||||
							
								
								
									
										51
									
								
								src/video_core/primitive_assembly.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								src/video_core/primitive_assembly.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,51 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "clipper.h" | ||||||
|  | #include "pica.h" | ||||||
|  | #include "primitive_assembly.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace PrimitiveAssembly { | ||||||
|  | 
 | ||||||
|  | static OutputVertex buffer[2]; | ||||||
|  | static int buffer_index = 0; // TODO: reset this on emulation restart
 | ||||||
|  | 
 | ||||||
|  | void SubmitVertex(OutputVertex& vtx) | ||||||
|  | { | ||||||
|  |     switch (registers.triangle_topology) { | ||||||
|  |         case Regs::TriangleTopology::List: | ||||||
|  |         case Regs::TriangleTopology::ListIndexed: | ||||||
|  |             if (buffer_index < 2) { | ||||||
|  |                 buffer[buffer_index++] = vtx; | ||||||
|  |             } else { | ||||||
|  |                 buffer_index = 0; | ||||||
|  | 
 | ||||||
|  |                 Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         case Regs::TriangleTopology::Fan: | ||||||
|  |             if (buffer_index == 2) { | ||||||
|  |                 buffer_index = 0; | ||||||
|  | 
 | ||||||
|  |                 Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); | ||||||
|  | 
 | ||||||
|  |                 buffer[1] = vtx; | ||||||
|  |             } else { | ||||||
|  |                 buffer[buffer_index++] = vtx; | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |         default: | ||||||
|  |             ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value()); | ||||||
|  |             break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
							
								
								
									
										21
									
								
								src/video_core/primitive_assembly.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								src/video_core/primitive_assembly.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace VertexShader { | ||||||
|  |     struct OutputVertex; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | namespace PrimitiveAssembly { | ||||||
|  | 
 | ||||||
|  | using VertexShader::OutputVertex; | ||||||
|  | 
 | ||||||
|  | void SubmitVertex(OutputVertex& vtx); | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
							
								
								
									
										180
									
								
								src/video_core/rasterizer.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										180
									
								
								src/video_core/rasterizer.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,180 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <algorithm> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | #include "math.h" | ||||||
|  | #include "pica.h" | ||||||
|  | #include "rasterizer.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace Rasterizer { | ||||||
|  | 
 | ||||||
|  | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | ||||||
|  |     u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); | ||||||
|  |     u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||||||
|  | 
 | ||||||
|  |     // Assuming RGBA8 format until actual framebuffer format handling is implemented
 | ||||||
|  |     *(color_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static u32 GetDepth(int x, int y) { | ||||||
|  |     u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); | ||||||
|  | 
 | ||||||
|  |     // Assuming 16-bit depth buffer format until actual format handling is implemented
 | ||||||
|  |     return *(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void SetDepth(int x, int y, u16 value) { | ||||||
|  |     u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); | ||||||
|  | 
 | ||||||
|  |     // Assuming 16-bit depth buffer format until actual format handling is implemented
 | ||||||
|  |     *(depth_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||||||
|  |                      const VertexShader::OutputVertex& v1, | ||||||
|  |                      const VertexShader::OutputVertex& v2) | ||||||
|  | { | ||||||
|  |     // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 | ||||||
|  |     struct Fix12P4 { | ||||||
|  |         Fix12P4() {} | ||||||
|  |         Fix12P4(u16 val) : val(val) {} | ||||||
|  | 
 | ||||||
|  |         static u16 FracMask() { return 0xF; } | ||||||
|  |         static u16 IntMask() { return (u16)~0xF; } | ||||||
|  | 
 | ||||||
|  |         operator u16() const { | ||||||
|  |             return val; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         bool operator < (const Fix12P4& oth) const { | ||||||
|  |             return (u16)*this < (u16)oth; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |     private: | ||||||
|  |         u16 val; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     // vertex positions in rasterizer coordinates
 | ||||||
|  |     auto FloatToFix = [](float24 flt) { | ||||||
|  |                           return Fix12P4(flt.ToFloat32() * 16.0f); | ||||||
|  |                       }; | ||||||
|  |     auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | ||||||
|  |                                              return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | ||||||
|  |                                          }; | ||||||
|  |     Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | ||||||
|  |                                    ScreenToRasterizerCoordinates(v1.screenpos), | ||||||
|  |                                    ScreenToRasterizerCoordinates(v2.screenpos) }; | ||||||
|  | 
 | ||||||
|  |     // TODO: Proper scissor rect test!
 | ||||||
|  |     u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | ||||||
|  |     u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | ||||||
|  |     u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); | ||||||
|  |     u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | ||||||
|  | 
 | ||||||
|  |     min_x = min_x & Fix12P4::IntMask(); | ||||||
|  |     min_y = min_y & Fix12P4::IntMask(); | ||||||
|  |     max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); | ||||||
|  |     max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); | ||||||
|  | 
 | ||||||
|  |     // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
 | ||||||
|  |     // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
 | ||||||
|  |     // values which are added to the barycentric coordinates w0, w1 and w2, respectively.
 | ||||||
|  |     // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
 | ||||||
|  |     auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, | ||||||
|  |                                           const Math::Vec2<Fix12P4>& line1, | ||||||
|  |                                           const Math::Vec2<Fix12P4>& line2) | ||||||
|  |     { | ||||||
|  |         if (line1.y == line2.y) { | ||||||
|  |             // just check if vertex is above us => bottom line parallel to x-axis
 | ||||||
|  |             return vtx.y < line1.y; | ||||||
|  |         } else { | ||||||
|  |             // check if vertex is on our left => right side
 | ||||||
|  |             // TODO: Not sure how likely this is to overflow
 | ||||||
|  |             return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |     int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; | ||||||
|  |     int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | ||||||
|  |     int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | ||||||
|  | 
 | ||||||
|  |     // TODO: Not sure if looping through x first might be faster
 | ||||||
|  |     for (u16 y = min_y; y < max_y; y += 0x10) { | ||||||
|  |         for (u16 x = min_x; x < max_x; x += 0x10) { | ||||||
|  | 
 | ||||||
|  |             // Calculate the barycentric coordinates w0, w1 and w2
 | ||||||
|  |             auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, | ||||||
|  |                                const Math::Vec2<Fix12P4>& vtx2, | ||||||
|  |                                const Math::Vec2<Fix12P4>& vtx3) { | ||||||
|  |                 const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); | ||||||
|  |                 const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); | ||||||
|  |                 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
 | ||||||
|  |                 return Cross(vec1, vec2).z; | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||||||
|  |             int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||||||
|  |             int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||||||
|  |             int wsum = w0 + w1 + w2; | ||||||
|  | 
 | ||||||
|  |             // If current pixel is not covered by the current primitive
 | ||||||
|  |             if (w0 < 0 || w1 < 0 || w2 < 0) | ||||||
|  |                 continue; | ||||||
|  | 
 | ||||||
|  |             // Perspective correct attribute interpolation:
 | ||||||
|  |             // Attribute values cannot be calculated by simple linear interpolation since
 | ||||||
|  |             // they are not linear in screen space. For example, when interpolating a
 | ||||||
|  |             // texture coordinate across two vertices, something simple like
 | ||||||
|  |             //     u = (u0*w0 + u1*w1)/(w0+w1)
 | ||||||
|  |             // will not work. However, the attribute value divided by the
 | ||||||
|  |             // clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear
 | ||||||
|  |             // in screenspace. Hence, we can linearly interpolate these two independently and
 | ||||||
|  |             // calculate the interpolated attribute by dividing the results.
 | ||||||
|  |             // I.e.
 | ||||||
|  |             //     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
 | ||||||
|  |             //     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
 | ||||||
|  |             //     u = u_over_w / one_over_w
 | ||||||
|  |             //
 | ||||||
|  |             // The generalization to three vertices is straightforward in baricentric coordinates.
 | ||||||
|  |             auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { | ||||||
|  |                 auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, | ||||||
|  |                                                   attr1 / v1.pos.w, | ||||||
|  |                                                   attr2 / v2.pos.w); | ||||||
|  |                 auto w_inverse   = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, | ||||||
|  |                                                   float24::FromFloat32(1.f) / v1.pos.w, | ||||||
|  |                                                   float24::FromFloat32(1.f) / v2.pos.w); | ||||||
|  |                 auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), | ||||||
|  |                                                               float24::FromFloat32(w1), | ||||||
|  |                                                               float24::FromFloat32(w2)); | ||||||
|  | 
 | ||||||
|  |                 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); | ||||||
|  |                 float24 interpolated_w_inverse   = Math::Dot(w_inverse,   baricentric_coordinates); | ||||||
|  |                 return interpolated_attr_over_w / interpolated_w_inverse; | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             Math::Vec4<u8> primary_color{ | ||||||
|  |                 (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), | ||||||
|  |                 (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), | ||||||
|  |                 (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), | ||||||
|  |                 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | ||||||
|  |             }; | ||||||
|  | 
 | ||||||
|  |             u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + | ||||||
|  |                            (float)v1.screenpos[2].ToFloat32() * w1 + | ||||||
|  |                            (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?
 | ||||||
|  |             SetDepth(x >> 4, y >> 4, z); | ||||||
|  | 
 | ||||||
|  |             DrawPixel(x >> 4, y >> 4, primary_color); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Rasterizer
 | ||||||
|  | 
 | ||||||
|  | } // namespace Pica
 | ||||||
							
								
								
									
										21
									
								
								src/video_core/rasterizer.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								src/video_core/rasterizer.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace VertexShader { | ||||||
|  |     struct OutputVertex; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | namespace Rasterizer { | ||||||
|  | 
 | ||||||
|  | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||||||
|  |                      const VertexShader::OutputVertex& v1, | ||||||
|  |                      const VertexShader::OutputVertex& v2); | ||||||
|  | 
 | ||||||
|  | } // namespace Rasterizer
 | ||||||
|  | 
 | ||||||
|  | } // namespace Pica
 | ||||||
|  | @ -81,20 +81,20 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& | ||||||
|     const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0]; |     const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0]; | ||||||
|     const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1]; |     const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1]; | ||||||
|     const u32 active_fb_top = (framebuffer_top.active_fb == 1) |     const u32 active_fb_top = (framebuffer_top.active_fb == 1) | ||||||
|                                 ? framebuffer_top.address_left2 |                                 ? Memory::PhysicalToVirtualAddress(framebuffer_top.address_left2) | ||||||
|                                 : framebuffer_top.address_left1; |                                 : Memory::PhysicalToVirtualAddress(framebuffer_top.address_left1); | ||||||
|     const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) |     const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) | ||||||
|                                 ? framebuffer_sub.address_left2 |                                 ? Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left2) | ||||||
|                                 : framebuffer_sub.address_left1; |                                 : Memory::PhysicalToVirtualAddress(framebuffer_sub.address_left1); | ||||||
| 
 | 
 | ||||||
|     DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", |     DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", | ||||||
|               framebuffer_top.stride * framebuffer_top.height, |               framebuffer_top.stride * framebuffer_top.height, | ||||||
|               GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width, |               active_fb_top, (int)framebuffer_top.width, | ||||||
|               (int)framebuffer_top.height, (int)framebuffer_top.format); |               (int)framebuffer_top.height, (int)framebuffer_top.format); | ||||||
| 
 | 
 | ||||||
|     // TODO: This should consider the GPU registers for framebuffer width, height and stride.
 |     // TODO: This should consider the GPU registers for framebuffer width, height and stride.
 | ||||||
|     FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); |     FlipFramebuffer(Memory::GetPointer(active_fb_top), m_xfb_top_flipped); | ||||||
|     FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_sub), m_xfb_bottom_flipped); |     FlipFramebuffer(Memory::GetPointer(active_fb_sub), m_xfb_bottom_flipped); | ||||||
| 
 | 
 | ||||||
|     // Blit the top framebuffer
 |     // Blit the top framebuffer
 | ||||||
|     // ------------------------
 |     // ------------------------
 | ||||||
|  |  | ||||||
							
								
								
									
										270
									
								
								src/video_core/vertex_shader.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										270
									
								
								src/video_core/vertex_shader.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,270 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "pica.h" | ||||||
|  | #include "vertex_shader.h" | ||||||
|  | #include <core/mem_map.h> | ||||||
|  | #include <common/file_util.h> | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace VertexShader { | ||||||
|  | 
 | ||||||
// Floating point uniform registers: 96 four-component vectors. ProcessShaderCode reads
// them as src1 register indices 0x20-0x7F, and GetFloatUniform hands out mutable
// references to individual entries.
|  | static struct { | ||||||
|  |     Math::Vec4<float24> f[96]; | ||||||
|  | } shader_uniforms; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
 | ||||||
|  | // For now, we just keep these local arrays around. Both are indexed in 32-bit words: shader_memory holds the instruction words that ProcessShaderCode executes, swizzle_data holds the operand descriptors it looks up through operand_desc_id.
 | ||||||
|  | static u32 shader_memory[1024]; | ||||||
|  | static u32 swizzle_data[1024]; | ||||||
|  | 
 | ||||||
|  | void SubmitShaderMemoryChange(u32 addr, u32 value) | ||||||
|  | { | ||||||
|  |     shader_memory[addr] = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SubmitSwizzleDataChange(u32 addr, u32 value) | ||||||
|  | { | ||||||
|  |     swizzle_data[addr] = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Math::Vec4<float24>& GetFloatUniform(u32 index) | ||||||
|  | { | ||||||
|  |     return shader_uniforms.f[index]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
// Interpreter state for a single shader invocation. RunShader fills it in and
// ProcessShaderCode consumes and updates it while executing instructions.
|  | struct VertexShaderState { | ||||||
         // Next instruction word to execute; points into shader_memory.
|  |     u32* program_counter; | ||||||
|  | 
 | ||||||
         // One entry per input register, pointing at the x component of the bound attribute.
|  |     const float24* input_register_table[16]; | ||||||
         // One entry per output float (7 registers with 4 components each), pointing into
         // the OutputVertex that RunShader is building.
|  |     float24* output_register_table[7*4]; | ||||||
|  | 
 | ||||||
|  |     Math::Vec4<float24> temporary_registers[16]; | ||||||
         // Cleared by RunShader; ProcessShaderCode does not read them.
|  |     bool status_registers[2]; | ||||||
|  | 
 | ||||||
         // Marks a call stack slot that holds no return address. RET stops execution when
         // it finds this value on top of the stack.
|  |     enum { | ||||||
|  |         INVALID_ADDRESS = 0xFFFFFFFF | ||||||
|  |     }; | ||||||
|  |     u32 call_stack[8]; // Return addresses as word offsets into shader_memory. TODO: What is the maximal call stack depth?
 | ||||||
         // Points at the current top entry of call_stack.
|  |     u32* call_stack_pointer; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void ProcessShaderCode(VertexShaderState& state) { | ||||||
|  |     while (true) { | ||||||
|  |         bool increment_pc = true; | ||||||
|  |         bool exit_loop = false; | ||||||
|  |         const Instruction& instr = *(const Instruction*)state.program_counter; | ||||||
|  | 
 | ||||||
|  |         const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | ||||||
|  |                              : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | ||||||
|  |                              : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x | ||||||
|  |                              : nullptr; | ||||||
|  |         const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] | ||||||
|  |                              : &state.temporary_registers[instr.common.src2-0x10].x; | ||||||
|  |         // TODO: Unsure about the limit values
 | ||||||
|  |         float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] | ||||||
|  |                              : (instr.common.dest <= 0x3C) ? nullptr | ||||||
|  |                              : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] | ||||||
|  |                              : nullptr; | ||||||
|  | 
 | ||||||
|  |         const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | ||||||
|  | 
 | ||||||
|  |         const float24 src1[4] = { | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(0)], | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(1)], | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(2)], | ||||||
|  |             src1_[(int)swizzle.GetSelectorSrc1(3)], | ||||||
|  |         }; | ||||||
|  |         const float24 src2[4] = { | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(0)], | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(1)], | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(2)], | ||||||
|  |             src2_[(int)swizzle.GetSelectorSrc2(3)], | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         switch (instr.opcode) { | ||||||
|  |             case Instruction::OpCode::ADD: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = src1[i] + src2[i]; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::MUL: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = src1[i] * src2[i]; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::DP3: | ||||||
|  |             case Instruction::OpCode::DP4: | ||||||
|  |             { | ||||||
|  |                 float24 dot = float24::FromFloat32(0.f); | ||||||
|  |                 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | ||||||
|  |                 for (int i = 0; i < num_components; ++i) | ||||||
|  |                     dot = dot + src1[i] * src2[i]; | ||||||
|  | 
 | ||||||
|  |                 for (int i = 0; i < num_components; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = dot; | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Reciprocal
 | ||||||
|  |             case Instruction::OpCode::RCP: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     // TODO: Be stable against division by zero!
 | ||||||
|  |                     // TODO: I think this might be wrong... we should only use one component here
 | ||||||
|  |                     dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32()); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Reciprocal Square Root
 | ||||||
|  |             case Instruction::OpCode::RSQ: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     // TODO: Be stable against division by zero!
 | ||||||
|  |                     // TODO: I think this might be wrong... we should only use one component here
 | ||||||
|  |                     dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32())); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::MOV: | ||||||
|  |             { | ||||||
|  |                 for (int i = 0; i < 4; ++i) { | ||||||
|  |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|  |                         continue; | ||||||
|  | 
 | ||||||
|  |                     dest[i] = src1[i]; | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::RET: | ||||||
|  |                 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | ||||||
|  |                     exit_loop = true; | ||||||
|  |                 } else { | ||||||
|  |                     state.program_counter = &shader_memory[*state.call_stack_pointer--]; | ||||||
|  |                     *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::CALL: | ||||||
|  |                 increment_pc = false; | ||||||
|  | 
 | ||||||
|  |                 _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); | ||||||
|  | 
 | ||||||
|  |                 *++state.call_stack_pointer = state.program_counter - shader_memory; | ||||||
|  |                 // TODO: Does this offset refer to the beginning of shader memory?
 | ||||||
|  |                 state.program_counter = &shader_memory[instr.flow_control.offset_words]; | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|  |             case Instruction::OpCode::FLS: | ||||||
|  |                 // TODO: Do whatever needs to be done here?
 | ||||||
|  |                 break; | ||||||
|  | 
 | ||||||
|  |             default: | ||||||
|  |                 ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||||||
|  |                           (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); | ||||||
|  |                 break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (increment_pc) | ||||||
|  |             ++state.program_counter; | ||||||
|  | 
 | ||||||
|  |         if (exit_loop) | ||||||
|  |             break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | OutputVertex RunShader(const InputVertex& input, int num_attributes) | ||||||
|  | { | ||||||
|  |     VertexShaderState state; | ||||||
|  | 
 | ||||||
|  |     const u32* main = &shader_memory[registers.vs_main_offset]; | ||||||
|  |     state.program_counter = (u32*)main; | ||||||
|  | 
 | ||||||
|  |     // Setup input register table
 | ||||||
|  |     const auto& attribute_register_map = registers.vs_input_register_map; | ||||||
|  |     float24 dummy_register; | ||||||
|  |     std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register); | ||||||
|  |     if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||||||
|  |     if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||||||
|  |     if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||||||
|  |     if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||||||
|  |     if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||||||
|  |     if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||||||
|  |     if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||||||
|  |     if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||||||
|  |     if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||||||
|  |     if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||||||
|  |     if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||||||
|  |     if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||||||
|  |     if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||||||
|  |     if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||||||
|  |     if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||||||
|  |     if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||||||
|  | 
 | ||||||
|  |     // Setup output register table
 | ||||||
|  |     OutputVertex ret; | ||||||
|  |     for (int i = 0; i < 7; ++i) { | ||||||
|  |         const auto& output_register_map = registers.vs_output_attributes[i]; | ||||||
|  | 
 | ||||||
|  |         u32 semantics[4] = { | ||||||
|  |             output_register_map.map_x, output_register_map.map_y, | ||||||
|  |             output_register_map.map_z, output_register_map.map_w | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         for (int comp = 0; comp < 4; ++comp) | ||||||
|  |             state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     state.status_registers[0] = false; | ||||||
|  |     state.status_registers[1] = false; | ||||||
|  |     std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]), | ||||||
|  |               VertexShaderState::INVALID_ADDRESS); | ||||||
|  |     state.call_stack_pointer = &state.call_stack[0]; | ||||||
|  | 
 | ||||||
|  |     ProcessShaderCode(state); | ||||||
|  | 
 | ||||||
|  |     DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||||||
|  |         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||||||
|  |         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||||||
|  |         ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||||||
|  | 
 | ||||||
|  |     return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
							
								
								
									
										211
									
								
								src/video_core/vertex_shader.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								src/video_core/vertex_shader.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,211 @@ | ||||||
|  | // Copyright 2014 Citra Emulator Project
 | ||||||
|  | // Licensed under GPLv2
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <initializer_list> | ||||||
|  | 
 | ||||||
|  | #include <common/common_types.h> | ||||||
|  | 
 | ||||||
|  | #include "math.h" | ||||||
|  | #include "pica.h" | ||||||
|  | 
 | ||||||
|  | namespace Pica { | ||||||
|  | 
 | ||||||
|  | namespace VertexShader { | ||||||
|  | 
 | ||||||
// Attribute data of one vertex as passed to RunShader: up to 16 four-component
// attributes. RunShader binds the first num_attributes entries to shader input registers.
|  | struct InputVertex { | ||||||
|  |     Math::Vec4<float24> attr[16]; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct OutputVertex { | ||||||
|  |     OutputVertex() = default; | ||||||
|  | 
 | ||||||
|  |     // VS output attributes. RunShader addresses this struct as a flat array of float24 when it sets up the output registers, so the order of these members matters.
 | ||||||
|  |     Math::Vec4<float24> pos; | ||||||
|  |     Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
 | ||||||
|  |     Math::Vec4<float24> color; | ||||||
|  |     Math::Vec2<float24> tc0; | ||||||
|  |     float24 tc0_v; | ||||||
|  | 
 | ||||||
|  |     // Padding for optimal alignment
 | ||||||
|  |     float24 pad[14]; | ||||||
|  | 
 | ||||||
|  |     // Attributes used to store intermediate results
 | ||||||
|  | 
 | ||||||
|  |     // position after perspective divide
 | ||||||
|  |     Math::Vec3<float24> screenpos; | ||||||
|  | 
 | ||||||
|  |     // Linear interpolation of pos, tc0, screenpos and color, done in place (the remaining members are left untouched)
 | ||||||
|  |     // factor: 1=this, 0=vtx (each member becomes this*factor + vtx*(1-factor))
 | ||||||
|  |     void Lerp(float24 factor, const OutputVertex& vtx) { | ||||||
|  |         pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); | ||||||
|  | 
 | ||||||
|  |         // TODO: Should perform perspective correct interpolation here...
 | ||||||
|  |         tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); | ||||||
|  | 
 | ||||||
|  |         screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); | ||||||
|  | 
 | ||||||
|  |         color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Linear interpolation returning a new vertex; starts from a copy of v0 and applies the member Lerp with v1
 | ||||||
|  |     // factor: 1=v0, 0=v1
 | ||||||
|  |     static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { | ||||||
|  |         OutputVertex ret = v0; | ||||||
|  |         ret.Lerp(factor, v1); | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||||
|  | 
 | ||||||
|  | union Instruction { | ||||||
|  |     enum class OpCode : u32 { | ||||||
|  |         ADD = 0x0, | ||||||
|  |         DP3 = 0x1, | ||||||
|  |         DP4 = 0x2, | ||||||
|  | 
 | ||||||
|  |         MUL = 0x8, | ||||||
|  | 
 | ||||||
|  |         MAX = 0xC, | ||||||
|  |         MIN = 0xD, | ||||||
|  |         RCP = 0xE, | ||||||
|  |         RSQ = 0xF, | ||||||
|  | 
 | ||||||
|  |         MOV = 0x13, | ||||||
|  | 
 | ||||||
|  |         RET = 0x21, | ||||||
|  |         FLS = 0x22, // Flush
 | ||||||
|  |         CALL = 0x24, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     std::string GetOpCodeName() const { | ||||||
|  |         std::map<OpCode, std::string> map = { | ||||||
|  |             { OpCode::ADD, "ADD" }, | ||||||
|  |             { OpCode::DP3, "DP3" }, | ||||||
|  |             { OpCode::DP4, "DP4" }, | ||||||
|  |             { OpCode::MUL, "MUL" }, | ||||||
|  |             { OpCode::MAX, "MAX" }, | ||||||
|  |             { OpCode::MIN, "MIN" }, | ||||||
|  |             { OpCode::RCP, "RCP" }, | ||||||
|  |             { OpCode::RSQ, "RSQ" }, | ||||||
|  |             { OpCode::MOV, "MOV" }, | ||||||
|  |             { OpCode::RET, "RET" }, | ||||||
|  |             { OpCode::FLS, "FLS" }, | ||||||
|  |         }; | ||||||
|  |         auto it = map.find(opcode); | ||||||
|  |         if (it == map.end()) | ||||||
|  |             return "UNK"; | ||||||
|  |         else | ||||||
|  |             return it->second; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u32 hex; | ||||||
|  | 
 | ||||||
|  |     BitField<0x1a, 0x6, OpCode> opcode; | ||||||
|  | 
 | ||||||
|  |     // General notes:
 | ||||||
|  |     //
 | ||||||
|  |     // When two input registers are used, one of them uses a 5-bit index while the other
 | ||||||
|  |     // one uses a 7-bit index. This is because at most one floating point uniform may be used
 | ||||||
|  |     // as an input.
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |     // Format used e.g. by arithmetic instructions and comparisons
 | ||||||
|  |     // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
 | ||||||
|  |     // while "dest" addresses individual floats.
 | ||||||
|  |     union { | ||||||
|  |         BitField<0x00, 0x5, u32> operand_desc_id; | ||||||
|  |         BitField<0x07, 0x5, u32> src2; | ||||||
|  |         BitField<0x0c, 0x7, u32> src1; | ||||||
|  |         BitField<0x13, 0x7, u32> dest; | ||||||
|  |     } common; | ||||||
|  | 
 | ||||||
|  |     // Format used for flow control instructions ("if")
 | ||||||
|  |     union { | ||||||
|  |         BitField<0x00, 0x8, u32> num_instructions; | ||||||
|  |         BitField<0x0a, 0xc, u32> offset_words; | ||||||
|  |     } flow_control; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union SwizzlePattern { | ||||||
|  |     u32 hex; | ||||||
|  | 
 | ||||||
|  |     enum class Selector : u32 { | ||||||
|  |         x = 0, | ||||||
|  |         y = 1, | ||||||
|  |         z = 2, | ||||||
|  |         w = 3 | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     Selector GetSelectorSrc1(int comp) const { | ||||||
|  |         Selector selectors[] = { | ||||||
|  |             src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 | ||||||
|  |         }; | ||||||
|  |         return selectors[comp]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     Selector GetSelectorSrc2(int comp) const { | ||||||
|  |         Selector selectors[] = { | ||||||
|  |             src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 | ||||||
|  |         }; | ||||||
|  |         return selectors[comp]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool DestComponentEnabled(int i) const { | ||||||
|  |         return (dest_mask & (0x8 >> i)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::string SelectorToString(bool src2) const { | ||||||
|  |         std::map<Selector, std::string> map = { | ||||||
|  |             { Selector::x, "x" }, | ||||||
|  |             { Selector::y, "y" }, | ||||||
|  |             { Selector::z, "z" }, | ||||||
|  |             { Selector::w, "w" } | ||||||
|  |         }; | ||||||
|  |         std::string ret; | ||||||
|  |         for (int i = 0; i < 4; ++i) { | ||||||
|  |             ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); | ||||||
|  |         } | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::string DestMaskToString() const { | ||||||
|  |         std::string ret; | ||||||
|  |         for (int i = 0; i < 4; ++i) { | ||||||
|  |             if (!DestComponentEnabled(i)) | ||||||
|  |                 ret += "_"; | ||||||
|  |             else | ||||||
|  |                 ret += "xyzw"[i]; | ||||||
|  |         } | ||||||
|  |         return ret; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
 | ||||||
|  |     BitField< 0, 4, u32> dest_mask; | ||||||
|  | 
 | ||||||
|  |     BitField< 5, 2, Selector> src1_selector_3; | ||||||
|  |     BitField< 7, 2, Selector> src1_selector_2; | ||||||
|  |     BitField< 9, 2, Selector> src1_selector_1; | ||||||
|  |     BitField<11, 2, Selector> src1_selector_0; | ||||||
|  | 
 | ||||||
|  |     BitField<14, 2, Selector> src2_selector_3; | ||||||
|  |     BitField<16, 2, Selector> src2_selector_2; | ||||||
|  |     BitField<18, 2, Selector> src2_selector_1; | ||||||
|  |     BitField<20, 2, Selector> src2_selector_0; | ||||||
|  | 
 | ||||||
|  |     BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
// Store one 32-bit word of shader program code / operand descriptor data at word index addr.
|  | void SubmitShaderMemoryChange(u32 addr, u32 value); | ||||||
|  | void SubmitSwizzleDataChange(u32 addr, u32 value); | ||||||
|  | 
 | ||||||
// Runs the loaded shader program on one vertex; num_attributes is the number of leading
// entries of input.attr that are bound to shader input registers.
|  | OutputVertex RunShader(const InputVertex& input, int num_attributes); | ||||||
|  | 
 | ||||||
// Returns a mutable reference to the floating point uniform register with the given index.
|  | Math::Vec4<float24>& GetFloatUniform(u32 index); | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | @ -20,14 +20,25 @@ | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|     <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> |     <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> | ||||||
|  |     <ClCompile Include="clipper.cpp" /> | ||||||
|  |     <ClCompile Include="command_processor.cpp" /> | ||||||
|  |     <ClCompile Include="primitive_assembly.cpp" /> | ||||||
|  |     <ClCompile Include="rasterizer.cpp" /> | ||||||
|     <ClCompile Include="utils.cpp" /> |     <ClCompile Include="utils.cpp" /> | ||||||
|  |     <ClCompile Include="vertex_shader.cpp" /> | ||||||
|     <ClCompile Include="video_core.cpp" /> |     <ClCompile Include="video_core.cpp" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|  |     <ClInclude Include="clipper.h" /> | ||||||
|  |     <ClInclude Include="command_processor.h" /> | ||||||
|     <ClInclude Include="gpu_debugger.h" /> |     <ClInclude Include="gpu_debugger.h" /> | ||||||
|  |     <ClInclude Include="math.h" /> | ||||||
|     <ClInclude Include="pica.h" /> |     <ClInclude Include="pica.h" /> | ||||||
|  |     <ClInclude Include="primitive_assembly.h" /> | ||||||
|  |     <ClInclude Include="rasterizer.h" /> | ||||||
|     <ClInclude Include="renderer_base.h" /> |     <ClInclude Include="renderer_base.h" /> | ||||||
|     <ClInclude Include="utils.h" /> |     <ClInclude Include="utils.h" /> | ||||||
|  |     <ClInclude Include="vertex_shader.h" /> | ||||||
|     <ClInclude Include="video_core.h" /> |     <ClInclude Include="video_core.h" /> | ||||||
|     <ClInclude Include="renderer_opengl\renderer_opengl.h" /> |     <ClInclude Include="renderer_opengl\renderer_opengl.h" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|  |  | ||||||
|  | @ -9,17 +9,28 @@ | ||||||
|     <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> |     <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> | ||||||
|       <Filter>renderer_opengl</Filter> |       <Filter>renderer_opengl</Filter> | ||||||
|     </ClCompile> |     </ClCompile> | ||||||
|  |     <ClCompile Include="clipper.cpp" /> | ||||||
|  |     <ClCompile Include="command_processor.cpp" /> | ||||||
|  |     <ClCompile Include="primitive_assembly.cpp" /> | ||||||
|  |     <ClCompile Include="rasterizer.cpp" /> | ||||||
|     <ClCompile Include="utils.cpp" /> |     <ClCompile Include="utils.cpp" /> | ||||||
|  |     <ClCompile Include="vertex_shader.cpp" /> | ||||||
|     <ClCompile Include="video_core.cpp" /> |     <ClCompile Include="video_core.cpp" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|     <ClInclude Include="renderer_opengl\renderer_opengl.h"> |     <ClInclude Include="renderer_opengl\renderer_opengl.h"> | ||||||
|       <Filter>renderer_opengl</Filter> |       <Filter>renderer_opengl</Filter> | ||||||
|     </ClInclude> |     </ClInclude> | ||||||
|  |     <ClInclude Include="clipper.h" /> | ||||||
|  |     <ClInclude Include="command_processor.h" /> | ||||||
|     <ClInclude Include="gpu_debugger.h" /> |     <ClInclude Include="gpu_debugger.h" /> | ||||||
|  |     <ClInclude Include="math.h" /> | ||||||
|     <ClInclude Include="pica.h" /> |     <ClInclude Include="pica.h" /> | ||||||
|  |     <ClInclude Include="primitive_assembly.h" /> | ||||||
|  |     <ClInclude Include="rasterizer.h" /> | ||||||
|     <ClInclude Include="renderer_base.h" /> |     <ClInclude Include="renderer_base.h" /> | ||||||
|     <ClInclude Include="utils.h" /> |     <ClInclude Include="utils.h" /> | ||||||
|  |     <ClInclude Include="vertex_shader.h" /> | ||||||
|     <ClInclude Include="video_core.h" /> |     <ClInclude Include="video_core.h" /> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue