mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Merge pull request #5546 from FearlessTobi/port-5524
Port yuzu-emu/yuzu#4086 and yuzu-emu/yuzu#4611: Xbyak cleanups
This commit is contained in:
		
						commit
						5776bdda82
					
				
					 4 changed files with 79 additions and 70 deletions
				
			
		
							
								
								
									
										2
									
								
								externals/xbyak
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								externals/xbyak
									
										
									
									
										vendored
									
									
								
							|  | @ -1 +1 @@ | ||||||
| Subproject commit 18c9caaa0a3ed5706c39f5aa86cce0db6e65b174 | Subproject commit c306b8e5786eeeb87b8925a8af5c3bf057ff5a90 | ||||||
|  | @ -4,14 +4,14 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <bitset> | ||||||
| #include <initializer_list> | #include <initializer_list> | ||||||
| #include <xbyak.h> | #include <xbyak.h> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/bit_set.h" |  | ||||||
| 
 | 
 | ||||||
| namespace Common::X64 { | namespace Common::X64 { | ||||||
| 
 | 
 | ||||||
| inline int RegToIndex(const Xbyak::Reg& reg) { | constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) { | ||||||
|     using Kind = Xbyak::Reg::Kind; |     using Kind = Xbyak::Reg::Kind; | ||||||
|     ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, |     ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, | ||||||
|                "RegSet only support GPRs and XMM registers."); |                "RegSet only support GPRs and XMM registers."); | ||||||
|  | @ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { | ||||||
|     return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); |     return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline Xbyak::Reg64 IndexToReg64(int reg_index) { | constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { | ||||||
|     ASSERT(reg_index < 16); |     ASSERT(reg_index < 16); | ||||||
|     return Xbyak::Reg64(reg_index); |     return Xbyak::Reg64(static_cast<int>(reg_index)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline Xbyak::Xmm IndexToXmm(int reg_index) { | constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) { | ||||||
|     ASSERT(reg_index >= 16 && reg_index < 32); |     ASSERT(reg_index >= 16 && reg_index < 32); | ||||||
|     return Xbyak::Xmm(reg_index - 16); |     return Xbyak::Xmm(static_cast<int>(reg_index - 16)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline Xbyak::Reg IndexToReg(int reg_index) { | constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) { | ||||||
|     if (reg_index < 16) { |     if (reg_index < 16) { | ||||||
|         return IndexToReg64(reg_index); |         return IndexToReg64(reg_index); | ||||||
|     } else { |     } else { | ||||||
|  | @ -37,27 +37,27 @@ inline Xbyak::Reg IndexToReg(int reg_index) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline BitSet32 BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { | inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { | ||||||
|     BitSet32 bits; |     std::bitset<32> bits; | ||||||
|     for (const Xbyak::Reg& reg : regs) { |     for (const Xbyak::Reg& reg : regs) { | ||||||
|         bits[RegToIndex(reg)] = true; |         bits[RegToIndex(reg)] = true; | ||||||
|     } |     } | ||||||
|     return bits; |     return bits; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const BitSet32 ABI_ALL_GPRS(0x0000FFFF); | constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); | ||||||
| const BitSet32 ABI_ALL_XMMS(0xFFFF0000); | constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000); | ||||||
| 
 | 
 | ||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
| 
 | 
 | ||||||
| // Microsoft x64 ABI
 | // Microsoft x64 ABI
 | ||||||
| const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | ||||||
| const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; | constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; | ||||||
| const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; | constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; | ||||||
| const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; | constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; | ||||||
| const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; | constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; | ||||||
| 
 | 
 | ||||||
| const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ | const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||||||
|     // GPRs
 |     // GPRs
 | ||||||
|     Xbyak::util::rcx, |     Xbyak::util::rcx, | ||||||
|     Xbyak::util::rdx, |     Xbyak::util::rdx, | ||||||
|  | @ -74,7 +74,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||||||
|     Xbyak::util::xmm5, |     Xbyak::util::xmm5, | ||||||
| }); | }); | ||||||
| 
 | 
 | ||||||
| const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ | const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ | ||||||
|     // GPRs
 |     // GPRs
 | ||||||
|     Xbyak::util::rbx, |     Xbyak::util::rbx, | ||||||
|     Xbyak::util::rsi, |     Xbyak::util::rsi, | ||||||
|  | @ -102,13 +102,13 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0x20; | ||||||
| #else | #else | ||||||
| 
 | 
 | ||||||
| // System V x86-64 ABI
 | // System V x86-64 ABI
 | ||||||
| const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | ||||||
| const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; | constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; | ||||||
| const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; | constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; | ||||||
| const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; | constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; | ||||||
| const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; | constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; | ||||||
| 
 | 
 | ||||||
| const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ | const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||||||
|     // GPRs
 |     // GPRs
 | ||||||
|     Xbyak::util::rcx, |     Xbyak::util::rcx, | ||||||
|     Xbyak::util::rdx, |     Xbyak::util::rdx, | ||||||
|  | @ -137,7 +137,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||||||
|     Xbyak::util::xmm15, |     Xbyak::util::xmm15, | ||||||
| }); | }); | ||||||
| 
 | 
 | ||||||
| const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ | const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ | ||||||
|     // GPRs
 |     // GPRs
 | ||||||
|     Xbyak::util::rbx, |     Xbyak::util::rbx, | ||||||
|     Xbyak::util::rbp, |     Xbyak::util::rbp, | ||||||
|  | @ -151,13 +151,17 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0; | ||||||
| 
 | 
 | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, | struct ABIFrameInfo { | ||||||
|                                    std::size_t needed_frame_size, s32* out_subtraction, |     s32 subtraction; | ||||||
|                                    s32* out_xmm_offset) { |     s32 xmm_offset; | ||||||
|     int count = (regs & ABI_ALL_GPRS).Count(); | }; | ||||||
|  | 
 | ||||||
|  | inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment, | ||||||
|  |                                            std::size_t needed_frame_size) { | ||||||
|  |     int count = (regs & ABI_ALL_GPRS).count(); | ||||||
|     rsp_alignment -= count * 8; |     rsp_alignment -= count * 8; | ||||||
|     std::size_t subtraction = 0; |     std::size_t subtraction = 0; | ||||||
|     int xmm_count = (regs & ABI_ALL_XMMS).Count(); |     int xmm_count = (regs & ABI_ALL_XMMS).count(); | ||||||
|     if (xmm_count) { |     if (xmm_count) { | ||||||
|         // If we have any XMMs to save, we must align the stack here.
 |         // If we have any XMMs to save, we must align the stack here.
 | ||||||
|         subtraction = rsp_alignment & 0xF; |         subtraction = rsp_alignment & 0xF; | ||||||
|  | @ -170,45 +174,49 @@ inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, | ||||||
|     rsp_alignment -= subtraction; |     rsp_alignment -= subtraction; | ||||||
|     subtraction += rsp_alignment & 0xF; |     subtraction += rsp_alignment & 0xF; | ||||||
| 
 | 
 | ||||||
|     *out_subtraction = (s32)subtraction; |     return ABIFrameInfo{static_cast<s32>(subtraction), | ||||||
|     *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); |                         static_cast<s32>(subtraction - xmm_base_subtraction)}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, | inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||||||
|                                                    std::size_t rsp_alignment, |                                                    std::size_t rsp_alignment, | ||||||
|                                                    std::size_t needed_frame_size = 0) { |                                                    std::size_t needed_frame_size = 0) { | ||||||
|     s32 subtraction, xmm_offset; |     auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); | ||||||
|     ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); |  | ||||||
| 
 | 
 | ||||||
|     for (int reg_index : (regs & ABI_ALL_GPRS)) { |     for (std::size_t i = 0; i < regs.size(); ++i) { | ||||||
|         code.push(IndexToReg64(reg_index)); |         if (regs[i] && ABI_ALL_GPRS[i]) { | ||||||
|  |             code.push(IndexToReg64(i)); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (subtraction != 0) { |     if (frame_info.subtraction != 0) { | ||||||
|         code.sub(code.rsp, subtraction); |         code.sub(code.rsp, frame_info.subtraction); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     for (int reg_index : (regs & ABI_ALL_XMMS)) { |     for (std::size_t i = 0; i < regs.size(); ++i) { | ||||||
|         code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(reg_index)); |         if (regs[i] && ABI_ALL_XMMS[i]) { | ||||||
|         xmm_offset += 0x10; |             code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); | ||||||
|  |             frame_info.xmm_offset += 0x10; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return ABI_SHADOW_SPACE; |     return ABI_SHADOW_SPACE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||||||
|                                            std::size_t rsp_alignment, |                                            std::size_t rsp_alignment, | ||||||
|                                            std::size_t needed_frame_size = 0) { |                                            std::size_t needed_frame_size = 0) { | ||||||
|     s32 subtraction, xmm_offset; |     auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); | ||||||
|     ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); |  | ||||||
| 
 | 
 | ||||||
|     for (int reg_index : (regs & ABI_ALL_XMMS)) { |     for (std::size_t i = 0; i < regs.size(); ++i) { | ||||||
|         code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + xmm_offset]); |         if (regs[i] && ABI_ALL_XMMS[i]) { | ||||||
|         xmm_offset += 0x10; |             code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); | ||||||
|  |             frame_info.xmm_offset += 0x10; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (subtraction != 0) { |     if (frame_info.subtraction != 0) { | ||||||
|         code.add(code.rsp, subtraction); |         code.add(code.rsp, frame_info.subtraction); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // GPRs need to be popped in reverse order
 |     // GPRs need to be popped in reverse order
 | ||||||
|  |  | ||||||
|  | @ -102,40 +102,40 @@ const JitFunction instr_table[64] = { | ||||||
| // purposes, as documented below:
 | // purposes, as documented below:
 | ||||||
| 
 | 
 | ||||||
| /// Pointer to the uniform memory
 | /// Pointer to the uniform memory
 | ||||||
| static const Reg64 UNIFORMS = r9; | constexpr Reg64 UNIFORMS = r9; | ||||||
| /// The two 32-bit VS address offset registers set by the MOVA instruction
 | /// The two 32-bit VS address offset registers set by the MOVA instruction
 | ||||||
| static const Reg64 ADDROFFS_REG_0 = r10; | constexpr Reg64 ADDROFFS_REG_0 = r10; | ||||||
| static const Reg64 ADDROFFS_REG_1 = r11; | constexpr Reg64 ADDROFFS_REG_1 = r11; | ||||||
| /// VS loop count register (Multiplied by 16)
 | /// VS loop count register (Multiplied by 16)
 | ||||||
| static const Reg32 LOOPCOUNT_REG = r12d; | constexpr Reg32 LOOPCOUNT_REG = r12d; | ||||||
| /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
 | ||||||
| static const Reg32 LOOPCOUNT = esi; | constexpr Reg32 LOOPCOUNT = esi; | ||||||
| /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
 | ||||||
| static const Reg32 LOOPINC = edi; | constexpr Reg32 LOOPINC = edi; | ||||||
| /// Result of the previous CMP instruction for the X-component comparison
 | /// Result of the previous CMP instruction for the X-component comparison
 | ||||||
| static const Reg64 COND0 = r13; | constexpr Reg64 COND0 = r13; | ||||||
| /// Result of the previous CMP instruction for the Y-component comparison
 | /// Result of the previous CMP instruction for the Y-component comparison
 | ||||||
| static const Reg64 COND1 = r14; | constexpr Reg64 COND1 = r14; | ||||||
| /// Pointer to the UnitState instance for the current VS unit
 | /// Pointer to the UnitState instance for the current VS unit
 | ||||||
| static const Reg64 STATE = r15; | constexpr Reg64 STATE = r15; | ||||||
| /// SIMD scratch register
 | /// SIMD scratch register
 | ||||||
| static const Xmm SCRATCH = xmm0; | constexpr Xmm SCRATCH = xmm0; | ||||||
| /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
 | ||||||
| static const Xmm SRC1 = xmm1; | constexpr Xmm SRC1 = xmm1; | ||||||
| /// Loaded with the second swizzled source register, otherwise can be used as a scratch register
 | /// Loaded with the second swizzled source register, otherwise can be used as a scratch register
 | ||||||
| static const Xmm SRC2 = xmm2; | constexpr Xmm SRC2 = xmm2; | ||||||
| /// Loaded with the third swizzled source register, otherwise can be used as a scratch register
 | /// Loaded with the third swizzled source register, otherwise can be used as a scratch register
 | ||||||
| static const Xmm SRC3 = xmm3; | constexpr Xmm SRC3 = xmm3; | ||||||
| /// Additional scratch register
 | /// Additional scratch register
 | ||||||
| static const Xmm SCRATCH2 = xmm4; | constexpr Xmm SCRATCH2 = xmm4; | ||||||
| /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
 | /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
 | ||||||
| static const Xmm ONE = xmm14; | constexpr Xmm ONE = xmm14; | ||||||
| /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
 | ||||||
| static const Xmm NEGBIT = xmm15; | constexpr Xmm NEGBIT = xmm15; | ||||||
| 
 | 
 | ||||||
| // State registers that must not be modified by external functions calls
 | // State registers that must not be modified by external functions calls
 | ||||||
| // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
 | ||||||
| static const BitSet32 persistent_regs = BuildRegSet({ | static const std::bitset<32> persistent_regs = BuildRegSet({ | ||||||
|     // Pointers to register blocks
 |     // Pointers to register blocks
 | ||||||
|     UNIFORMS, |     UNIFORMS, | ||||||
|     STATE, |     STATE, | ||||||
|  | @ -356,7 +356,7 @@ void JitShader::Compile_UniformCondition(Instruction instr) { | ||||||
|     cmp(byte[UNIFORMS + offset], 0); |     cmp(byte[UNIFORMS + offset], 0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| BitSet32 JitShader::PersistentCallerSavedRegs() { | std::bitset<32> JitShader::PersistentCallerSavedRegs() { | ||||||
|     return persistent_regs & ABI_ALL_CALLER_SAVED; |     return persistent_regs & ABI_ALL_CALLER_SAVED; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include <array> | #include <array> | ||||||
|  | #include <bitset> | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <optional> | #include <optional> | ||||||
| #include <utility> | #include <utility> | ||||||
|  | @ -91,7 +92,7 @@ private: | ||||||
|      */ |      */ | ||||||
|     void Compile_Return(); |     void Compile_Return(); | ||||||
| 
 | 
 | ||||||
|     BitSet32 PersistentCallerSavedRegs(); |     std::bitset<32> PersistentCallerSavedRegs(); | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Assertion evaluated at compile-time, but only triggered if executed at runtime. |      * Assertion evaluated at compile-time, but only triggered if executed at runtime. | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue