mirror of
https://github.com/PabloMK7/citra.git
synced 2025-01-19 02:13:05 +01:00
Merge pull request #5546 from FearlessTobi/port-5524
Port yuzu-emu/yuzu#4086 and yuzu-emu/yuzu#4611: Xbyak cleanups
This commit is contained in:
commit
5776bdda82
4 changed files with 79 additions and 70 deletions
2
externals/xbyak
vendored
2
externals/xbyak
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit 18c9caaa0a3ed5706c39f5aa86cce0db6e65b174
|
Subproject commit c306b8e5786eeeb87b8925a8af5c3bf057ff5a90
|
|
@ -4,14 +4,14 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <bitset>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
#include <xbyak.h>
|
#include <xbyak.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_set.h"
|
|
||||||
|
|
||||||
namespace Common::X64 {
|
namespace Common::X64 {
|
||||||
|
|
||||||
inline int RegToIndex(const Xbyak::Reg& reg) {
|
constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) {
|
||||||
using Kind = Xbyak::Reg::Kind;
|
using Kind = Xbyak::Reg::Kind;
|
||||||
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
||||||
"RegSet only support GPRs and XMM registers.");
|
"RegSet only support GPRs and XMM registers.");
|
||||||
|
@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
|
||||||
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
|
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Xbyak::Reg64 IndexToReg64(int reg_index) {
|
constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
|
||||||
ASSERT(reg_index < 16);
|
ASSERT(reg_index < 16);
|
||||||
return Xbyak::Reg64(reg_index);
|
return Xbyak::Reg64(static_cast<int>(reg_index));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Xbyak::Xmm IndexToXmm(int reg_index) {
|
constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
|
||||||
ASSERT(reg_index >= 16 && reg_index < 32);
|
ASSERT(reg_index >= 16 && reg_index < 32);
|
||||||
return Xbyak::Xmm(reg_index - 16);
|
return Xbyak::Xmm(static_cast<int>(reg_index - 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Xbyak::Reg IndexToReg(int reg_index) {
|
constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) {
|
||||||
if (reg_index < 16) {
|
if (reg_index < 16) {
|
||||||
return IndexToReg64(reg_index);
|
return IndexToReg64(reg_index);
|
||||||
} else {
|
} else {
|
||||||
|
@ -37,27 +37,27 @@ inline Xbyak::Reg IndexToReg(int reg_index) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline BitSet32 BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
|
inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
|
||||||
BitSet32 bits;
|
std::bitset<32> bits;
|
||||||
for (const Xbyak::Reg& reg : regs) {
|
for (const Xbyak::Reg& reg : regs) {
|
||||||
bits[RegToIndex(reg)] = true;
|
bits[RegToIndex(reg)] = true;
|
||||||
}
|
}
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BitSet32 ABI_ALL_GPRS(0x0000FFFF);
|
constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
|
||||||
const BitSet32 ABI_ALL_XMMS(0xFFFF0000);
|
constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
// Microsoft x64 ABI
|
// Microsoft x64 ABI
|
||||||
const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
||||||
const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
|
constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
|
||||||
const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
|
constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
|
||||||
const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
|
constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
|
||||||
const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
|
constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
|
||||||
|
|
||||||
const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
|
const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||||
// GPRs
|
// GPRs
|
||||||
Xbyak::util::rcx,
|
Xbyak::util::rcx,
|
||||||
Xbyak::util::rdx,
|
Xbyak::util::rdx,
|
||||||
|
@ -74,7 +74,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||||
Xbyak::util::xmm5,
|
Xbyak::util::xmm5,
|
||||||
});
|
});
|
||||||
|
|
||||||
const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
||||||
// GPRs
|
// GPRs
|
||||||
Xbyak::util::rbx,
|
Xbyak::util::rbx,
|
||||||
Xbyak::util::rsi,
|
Xbyak::util::rsi,
|
||||||
|
@ -102,13 +102,13 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0x20;
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// System V x86-64 ABI
|
// System V x86-64 ABI
|
||||||
const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
||||||
const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
|
constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
|
||||||
const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
|
constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
|
||||||
const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
|
constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
|
||||||
const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
|
constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
|
||||||
|
|
||||||
const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
|
const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||||
// GPRs
|
// GPRs
|
||||||
Xbyak::util::rcx,
|
Xbyak::util::rcx,
|
||||||
Xbyak::util::rdx,
|
Xbyak::util::rdx,
|
||||||
|
@ -137,7 +137,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||||
Xbyak::util::xmm15,
|
Xbyak::util::xmm15,
|
||||||
});
|
});
|
||||||
|
|
||||||
const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
||||||
// GPRs
|
// GPRs
|
||||||
Xbyak::util::rbx,
|
Xbyak::util::rbx,
|
||||||
Xbyak::util::rbp,
|
Xbyak::util::rbp,
|
||||||
|
@ -151,13 +151,17 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment,
|
struct ABIFrameInfo {
|
||||||
std::size_t needed_frame_size, s32* out_subtraction,
|
s32 subtraction;
|
||||||
s32* out_xmm_offset) {
|
s32 xmm_offset;
|
||||||
int count = (regs & ABI_ALL_GPRS).Count();
|
};
|
||||||
|
|
||||||
|
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment,
|
||||||
|
std::size_t needed_frame_size) {
|
||||||
|
int count = (regs & ABI_ALL_GPRS).count();
|
||||||
rsp_alignment -= count * 8;
|
rsp_alignment -= count * 8;
|
||||||
std::size_t subtraction = 0;
|
std::size_t subtraction = 0;
|
||||||
int xmm_count = (regs & ABI_ALL_XMMS).Count();
|
int xmm_count = (regs & ABI_ALL_XMMS).count();
|
||||||
if (xmm_count) {
|
if (xmm_count) {
|
||||||
// If we have any XMMs to save, we must align the stack here.
|
// If we have any XMMs to save, we must align the stack here.
|
||||||
subtraction = rsp_alignment & 0xF;
|
subtraction = rsp_alignment & 0xF;
|
||||||
|
@ -170,45 +174,49 @@ inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment,
|
||||||
rsp_alignment -= subtraction;
|
rsp_alignment -= subtraction;
|
||||||
subtraction += rsp_alignment & 0xF;
|
subtraction += rsp_alignment & 0xF;
|
||||||
|
|
||||||
*out_subtraction = (s32)subtraction;
|
return ABIFrameInfo{static_cast<s32>(subtraction),
|
||||||
*out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
|
static_cast<s32>(subtraction - xmm_base_subtraction)};
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
|
inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||||
std::size_t rsp_alignment,
|
std::size_t rsp_alignment,
|
||||||
std::size_t needed_frame_size = 0) {
|
std::size_t needed_frame_size = 0) {
|
||||||
s32 subtraction, xmm_offset;
|
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
|
||||||
|
|
||||||
for (int reg_index : (regs & ABI_ALL_GPRS)) {
|
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||||
code.push(IndexToReg64(reg_index));
|
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||||
|
code.push(IndexToReg64(i));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subtraction != 0) {
|
if (frame_info.subtraction != 0) {
|
||||||
code.sub(code.rsp, subtraction);
|
code.sub(code.rsp, frame_info.subtraction);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int reg_index : (regs & ABI_ALL_XMMS)) {
|
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||||
code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(reg_index));
|
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||||
xmm_offset += 0x10;
|
code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
|
||||||
|
frame_info.xmm_offset += 0x10;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ABI_SHADOW_SPACE;
|
return ABI_SHADOW_SPACE;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
|
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||||
std::size_t rsp_alignment,
|
std::size_t rsp_alignment,
|
||||||
std::size_t needed_frame_size = 0) {
|
std::size_t needed_frame_size = 0) {
|
||||||
s32 subtraction, xmm_offset;
|
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
|
||||||
|
|
||||||
for (int reg_index : (regs & ABI_ALL_XMMS)) {
|
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||||
code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + xmm_offset]);
|
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||||
xmm_offset += 0x10;
|
code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
|
||||||
|
frame_info.xmm_offset += 0x10;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subtraction != 0) {
|
if (frame_info.subtraction != 0) {
|
||||||
code.add(code.rsp, subtraction);
|
code.add(code.rsp, frame_info.subtraction);
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPRs need to be popped in reverse order
|
// GPRs need to be popped in reverse order
|
||||||
|
|
|
@ -102,40 +102,40 @@ const JitFunction instr_table[64] = {
|
||||||
// purposes, as documented below:
|
// purposes, as documented below:
|
||||||
|
|
||||||
/// Pointer to the uniform memory
|
/// Pointer to the uniform memory
|
||||||
static const Reg64 UNIFORMS = r9;
|
constexpr Reg64 UNIFORMS = r9;
|
||||||
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
||||||
static const Reg64 ADDROFFS_REG_0 = r10;
|
constexpr Reg64 ADDROFFS_REG_0 = r10;
|
||||||
static const Reg64 ADDROFFS_REG_1 = r11;
|
constexpr Reg64 ADDROFFS_REG_1 = r11;
|
||||||
/// VS loop count register (Multiplied by 16)
|
/// VS loop count register (Multiplied by 16)
|
||||||
static const Reg32 LOOPCOUNT_REG = r12d;
|
constexpr Reg32 LOOPCOUNT_REG = r12d;
|
||||||
/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
|
/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
|
||||||
static const Reg32 LOOPCOUNT = esi;
|
constexpr Reg32 LOOPCOUNT = esi;
|
||||||
/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
|
/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
|
||||||
static const Reg32 LOOPINC = edi;
|
constexpr Reg32 LOOPINC = edi;
|
||||||
/// Result of the previous CMP instruction for the X-component comparison
|
/// Result of the previous CMP instruction for the X-component comparison
|
||||||
static const Reg64 COND0 = r13;
|
constexpr Reg64 COND0 = r13;
|
||||||
/// Result of the previous CMP instruction for the Y-component comparison
|
/// Result of the previous CMP instruction for the Y-component comparison
|
||||||
static const Reg64 COND1 = r14;
|
constexpr Reg64 COND1 = r14;
|
||||||
/// Pointer to the UnitState instance for the current VS unit
|
/// Pointer to the UnitState instance for the current VS unit
|
||||||
static const Reg64 STATE = r15;
|
constexpr Reg64 STATE = r15;
|
||||||
/// SIMD scratch register
|
/// SIMD scratch register
|
||||||
static const Xmm SCRATCH = xmm0;
|
constexpr Xmm SCRATCH = xmm0;
|
||||||
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
||||||
static const Xmm SRC1 = xmm1;
|
constexpr Xmm SRC1 = xmm1;
|
||||||
/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
|
/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
|
||||||
static const Xmm SRC2 = xmm2;
|
constexpr Xmm SRC2 = xmm2;
|
||||||
/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
|
/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
|
||||||
static const Xmm SRC3 = xmm3;
|
constexpr Xmm SRC3 = xmm3;
|
||||||
/// Additional scratch register
|
/// Additional scratch register
|
||||||
static const Xmm SCRATCH2 = xmm4;
|
constexpr Xmm SCRATCH2 = xmm4;
|
||||||
/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
|
/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
|
||||||
static const Xmm ONE = xmm14;
|
constexpr Xmm ONE = xmm14;
|
||||||
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
|
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
|
||||||
static const Xmm NEGBIT = xmm15;
|
constexpr Xmm NEGBIT = xmm15;
|
||||||
|
|
||||||
// State registers that must not be modified by external function calls
|
// State registers that must not be modified by external function calls
|
||||||
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
||||||
static const BitSet32 persistent_regs = BuildRegSet({
|
static const std::bitset<32> persistent_regs = BuildRegSet({
|
||||||
// Pointers to register blocks
|
// Pointers to register blocks
|
||||||
UNIFORMS,
|
UNIFORMS,
|
||||||
STATE,
|
STATE,
|
||||||
|
@ -356,7 +356,7 @@ void JitShader::Compile_UniformCondition(Instruction instr) {
|
||||||
cmp(byte[UNIFORMS + offset], 0);
|
cmp(byte[UNIFORMS + offset], 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
std::bitset<32> JitShader::PersistentCallerSavedRegs() {
|
||||||
return persistent_regs & ABI_ALL_CALLER_SAVED;
|
return persistent_regs & ABI_ALL_CALLER_SAVED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <bitset>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -91,7 +92,7 @@ private:
|
||||||
*/
|
*/
|
||||||
void Compile_Return();
|
void Compile_Return();
|
||||||
|
|
||||||
BitSet32 PersistentCallerSavedRegs();
|
std::bitset<32> PersistentCallerSavedRegs();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assertion evaluated at compile-time, but only triggered if executed at runtime.
|
* Assertion evaluated at compile-time, but only triggered if executed at runtime.
|
||||||
|
|
Loading…
Reference in a new issue