mirror of
https://github.com/PabloMK7/citra.git
synced 2025-09-11 13:20:04 +00:00
Refactor software renderer (#6621)
This commit is contained in:
parent
7198243319
commit
9b82de6b24
39 changed files with 1815 additions and 1796 deletions
|
@ -54,12 +54,12 @@ struct DebugData<true> {
|
|||
LOOP_INT_IN = 0x800,
|
||||
};
|
||||
|
||||
Common::Vec4<float24> src1;
|
||||
Common::Vec4<float24> src2;
|
||||
Common::Vec4<float24> src3;
|
||||
Common::Vec4<f24> src1;
|
||||
Common::Vec4<f24> src2;
|
||||
Common::Vec4<f24> src3;
|
||||
|
||||
Common::Vec4<float24> dest_in;
|
||||
Common::Vec4<float24> dest_out;
|
||||
Common::Vec4<f24> dest_in;
|
||||
Common::Vec4<f24> dest_out;
|
||||
|
||||
s32 address_registers[2];
|
||||
bool conditional_code[2];
|
||||
|
@ -89,7 +89,7 @@ template <DebugDataRecord::Type type, typename ValueType>
|
|||
inline void SetField(DebugDataRecord& record, ValueType value);
|
||||
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
|
||||
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, f24* value) {
|
||||
record.src1.x = value[0];
|
||||
record.src1.y = value[1];
|
||||
record.src1.z = value[2];
|
||||
|
@ -97,7 +97,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
|
|||
}
|
||||
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
|
||||
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, f24* value) {
|
||||
record.src2.x = value[0];
|
||||
record.src2.y = value[1];
|
||||
record.src2.z = value[2];
|
||||
|
@ -105,7 +105,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
|
|||
}
|
||||
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
|
||||
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, f24* value) {
|
||||
record.src3.x = value[0];
|
||||
record.src3.y = value[1];
|
||||
record.src3.z = value[2];
|
||||
|
@ -113,7 +113,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
|
|||
}
|
||||
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
|
||||
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, f24* value) {
|
||||
record.dest_in.x = value[0];
|
||||
record.dest_in.y = value[1];
|
||||
record.dest_in.z = value[2];
|
||||
|
@ -121,7 +121,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
|
|||
}
|
||||
|
||||
template <>
|
||||
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
|
||||
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, f24* value) {
|
||||
record.dest_out.x = value[0];
|
||||
record.dest_out.y = value[1];
|
||||
record.dest_out.z = value[2];
|
||||
|
|
|
@ -5,10 +5,10 @@
|
|||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include "common/arch.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_set.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/regs_shader.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
@ -41,11 +41,11 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
|
|||
// Allow us to overflow OutputVertex to avoid branches, since
|
||||
// RasterizerRegs::VSOutputAttributes::INVALID would write to slot 31, which
|
||||
// would be out of bounds otherwise.
|
||||
std::array<float24, 32> vertex_slots_overflow;
|
||||
std::array<f24, 32> vertex_slots_overflow;
|
||||
};
|
||||
|
||||
// Assert that OutputVertex has enough space for 24 semantic registers
|
||||
static_assert(sizeof(std::array<float24, 24>) == sizeof(ret),
|
||||
static_assert(sizeof(std::array<f24, 24>) == sizeof(ret),
|
||||
"Struct and array have different sizes.");
|
||||
|
||||
unsigned int num_attributes = regs.vs_output_total & 7;
|
||||
|
@ -61,7 +61,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
|
|||
// interpolation
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
float c = std::fabs(ret.color[i].ToFloat32());
|
||||
ret.color[i] = float24::FromFloat32(c < 1.0f ? c : 1.0f);
|
||||
ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f);
|
||||
}
|
||||
|
||||
LOG_TRACE(HW_GPU,
|
||||
|
@ -86,7 +86,7 @@ void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input
|
|||
}
|
||||
}
|
||||
|
||||
static void CopyRegistersToOutput(std::span<Common::Vec4<float24>, 16> regs, u32 mask,
|
||||
static void CopyRegistersToOutput(std::span<Common::Vec4<f24>, 16> regs, u32 mask,
|
||||
AttributeBuffer& buffer) {
|
||||
int output_i = 0;
|
||||
for (int reg : Common::BitSet<u32>(mask)) {
|
||||
|
@ -108,7 +108,7 @@ GSEmitter::~GSEmitter() {
|
|||
delete handlers;
|
||||
}
|
||||
|
||||
void GSEmitter::Emit(std::span<Common::Vec4<float24>, 16> output_regs) {
|
||||
void GSEmitter::Emit(std::span<Common::Vec4<f24>, 16> output_regs) {
|
||||
ASSERT(vertex_id < 3);
|
||||
// TODO: This should be merged with UnitState::WriteOutput somehow
|
||||
CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]);
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include <boost/serialization/access.hpp>
|
||||
#include <boost/serialization/array.hpp>
|
||||
#include <boost/serialization/base_object.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
|
@ -29,7 +28,7 @@ using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>;
|
|||
using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>;
|
||||
|
||||
struct AttributeBuffer {
|
||||
alignas(16) Common::Vec4<float24> attr[16];
|
||||
alignas(16) Common::Vec4<f24> attr[16];
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
|
@ -46,16 +45,16 @@ using VertexHandler = std::function<void(const AttributeBuffer&)>;
|
|||
using WindingSetter = std::function<void()>;
|
||||
|
||||
struct OutputVertex {
|
||||
Common::Vec4<float24> pos;
|
||||
Common::Vec4<float24> quat;
|
||||
Common::Vec4<float24> color;
|
||||
Common::Vec2<float24> tc0;
|
||||
Common::Vec2<float24> tc1;
|
||||
float24 tc0_w;
|
||||
Common::Vec4<f24> pos;
|
||||
Common::Vec4<f24> quat;
|
||||
Common::Vec4<f24> color;
|
||||
Common::Vec2<f24> tc0;
|
||||
Common::Vec2<f24> tc1;
|
||||
f24 tc0_w;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
Common::Vec3<float24> view;
|
||||
Common::Vec3<f24> view;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
Common::Vec2<float24> tc2;
|
||||
Common::Vec2<f24> tc2;
|
||||
|
||||
static void ValidateSemantics(const RasterizerRegs& regs);
|
||||
static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs,
|
||||
|
@ -76,8 +75,8 @@ private:
|
|||
friend class boost::serialization::access;
|
||||
};
|
||||
#define ASSERT_POS(var, pos) \
|
||||
static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
|
||||
"offset.")
|
||||
static_assert(offsetof(OutputVertex, var) == pos * sizeof(f24), "Semantic at wrong " \
|
||||
"offset.")
|
||||
ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X);
|
||||
ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X);
|
||||
ASSERT_POS(color, RasterizerRegs::VSOutputAttributes::COLOR_R);
|
||||
|
@ -109,7 +108,7 @@ struct GSEmitter {
|
|||
|
||||
GSEmitter();
|
||||
~GSEmitter();
|
||||
void Emit(std::span<Common::Vec4<float24>, 16> output_regs);
|
||||
void Emit(std::span<Common::Vec4<f24>, 16> output_regs);
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
|
@ -136,9 +135,9 @@ struct UnitState {
|
|||
struct Registers {
|
||||
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
||||
// required to be 16-byte aligned.
|
||||
alignas(16) std::array<Common::Vec4<float24>, 16> input;
|
||||
alignas(16) std::array<Common::Vec4<float24>, 16> temporary;
|
||||
alignas(16) std::array<Common::Vec4<float24>, 16> output;
|
||||
alignas(16) std::array<Common::Vec4<f24>, 16> input;
|
||||
alignas(16) std::array<Common::Vec4<f24>, 16> temporary;
|
||||
alignas(16) std::array<Common::Vec4<f24>, 16> output;
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
|
@ -160,18 +159,16 @@ struct UnitState {
|
|||
GSEmitter* emitter_ptr;
|
||||
|
||||
static std::size_t InputOffset(int register_index) {
|
||||
return offsetof(UnitState, registers.input) +
|
||||
register_index * sizeof(Common::Vec4<float24>);
|
||||
return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4<f24>);
|
||||
}
|
||||
|
||||
static std::size_t OutputOffset(int register_index) {
|
||||
return offsetof(UnitState, registers.output) +
|
||||
register_index * sizeof(Common::Vec4<float24>);
|
||||
return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4<f24>);
|
||||
}
|
||||
|
||||
static std::size_t TemporaryOffset(int register_index) {
|
||||
return offsetof(UnitState, registers.temporary) +
|
||||
register_index * sizeof(Common::Vec4<float24>);
|
||||
register_index * sizeof(Common::Vec4<f24>);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -219,13 +216,13 @@ private:
|
|||
struct Uniforms {
|
||||
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
||||
// therefore required to be 16-byte aligned.
|
||||
alignas(16) std::array<Common::Vec4<float24>, 96> f;
|
||||
alignas(16) std::array<Common::Vec4<f24>, 96> f;
|
||||
|
||||
std::array<bool, 16> b;
|
||||
std::array<Common::Vec4<u8>, 4> i;
|
||||
|
||||
static std::size_t GetFloatUniformOffset(unsigned index) {
|
||||
return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<float24>);
|
||||
return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<f24>);
|
||||
}
|
||||
|
||||
static std::size_t GetBoolUniformOffset(unsigned index) {
|
||||
|
|
|
@ -80,7 +80,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
const auto& program_code = setup.program_code;
|
||||
|
||||
// Placeholder for invalid inputs
|
||||
static float24 dummy_vec4_float24[4];
|
||||
static f24 dummy_vec4_float24[4];
|
||||
|
||||
unsigned iteration = 0;
|
||||
bool exit_loop = false;
|
||||
|
@ -111,7 +111,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
|
||||
debug_data.max_offset = std::max<u32>(debug_data.max_offset, 1 + program_counter);
|
||||
|
||||
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
|
||||
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const f24* {
|
||||
switch (source_reg.GetRegisterType()) {
|
||||
case RegisterType::Input:
|
||||
return &state.registers.input[source_reg.GetIndex()].x;
|
||||
|
@ -137,15 +137,15 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
? 0
|
||||
: state.address_registers[instr.common.address_register_index - 1];
|
||||
|
||||
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
|
||||
(is_inverted ? 0 : address_offset));
|
||||
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
|
||||
(is_inverted ? address_offset : 0));
|
||||
const f24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
|
||||
(is_inverted ? 0 : address_offset));
|
||||
const f24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
|
||||
(is_inverted ? address_offset : 0));
|
||||
|
||||
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
|
||||
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
|
||||
|
||||
float24 src1[4] = {
|
||||
f24 src1[4] = {
|
||||
src1_[(int)swizzle.src1_selector_0.Value()],
|
||||
src1_[(int)swizzle.src1_selector_1.Value()],
|
||||
src1_[(int)swizzle.src1_selector_2.Value()],
|
||||
|
@ -157,7 +157,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
src1[2] = -src1[2];
|
||||
src1[3] = -src1[3];
|
||||
}
|
||||
float24 src2[4] = {
|
||||
f24 src2[4] = {
|
||||
src2_[(int)swizzle.src2_selector_0.Value()],
|
||||
src2_[(int)swizzle.src2_selector_1.Value()],
|
||||
src2_[(int)swizzle.src2_selector_2.Value()],
|
||||
|
@ -170,12 +170,11 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
src2[3] = -src2[3];
|
||||
}
|
||||
|
||||
float24* dest =
|
||||
(instr.common.dest.Value() < 0x10)
|
||||
? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
|
||||
: (instr.common.dest.Value() < 0x20)
|
||||
? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
f24* dest = (instr.common.dest.Value() < 0x10)
|
||||
? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
|
||||
: (instr.common.dest.Value() < 0x20)
|
||||
? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
|
||||
debug_data.max_opdesc_id =
|
||||
std::max<u32>(debug_data.max_opdesc_id, 1 + instr.common.operand_desc_id);
|
||||
|
@ -216,7 +215,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
|
||||
dest[i] = f24::FromFloat32(std::floor(src1[i].ToFloat32()));
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest);
|
||||
break;
|
||||
|
@ -263,11 +262,10 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
|
||||
OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode();
|
||||
if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
|
||||
src1[3] = float24::FromFloat32(1.0f);
|
||||
src1[3] = f24::One();
|
||||
|
||||
int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
|
||||
float24 dot = std::inner_product(src1, src1 + num_components, src2,
|
||||
float24::FromFloat32(0.f));
|
||||
f24 dot = std::inner_product(src1, src1 + num_components, src2, f24::Zero());
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
|
@ -283,7 +281,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
case OpCode::Id::RCP: {
|
||||
Record<DebugDataRecord::SRC1>(debug_data, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest);
|
||||
float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
|
||||
f24 rcp_res = f24::FromFloat32(1.0f / src1[0].ToFloat32());
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -298,7 +296,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
case OpCode::Id::RSQ: {
|
||||
Record<DebugDataRecord::SRC1>(debug_data, iteration, src1);
|
||||
Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest);
|
||||
float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
|
||||
f24 rsq_res = f24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -345,8 +343,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
|
||||
: float24::FromFloat32(0.0f);
|
||||
dest[i] = (src1[i] >= src2[i]) ? f24::One() : f24::Zero();
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest);
|
||||
break;
|
||||
|
@ -360,8 +357,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
||||
dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
|
||||
: float24::FromFloat32(0.0f);
|
||||
dest[i] = (src1[i] < src2[i]) ? f24::One() : f24::Zero();
|
||||
}
|
||||
Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest);
|
||||
break;
|
||||
|
@ -413,7 +409,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest);
|
||||
|
||||
// EX2 only takes first component exp2 and writes it to all dest components
|
||||
float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32()));
|
||||
f24 ex2_res = f24::FromFloat32(std::exp2(src1[0].ToFloat32()));
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -430,7 +426,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest);
|
||||
|
||||
// LG2 only takes the first component log2 and writes it to all dest components
|
||||
float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32()));
|
||||
f24 lg2_res = f24::FromFloat32(std::log2(src1[0].ToFloat32()));
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!swizzle.DestComponentEnabled(i))
|
||||
continue;
|
||||
|
@ -466,17 +462,17 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
? 0
|
||||
: state.address_registers[instr.mad.address_register_index - 1];
|
||||
|
||||
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
|
||||
const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
|
||||
(!is_inverted * address_offset));
|
||||
const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
|
||||
(is_inverted * address_offset));
|
||||
const f24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
|
||||
const f24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
|
||||
(!is_inverted * address_offset));
|
||||
const f24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
|
||||
(is_inverted * address_offset));
|
||||
|
||||
const bool negate_src1 = ((bool)mad_swizzle.negate_src1 != false);
|
||||
const bool negate_src2 = ((bool)mad_swizzle.negate_src2 != false);
|
||||
const bool negate_src3 = ((bool)mad_swizzle.negate_src3 != false);
|
||||
|
||||
float24 src1[4] = {
|
||||
f24 src1[4] = {
|
||||
src1_[(int)mad_swizzle.src1_selector_0.Value()],
|
||||
src1_[(int)mad_swizzle.src1_selector_1.Value()],
|
||||
src1_[(int)mad_swizzle.src1_selector_2.Value()],
|
||||
|
@ -488,7 +484,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
src1[2] = -src1[2];
|
||||
src1[3] = -src1[3];
|
||||
}
|
||||
float24 src2[4] = {
|
||||
f24 src2[4] = {
|
||||
src2_[(int)mad_swizzle.src2_selector_0.Value()],
|
||||
src2_[(int)mad_swizzle.src2_selector_1.Value()],
|
||||
src2_[(int)mad_swizzle.src2_selector_2.Value()],
|
||||
|
@ -500,7 +496,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
src2[2] = -src2[2];
|
||||
src2[3] = -src2[3];
|
||||
}
|
||||
float24 src3[4] = {
|
||||
f24 src3[4] = {
|
||||
src3_[(int)mad_swizzle.src3_selector_0.Value()],
|
||||
src3_[(int)mad_swizzle.src3_selector_1.Value()],
|
||||
src3_[(int)mad_swizzle.src3_selector_2.Value()],
|
||||
|
@ -513,12 +509,11 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
|
|||
src3[3] = -src3[3];
|
||||
}
|
||||
|
||||
float24* dest =
|
||||
(instr.mad.dest.Value() < 0x10)
|
||||
? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
|
||||
: (instr.mad.dest.Value() < 0x20)
|
||||
? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
f24* dest = (instr.mad.dest.Value() < 0x10)
|
||||
? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
|
||||
: (instr.mad.dest.Value() < 0x20)
|
||||
? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||
: dummy_vec4_float24;
|
||||
|
||||
Record<DebugDataRecord::SRC1>(debug_data, iteration, src1);
|
||||
Record<DebugDataRecord::SRC2>(debug_data, iteration, src2);
|
||||
|
@ -687,7 +682,7 @@ DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
|
|||
DebugData<true> debug_data;
|
||||
|
||||
// Setup input register table
|
||||
state.registers.input.fill(Common::Vec4<float24>::AssignToAll(float24::Zero()));
|
||||
state.registers.input.fill(Common::Vec4<f24>::AssignToAll(f24::Zero()));
|
||||
state.LoadInput(config, input);
|
||||
RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
|
||||
return debug_data;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "common/arch.h"
|
||||
#if CITRA_ARCH(x86_64)
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/shader/shader_jit_x64.h"
|
||||
|
|
|
@ -813,7 +813,7 @@ void JitShader::Compile_JMP(Instruction instr) {
|
|||
}
|
||||
}
|
||||
|
||||
static void Emit(GSEmitter* emitter, Common::Vec4<float24> (*output)[16]) {
|
||||
static void Emit(GSEmitter* emitter, Common::Vec4<f24> (*output)[16]) {
|
||||
emitter->Emit(*output);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue