video_core: Refactor GPU interface (#7272)

* video_core: Refactor GPU interface

* citra_qt: Better debug widget lifetime
This commit is contained in:
GPUCode 2023-12-28 12:46:57 +02:00 committed by GitHub
parent 602f4f60d8
commit 2bb7f89c30
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
167 changed files with 4172 additions and 4866 deletions

View file

@ -914,7 +914,7 @@ void FragmentModule::WriteLogicOp() {
}
void FragmentModule::WriteBlending() {
if (!config.EmulateBlend()) [[likely]] {
if (!config.EmulateBlend() || profile.is_vulkan) [[likely]] {
return;
}
@ -1258,7 +1258,7 @@ void FragmentModule::DefineExtensions() {
use_fragment_shader_barycentric = false;
}
}
if (config.EmulateBlend()) {
if (config.EmulateBlend() && !profile.is_vulkan) {
if (profile.has_gl_ext_framebuffer_fetch) {
out += "#extension GL_EXT_shader_framebuffer_fetch : enable\n";
out += "#define destFactor color\n";

View file

@ -23,7 +23,7 @@ using nihstro::RegisterType;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
class DecompileFail : public std::runtime_error {
public:
@ -58,7 +58,7 @@ struct Subroutine {
/// Analyzes shader code and produces a set of subroutines.
class ControlFlowAnalyzer {
public:
ControlFlowAnalyzer(const Pica::Shader::ProgramCode& program_code, u32 main_offset)
ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
: program_code(program_code) {
// Recursively finds all subroutines.
@ -72,7 +72,7 @@ public:
}
private:
const Pica::Shader::ProgramCode& program_code;
const ProgramCode& program_code;
std::set<Subroutine> subroutines;
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
@ -265,9 +265,8 @@ constexpr auto GetSelectorSrc3 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc3
class GLSLGenerator {
public:
GLSLGenerator(const std::set<Subroutine>& subroutines,
const Pica::Shader::ProgramCode& program_code,
const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset,
GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
const SwizzleData& swizzle_data, u32 main_offset,
const RegGetter& inputreg_getter, const RegGetter& outputreg_getter,
bool sanitize_mul)
: subroutines(subroutines), program_code(program_code), swizzle_data(swizzle_data),
@ -921,8 +920,8 @@ private:
private:
const std::set<Subroutine>& subroutines;
const Pica::Shader::ProgramCode& program_code;
const Pica::Shader::SwizzleData& swizzle_data;
const ProgramCode& program_code;
const SwizzleData& swizzle_data;
const u32 main_offset;
const RegGetter& inputreg_getter;
const RegGetter& outputreg_getter;
@ -931,10 +930,9 @@ private:
ShaderWriter shader;
};
std::string DecompileProgram(const Pica::Shader::ProgramCode& program_code,
const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset,
const RegGetter& inputreg_getter, const RegGetter& outputreg_getter,
bool sanitize_mul) {
std::string DecompileProgram(const ProgramCode& program_code, const SwizzleData& swizzle_data,
u32 main_offset, const RegGetter& inputreg_getter,
const RegGetter& outputreg_getter, bool sanitize_mul) {
try {
auto subroutines = ControlFlowAnalyzer(program_code, main_offset).MoveSubroutines();

View file

@ -6,15 +6,14 @@
#include <functional>
#include <string>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
#include "video_core/pica/shader_setup.h"
namespace Pica::Shader::Generator::GLSL {
using RegGetter = std::function<std::string(u32)>;
std::string DecompileProgram(const Pica::Shader::ProgramCode& program_code,
const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset,
std::string DecompileProgram(const Pica::ProgramCode& program_code,
const Pica::SwizzleData& swizzle_data, u32 main_offset,
const RegGetter& inputreg_getter, const RegGetter& outputreg_getter,
bool sanitize_mul);

View file

@ -5,9 +5,10 @@
#include <string_view>
#include <fmt/format.h>
#include "common/logging/log.h"
#include "video_core/pica/regs_rasterizer.h"
#include "video_core/shader/generator/glsl_shader_decompiler.h"
#include "video_core/shader/generator/glsl_shader_gen.h"
#include "video_core/shader/generator/shader_gen.h"
using VSOutputAttributes = Pica::RasterizerRegs::VSOutputAttributes;
@ -141,7 +142,7 @@ std::string_view MakeLoadPrefix(AttribLoadFlags flag) {
return "";
}
std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
std::string GenerateVertexShader(const ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) {
std::string out;
if (separable_shader) {

View file

@ -4,9 +4,6 @@
#pragma once
#include "video_core/shader/generator/shader_gen.h"
#include "video_core/shader/shader.h"
// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead.
static constexpr char fragment_shader_precision_OES[] = R"(
#if GL_ES
@ -24,6 +21,15 @@ precision mediump uimage2D;
#endif
)";
namespace Pica {
struct ShaderSetup;
}
namespace Pica::Shader::Generator {
struct PicaVSConfig;
struct PicaFixedGSConfig;
} // namespace Pica::Shader::Generator
namespace Pica::Shader::Generator::GLSL {
/**
@ -37,7 +43,7 @@ std::string GenerateTrivialVertexShader(bool use_clip_planes, bool separable_sha
* Generates the GLSL vertex shader program source code for the given VS program
* @returns String of the shader source code; empty on failure
*/
std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
std::string GenerateVertexShader(const Pica::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader);
/**

View file

@ -6,7 +6,7 @@
namespace Pica::Shader {
FramebufferConfig::FramebufferConfig(const Pica::Regs& regs, const Profile& profile) {
FramebufferConfig::FramebufferConfig(const Pica::RegsInternal& regs, const Profile& profile) {
const auto& output_merger = regs.framebuffer.output_merger;
scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode);
depthmap_enable.Assign(regs.rasterizer.depthmap_enable);
@ -186,7 +186,7 @@ ProcTexConfig::ProcTexConfig(const Pica::TexturingRegs& regs) {
lut_filter.Assign(regs.proctex_lut.filter);
}
FSConfig::FSConfig(const Pica::Regs& regs, const UserConfig& user_, const Profile& profile)
FSConfig::FSConfig(const Pica::RegsInternal& regs, const UserConfig& user_, const Profile& profile)
: framebuffer{regs, profile}, texture{regs.texturing, profile}, lighting{regs.lighting},
proctex{regs.texturing}, user{user_} {}

View file

@ -5,7 +5,7 @@
#pragma once
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/pica/regs_internal.h"
#include "video_core/shader/generator/profile.h"
namespace Pica::Shader {
@ -17,7 +17,7 @@ struct BlendConfig {
};
struct FramebufferConfig {
explicit FramebufferConfig(const Pica::Regs& regs, const Profile& profile);
explicit FramebufferConfig(const Pica::RegsInternal& regs, const Profile& profile);
union {
u32 raw{};
@ -158,7 +158,8 @@ union UserConfig {
static_assert(std::has_unique_object_representations_v<UserConfig>);
struct FSConfig {
explicit FSConfig(const Pica::Regs& regs, const UserConfig& user, const Profile& profile);
explicit FSConfig(const Pica::RegsInternal& regs, const UserConfig& user,
const Profile& profile);
[[nodiscard]] bool TevStageUpdatesCombinerBufferColor(u32 stage_index) const {
return (stage_index < 4) && (texture.combiner_buffer_input & (1 << stage_index));

View file

@ -4,12 +4,14 @@
#include "common/bit_set.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/pica/regs_internal.h"
#include "video_core/pica/shader_setup.h"
#include "video_core/shader/generator/shader_gen.h"
#include "video_core/video_core.h"
namespace Pica::Shader::Generator {
void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) {
void PicaGSConfigState::Init(const Pica::RegsInternal& regs, bool use_clip_planes_) {
use_clip_planes = use_clip_planes_;
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
@ -34,7 +36,7 @@ void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) {
}
}
void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
void PicaVSConfigState::Init(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup,
bool use_clip_planes_, bool use_geometry_shader_) {
use_clip_planes = use_clip_planes_;
use_geometry_shader = use_geometry_shader_;
@ -42,13 +44,13 @@ void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup&
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.vs.main_offset;
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
sanitize_mul = Settings::values.shaders_accurate_mul.GetValue();
num_outputs = 0;
load_flags.fill(AttribLoadFlags::Float);
output_map.fill(16);
for (int reg : Common::BitSet<u32>(regs.vs.output_mask)) {
for (u32 reg : Common::BitSet<u32>(regs.vs.output_mask)) {
output_map[reg] = num_outputs++;
}
@ -57,12 +59,12 @@ void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup&
}
}
PicaVSConfig::PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
PicaVSConfig::PicaVSConfig(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup,
bool use_clip_planes_, bool use_geometry_shader_) {
state.Init(regs, setup, use_clip_planes_, use_geometry_shader_);
}
PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs, bool use_clip_planes_) {
PicaFixedGSConfig::PicaFixedGSConfig(const Pica::RegsInternal& regs, bool use_clip_planes_) {
state.Init(regs, use_clip_planes_);
}

View file

@ -5,8 +5,11 @@
#pragma once
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace Pica {
struct RegsInternal;
struct ShaderSetup;
} // namespace Pica
namespace Pica::Shader::Generator {
@ -41,7 +44,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AttribLoadFlags)
* PICA geometry shader.
*/
struct PicaGSConfigState {
void Init(const Pica::Regs& regs, bool use_clip_planes_);
void Init(const Pica::RegsInternal& regs, bool use_clip_planes_);
bool use_clip_planes;
@ -62,7 +65,7 @@ struct PicaGSConfigState {
* PICA vertex shader.
*/
struct PicaVSConfigState {
void Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, bool use_clip_planes_,
void Init(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup, bool use_clip_planes_,
bool use_geometry_shader_);
bool use_clip_planes;
@ -88,7 +91,7 @@ struct PicaVSConfigState {
* shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaVSConfigState> {
explicit PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
explicit PicaVSConfig(const Pica::RegsInternal& regs, Pica::ShaderSetup& setup,
bool use_clip_planes_, bool use_geometry_shader_);
};
@ -97,7 +100,7 @@ struct PicaVSConfig : Common::HashableStruct<PicaVSConfigState> {
* shader pipeline
*/
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigState> {
explicit PicaFixedGSConfig(const Pica::Regs& regs, bool use_clip_planes_);
explicit PicaFixedGSConfig(const Pica::RegsInternal& regs, bool use_clip_planes_);
};
} // namespace Pica::Shader::Generator

View file

@ -3,13 +3,13 @@
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/pica/regs_shader.h"
#include "video_core/pica/shader_setup.h"
#include "video_core/shader/generator/shader_uniforms.h"
#include "video_core/shader/shader.h"
namespace Pica::Shader::Generator {
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs,
const Pica::Shader::ShaderSetup& setup) {
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::ShaderSetup& setup) {
std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools),
[](bool value) -> BoolAligned { return {value ? 1 : 0}; });
std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i),

View file

@ -5,15 +5,12 @@
#pragma once
#include "common/vector_math.h"
#include "video_core/regs_lighting.h"
#include "video_core/pica/regs_lighting.h"
namespace Pica {
struct ShaderRegs;
}
namespace Pica::Shader {
struct ShaderSetup;
}
} // namespace Pica
namespace Pica::Shader::Generator {
@ -24,8 +21,8 @@ struct LightSrc {
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
f32 dist_atten_bias;
f32 dist_atten_scale;
};
/**

View file

@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <boost/container/small_vector.hpp>
#include "video_core/shader/generator/pica_fs_config.h"
#include "video_core/shader/generator/spv_fs_shader_gen.h"
namespace Pica::Shader::Generator::SPIRV {

View file

@ -7,7 +7,13 @@
#include <array>
#include <sirit/sirit.h>
#include "video_core/shader/generator/pica_fs_config.h"
#include "video_core/pica/regs_framebuffer.h"
#include "video_core/pica/regs_texturing.h"
namespace Pica::Shader {
struct FSConfig;
struct Profile;
} // namespace Pica::Shader
namespace Pica::Shader::Generator::SPIRV {

View file

@ -2,166 +2,23 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cmath>
#include <cstring>
#include "common/arch.h"
#include "common/assert.h"
#include "common/bit_set.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_shader.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_interpreter.h"
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include "video_core/shader/shader_jit.h"
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include "video_core/video_core.h"
#endif
#include "video_core/shader/shader.h"
namespace Pica::Shader {
void OutputVertex::ValidateSemantics(const RasterizerRegs& regs) {
u32 num_attributes = regs.vs_output_total;
ASSERT(num_attributes <= 7);
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
u32 output_register_map = regs.vs_output_attributes[attrib].raw;
for (std::size_t comp = 0; comp < 4; ++comp) {
u32 semantic = (output_register_map >> (8 * comp)) & 0x1F;
ASSERT_MSG(semantic < 24 || semantic == RasterizerRegs::VSOutputAttributes::INVALID,
"Invalid/unknown semantic id: {}", semantic);
}
}
}
/**
* Builds an OutputVertex from a shader attribute buffer.
*
* Each active entry of regs.vs_output_attributes names, per component (map_x/y/z/w), the
* destination slot that receives the matching component of input.attr. Slots that are never
* written keep the value f24::One().
*
* @param regs Rasterizer registers; only the low 3 bits of vs_output_total are used.
* @param input Attribute buffer whose first (vs_output_total & 7) entries are read.
* @returns The assembled vertex, with each colour component replaced by min(|c|, 1).
*/
OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
const AttributeBuffer& input) {
// Setup output data
// NOTE(review): `ret` is read back after writing through `vertex_slots_overflow`, i.e. this
// relies on both members of the anonymous union sharing storage.
union {
OutputVertex ret{};
// Allow us to overflow OutputVertex to avoid branches, since
// RasterizerRegs::VSOutputAttributes::INVALID would write to slot 31, which
// would be out of bounds otherwise.
std::array<f24, 32> vertex_slots_overflow;
};
// Some games use attributes without setting them in GPUREG_SH_OUTMAP_Oi
// Hardware tests have shown that they are initialized to 1.f in this case.
vertex_slots_overflow.fill(f24::One());
// Assert that OutputVertex has enough space for 24 semantic registers
static_assert(sizeof(std::array<f24, 24>) == sizeof(ret),
"Struct and array have different sizes.");
// Masking with 7 keeps the attribute count within 0..7.
u32 num_attributes = regs.vs_output_total & 7;
for (std::size_t attrib = 0; attrib < num_attributes; ++attrib) {
const auto output_register_map = regs.vs_output_attributes[attrib];
// Scatter the four components of this attribute to their mapped slots.
vertex_slots_overflow[output_register_map.map_x] = input.attr[attrib][0];
vertex_slots_overflow[output_register_map.map_y] = input.attr[attrib][1];
vertex_slots_overflow[output_register_map.map_z] = input.attr[attrib][2];
vertex_slots_overflow[output_register_map.map_w] = input.attr[attrib][3];
}
// The hardware takes the absolute and saturates vertex colors like this, *before* doing
// interpolation
for (u32 i = 0; i < 4; ++i) {
float c = std::fabs(ret.color[i].ToFloat32());
// Written as `c < 1.0f ? c : 1.0f` so that a NaN (for which the comparison is false)
// also ends up as 1.0f.
ret.color[i] = f24::FromFloat32(c < 1.0f ? c : 1.0f);
}
LOG_TRACE(HW_GPU,
"Output vertex: pos({:.2}, {:.2}, {:.2}, {:.2}), quat({:.2}, {:.2}, {:.2}, {:.2}), "
"col({:.2}, {:.2}, {:.2}, {:.2}), tc0({:.2}, {:.2}), view({:.2}, {:.2}, {:.2})",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
return ret;
}
void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input) {
const u32 max_attribute = config.max_input_attribute_index;
for (u32 attr = 0; attr <= max_attribute; ++attr) {
u32 reg = config.GetRegisterForAttribute(attr);
registers.input[reg] = input.attr[attr];
}
}
/**
 * Packs the registers selected by `mask` into consecutive slots of `buffer.attr`.
 *
 * Registers are visited in the order produced by iterating Common::BitSet<u32>(mask); the
 * n-th visited register is written to buffer.attr[n].
 *
 * @param regs The 16 output registers to read from.
 * @param mask Bit mask selecting which registers are copied.
 * @param buffer Destination attribute buffer.
 */
static void CopyRegistersToOutput(std::span<Common::Vec4<f24>, 16> regs, u32 mask,
                                  AttributeBuffer& buffer) {
    Common::BitSet<u32> enabled_registers(mask);
    int next_slot = 0;
    for (int source_index : enabled_registers) {
        buffer.attr[next_slot] = regs[source_index];
        ++next_slot;
    }
}
void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) {
CopyRegistersToOutput(registers.output, config.output_mask, output);
}
UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {}
GSEmitter::GSEmitter() {
handlers = new Handlers;
}
GSEmitter::~GSEmitter() {
delete handlers;
}
/**
 * Stores the current output registers as vertex `vertex_id` and, when `prim_emit` is set,
 * forwards all buffered vertices to the vertex handler.
 *
 * If `winding` is set the winding setter is invoked before the vertices are forwarded.
 *
 * @param output_regs The 16 shader output registers of the emitting unit.
 */
void GSEmitter::Emit(std::span<Common::Vec4<f24>, 16> output_regs) {
    ASSERT(vertex_id < 3);

    // TODO: This should be merged with UnitState::WriteOutput somehow
    CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]);

    if (!prim_emit) {
        return;
    }

    if (winding) {
        handlers->winding_setter();
    }
    for (const AttributeBuffer& vertex : buffer) {
        handlers->vertex_handler(vertex);
    }
}
GSUnitState::GSUnitState() : UnitState(&emitter) {}
/**
 * Installs the callbacks used by the emitter.
 *
 * @param vertex_handler Callback receiving each emitted vertex.
 * @param winding_setter Callback invoked by the emitter when its `winding` flag is set.
 */
void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) {
    auto& callbacks = *emitter.handlers;
    callbacks.vertex_handler = std::move(vertex_handler);
    callbacks.winding_setter = std::move(winding_setter);
}
void GSUnitState::ConfigOutput(const ShaderRegs& config) {
emitter.output_mask = config.output_mask;
}
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
namespace Pica {
std::unique_ptr<ShaderEngine> CreateEngine(bool use_jit) {
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
static std::unique_ptr<JitEngine> jit_engine;
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
static InterpreterEngine interpreter_engine;
ShaderEngine* GetEngine() {
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
// TODO(yuriks): Re-initialize on each change rather than being persistent
if (VideoCore::g_shader_jit_enabled) {
if (jit_engine == nullptr) {
jit_engine = std::make_unique<JitEngine>();
}
return jit_engine.get();
if (use_jit) {
return std::make_unique<Shader::JitEngine>();
}
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#endif
return &interpreter_engine;
return std::make_unique<Shader::InterpreterEngine>();
}
void Shutdown() {
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
jit_engine.reset();
#endif // CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
}
} // namespace Pica::Shader
} // namespace Pica

View file

@ -4,301 +4,12 @@
#pragma once
#include <array>
#include <cstddef>
#include <functional>
#include <span>
#include <type_traits>
#include <boost/serialization/access.hpp>
#include <boost/serialization/array.hpp>
#include <boost/serialization/base_object.hpp>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_shader.h"
namespace Pica::Shader {
namespace Pica {
/// Number of 32-bit words in a ProgramCode array.
constexpr u32 MAX_PROGRAM_CODE_LENGTH = 4096;
/// Number of 32-bit words in a SwizzleData array.
constexpr u32 MAX_SWIZZLE_DATA_LENGTH = 4096;
/// Fixed-size storage for shader program words.
using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>;
/// Fixed-size storage for shader swizzle words.
using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>;
/// A set of 16 four-component f24 attributes, as passed into and out of shader units.
struct AttributeBuffer {
// Storage is explicitly aligned to 16 bytes.
alignas(16) Common::Vec4<f24> attr[16];
private:
friend class boost::serialization::access;
/// Boost.Serialization hook: archives `attr`; `file_version` is not used.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& attr;
}
};
/// Handler type for receiving vertex outputs from vertex shader or geometry shader
using VertexHandler = std::function<void(const AttributeBuffer&)>;
/// Handler type for signaling to invert the vertex order of the next triangle
using WindingSetter = std::function<void()>;
/**
* Vertex data laid out as 24 consecutive f24-sized slots. Member order and the explicit padding
* words determine each semantic's slot, so members must not be reordered.
*/
struct OutputVertex {
Common::Vec4<f24> pos;
Common::Vec4<f24> quat;
Common::Vec4<f24> color;
Common::Vec2<f24> tc0;
Common::Vec2<f24> tc1;
f24 tc0_w;
INSERT_PADDING_WORDS(1);
Common::Vec3<f24> view;
INSERT_PADDING_WORDS(1);
Common::Vec2<f24> tc2;
/// Asserts that the output semantic mapping in `regs` only uses known semantic ids.
static void ValidateSemantics(const RasterizerRegs& regs);
/// Builds an OutputVertex from `output` using the semantic mapping in `regs`.
static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs,
const AttributeBuffer& output);
private:
/// Boost.Serialization hook: archives every named member; padding words are skipped.
template <class Archive>
void serialize(Archive& ar, const u32) {
ar& pos;
ar& quat;
ar& color;
ar& tc0;
ar& tc1;
ar& tc0_w;
ar& view;
ar& tc2;
}
friend class boost::serialization::access;
};
// Compile-time layout checks: every OutputVertex member must sit at the f24 slot whose index is
// the matching RasterizerRegs::VSOutputAttributes semantic value. ASSERT_POS is a local helper
// and is undefined again once the checks are done.
#define ASSERT_POS(var, pos) \
static_assert(offsetof(OutputVertex, var) == pos * sizeof(f24), "Semantic at wrong " \
"offset.")
ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X);
ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X);
ASSERT_POS(color, RasterizerRegs::VSOutputAttributes::COLOR_R);
ASSERT_POS(tc0, RasterizerRegs::VSOutputAttributes::TEXCOORD0_U);
ASSERT_POS(tc1, RasterizerRegs::VSOutputAttributes::TEXCOORD1_U);
ASSERT_POS(tc0_w, RasterizerRegs::VSOutputAttributes::TEXCOORD0_W);
ASSERT_POS(view, RasterizerRegs::VSOutputAttributes::VIEW_X);
ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U);
#undef ASSERT_POS
// OutputVertex must stay trivial and occupy exactly 24 float-sized words.
static_assert(std::is_trivial_v<OutputVertex>, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
/**
* This structure contains state information for primitive emitting in geometry shader.
*
* It buffers up to three vertices and forwards them to the installed handlers from Emit().
*/
struct GSEmitter {
std::array<AttributeBuffer, 3> buffer;
// Index into `buffer` that the next Emit() call writes to.
u8 vertex_id;
// When set, Emit() forwards every buffered vertex to the vertex handler.
bool prim_emit;
// When set, Emit() calls the winding setter before forwarding vertices.
bool winding;
// Selects which output registers Emit() copies into the buffered vertex.
u32 output_mask;
// Function objects are hidden behind a raw pointer to make the structure standard layout type,
// for JIT to use offsetof to access other members.
// NOTE(review): no copy/move operations are declared in this struct, so an implicit copy would
// share the same `handlers` pointer — confirm instances are never copied.
struct Handlers {
VertexHandler vertex_handler;
WindingSetter winding_setter;
}* handlers;
GSEmitter();
~GSEmitter();
/// Stores `output_regs` as vertex `vertex_id` and emits the primitive if `prim_emit` is set.
void Emit(std::span<Common::Vec4<f24>, 16> output_regs);
private:
friend class boost::serialization::access;
/// Boost.Serialization hook: archives all plain-data members; `file_version` is not used.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& buffer;
ar& vertex_id;
ar& prim_emit;
ar& winding;
ar& output_mask;
// Handlers are ignored because they're constant
}
};
static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type");
/**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At present, Citra only implements a
* single shader unit that processes all shaders serially. Putting the state information in a struct
* here will make it easier for us to parallelize the shader processing later.
*/
struct UnitState {
/// @param emitter Optional geometry-shader emitter, stored in `emitter_ptr`.
explicit UnitState(GSEmitter* emitter = nullptr);
// Two Address registers and one loop counter
// TODO: How many bits do these actually have?
s32 address_registers[3];
bool conditional_code[2];
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned.
alignas(16) std::array<Common::Vec4<f24>, 16> input;
alignas(16) std::array<Common::Vec4<f24>, 16> temporary;
alignas(16) std::array<Common::Vec4<f24>, 16> output;
private:
friend class boost::serialization::access;
/// Boost.Serialization hook: archives the three register banks in declaration order.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& input;
ar& temporary;
ar& output;
}
} registers;
static_assert(std::is_trivial_v<Registers>, "Structure is not POD");
// Null unless an emitter was passed to the constructor.
GSEmitter* emitter_ptr;
/// Byte offset, from the start of UnitState, of input register `register_index`.
static std::size_t InputOffset(s32 register_index) {
return offsetof(UnitState, registers.input) + register_index * sizeof(Common::Vec4<f24>);
}
/// Byte offset, from the start of UnitState, of output register `register_index`.
static std::size_t OutputOffset(s32 register_index) {
return offsetof(UnitState, registers.output) + register_index * sizeof(Common::Vec4<f24>);
}
/// Byte offset, from the start of UnitState, of temporary register `register_index`.
static std::size_t TemporaryOffset(s32 register_index) {
return offsetof(UnitState, registers.temporary) +
register_index * sizeof(Common::Vec4<f24>);
}
/**
* Loads the unit state with an input vertex.
*
* @param config Shader configuration registers corresponding to the unit.
* @param input Attribute buffer to load into the input registers.
*/
void LoadInput(const ShaderRegs& config, const AttributeBuffer& input);
/**
* Writes the output registers selected by the configuration's output mask into `output`.
*
* @param config Shader configuration registers corresponding to the unit.
* @param output Attribute buffer receiving the selected output registers.
*/
void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
private:
friend class boost::serialization::access;
/// Boost.Serialization hook: archives registers, condition codes and address registers.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& registers;
ar& conditional_code;
ar& address_registers;
// emitter_ptr is only set by GSUnitState and is serialized there
}
};
/**
* This is an extended shader unit state that represents the special unit that can run both vertex
* shader and geometry shader. It contains an additional primitive emitter and utilities for
* geometry shader.
*/
struct GSUnitState : public UnitState {
/// Constructs the base UnitState with a pointer to this object's `emitter`.
GSUnitState();
/// Installs the callbacks that the emitter invokes when it emits a primitive.
void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter);
/// Copies the output mask from `config` into the emitter.
void ConfigOutput(const ShaderRegs& config);
GSEmitter emitter;
private:
friend class boost::serialization::access;
/// Boost.Serialization hook: archives the UnitState base, then the emitter.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& boost::serialization::base_object<UnitState>(*this);
ar& emitter;
}
};
/// Shader uniform storage: 96 float vectors, 16 booleans and 4 integer vectors.
struct Uniforms {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
// therefore required to be 16-byte aligned.
alignas(16) std::array<Common::Vec4<f24>, 96> f;
std::array<bool, 16> b;
std::array<Common::Vec4<u8>, 4> i;
/// Byte offset, from the start of Uniforms, of float uniform `index`.
static std::size_t GetFloatUniformOffset(u32 index) {
return offsetof(Uniforms, f) + index * sizeof(Common::Vec4<f24>);
}
/// Byte offset, from the start of Uniforms, of boolean uniform `index`.
static std::size_t GetBoolUniformOffset(u32 index) {
return offsetof(Uniforms, b) + index * sizeof(bool);
}
/// Byte offset, from the start of Uniforms, of integer uniform `index`.
static std::size_t GetIntUniformOffset(u32 index) {
return offsetof(Uniforms, i) + index * sizeof(Common::Vec4<u8>);
}
private:
friend class boost::serialization::access;
/// Boost.Serialization hook: archives the float, bool and integer uniforms in that order.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& f;
ar& b;
ar& i;
}
};
/**
 * Complete configuration of one programmable shader: its uniforms, program code and swizzle
 * data, plus lazily recomputed 64-bit hashes of the code and swizzle arrays.
 *
 * Callers that modify `program_code` or `swizzle_data` must call the matching Mark*Dirty()
 * function, otherwise the hash getters keep returning the previously cached value.
 */
struct ShaderSetup {
    Uniforms uniforms;
    ProgramCode program_code;
    SwizzleData swizzle_data;

    /// Data private to ShaderEngines
    struct EngineData {
        /// Entry point recorded by the engine's SetupBatch(). Zero-initialised so that it is
        /// never read as an indeterminate value before the first SetupBatch() call.
        u32 entry_point = 0;
        /// Used by the JIT, points to a compiled shader object.
        const void* cached_shader = nullptr;
    } engine_data;

    /// Flags the cached program code hash as stale.
    void MarkProgramCodeDirty() {
        program_code_hash_dirty = true;
    }

    /// Flags the cached swizzle data hash as stale.
    void MarkSwizzleDataDirty() {
        swizzle_data_hash_dirty = true;
    }

    /// @returns The 64-bit hash of `program_code`, recomputing it only if it was marked dirty.
    u64 GetProgramCodeHash() {
        if (program_code_hash_dirty) {
            program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
            program_code_hash_dirty = false;
        }
        return program_code_hash;
    }

    /// @returns The 64-bit hash of `swizzle_data`, recomputing it only if it was marked dirty.
    u64 GetSwizzleDataHash() {
        if (swizzle_data_hash_dirty) {
            swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
            swizzle_data_hash_dirty = false;
        }
        return swizzle_data_hash;
    }

private:
    // Both hashes start out dirty, so the placeholder values below are replaced on first use.
    bool program_code_hash_dirty = true;
    bool swizzle_data_hash_dirty = true;
    u64 program_code_hash = 0xDEADC0DE;
    u64 swizzle_data_hash = 0xDEADC0DE;

    friend class boost::serialization::access;

    /// Boost.Serialization hook: archives uniforms, code, swizzle data and the hash cache.
    /// `engine_data` is not archived.
    template <class Archive>
    void serialize(Archive& ar, const u32 file_version) {
        ar& uniforms;
        ar& program_code;
        ar& swizzle_data;
        ar& program_code_hash_dirty;
        ar& swizzle_data_hash_dirty;
        ar& program_code_hash;
        ar& swizzle_data_hash;
    }
};
struct ShaderSetup;
struct ShaderUnit;
class ShaderEngine {
public:
@ -316,11 +27,9 @@ public:
* @param setup Shader engine state, must be setup with SetupBatch on each shader change.
* @param state Shader unit state, must be setup with input data before each shader invocation.
*/
virtual void Run(const ShaderSetup& setup, UnitState& state) const = 0;
virtual void Run(const ShaderSetup& setup, ShaderUnit& state) const = 0;
};
// TODO(yuriks): Remove and make it non-global state somewhere
ShaderEngine* GetEngine();
void Shutdown();
std::unique_ptr<ShaderEngine> CreateEngine(bool use_jit);
} // namespace Pica::Shader
} // namespace Pica

View file

@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cmath>
#include <numeric>
#include <boost/circular_buffer.hpp>
@ -14,9 +13,9 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
#include "video_core/pica_state.h"
#include "video_core/pica/shader_setup.h"
#include "video_core/pica/shader_unit.h"
#include "video_core/pica_types.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_interpreter.h"
using nihstro::Instruction;
@ -46,8 +45,8 @@ struct LoopStackElement {
};
template <bool Debug>
static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
unsigned entry_point) {
static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state,
DebugData<Debug>& debug_data, unsigned entry_point) {
boost::circular_buffer<IfStackElement> if_stack(8);
boost::circular_buffer<CallStackElement> call_stack(4);
boost::circular_buffer<LoopStackElement> loop_stack(4);
@ -136,10 +135,10 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
int index = source_reg.GetIndex();
switch (source_reg.GetRegisterType()) {
case RegisterType::Input:
return &state.registers.input[index].x;
return &state.input[index].x;
case RegisterType::Temporary:
return &state.registers.temporary[index].x;
return &state.temporary[index].x;
case RegisterType::FloatUniform:
if (address_register_index != 0) {
@ -202,9 +201,9 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
}
f24* dest = (instr.common.dest.Value() < 0x10)
? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
? &state.output[instr.common.dest.Value().GetIndex()][0]
: (instr.common.dest.Value() < 0x20)
? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
? &state.temporary[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24_zeros;
debug_data.max_opdesc_id =
@ -537,9 +536,9 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
}
f24* dest = (instr.mad.dest.Value() < 0x10)
? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
? &state.output[instr.mad.dest.Value().GetIndex()][0]
: (instr.mad.dest.Value() < 0x20)
? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
? &state.temporary[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24_zeros;
Record<DebugDataRecord::SRC1>(debug_data, iteration, src1);
@ -652,14 +651,14 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
}
case OpCode::Id::EMIT: {
GSEmitter* emitter = state.emitter_ptr;
auto* emitter = state.emitter_ptr;
ASSERT_MSG(emitter, "Execute EMIT on VS");
emitter->Emit(state.registers.output);
emitter->Emit(state.output);
break;
}
case OpCode::Id::SETEMIT: {
GSEmitter* emitter = state.emitter_ptr;
auto* emitter = state.emitter_ptr;
ASSERT_MSG(emitter, "Execute SETEMIT on VS");
emitter->vertex_id = instr.setemit.vertex_id;
emitter->prim_emit = instr.setemit.prim_emit != 0;
@ -726,29 +725,29 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
setup.entry_point = entry_point;
}
MICROPROFILE_DECLARE(GPU_Shader);
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {
void InterpreterEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const {
MICROPROFILE_SCOPE(GPU_Shader);
DebugData<false> dummy_debug_data;
RunInterpreter(setup, state, dummy_debug_data, setup.engine_data.entry_point);
RunInterpreter(setup, state, dummy_debug_data, setup.entry_point);
}
/// Runs the shader once on a freshly constructed unit state and returns the DebugData<true>
/// record filled in by the interpreter.
/// @param setup  Shader setup to execute; its stored entry point is used as the start offset.
/// @param input  Attribute buffer loaded into the unit's input registers via LoadInput().
/// @param config Shader registers passed to LoadInput() together with `input`.
/// @return The debug data collected during the run.
// NOTE(review): paired lines below (UnitState/ShaderUnit, registers.input/input,
// engine_data.entry_point/entry_point) look like old/new sides of a diff — confirm.
DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
const AttributeBuffer& input,
const ShaderRegs& config) const {
UnitState state;
ShaderUnit state;
DebugData<true> debug_data;
// Setup input register table
// Every input register is zero-filled first, then LoadInput() is applied on top.
state.registers.input.fill(Common::Vec4<f24>::AssignToAll(f24::Zero()));
state.input.fill(Common::Vec4<f24>::AssignToAll(f24::Zero()));
state.LoadInput(config, input);
RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
RunInterpreter(setup, state, debug_data, setup.entry_point);
return debug_data;
}

View file

@ -4,15 +4,20 @@
#pragma once
#include "video_core/pica/output_vertex.h"
#include "video_core/shader/debug_data.h"
#include "video_core/shader/shader.h"
namespace Pica {
struct ShaderRegs;
}
namespace Pica::Shader {
class InterpreterEngine final : public ShaderEngine {
public:
void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
void Run(const ShaderSetup& setup, UnitState& state) const override;
void SetupBatch(ShaderSetup& setup, u32 entry_point) override;
void Run(const ShaderSetup& setup, ShaderUnit& state) const override;
/**
* Produce debug information based on the given shader and input vertex

View file

@ -6,6 +6,7 @@
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include "common/assert.h"
#include "common/hash.h"
#include "common/microprofile.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit.h"
@ -23,7 +24,7 @@ JitEngine::~JitEngine() = default;
void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
setup.entry_point = entry_point;
const u64 code_hash = setup.GetProgramCodeHash();
const u64 swizzle_hash = setup.GetSwizzleDataHash();
@ -31,24 +32,24 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash);
auto iter = cache.find(cache_key);
if (iter != cache.end()) {
setup.engine_data.cached_shader = iter->second.get();
setup.cached_shader = iter->second.get();
} else {
auto shader = std::make_unique<JitShader>();
shader->Compile(&setup.program_code, &setup.swizzle_data);
setup.engine_data.cached_shader = shader.get();
setup.cached_shader = shader.get();
cache.emplace_hint(iter, cache_key, std::move(shader));
}
}
MICROPROFILE_DECLARE(GPU_Shader);
/// Executes the compiled shader cached on `setup` against the given unit state, starting at the
/// entry point stored on the setup. Asserts that a cached shader pointer is present.
// NOTE(review): each statement below appears twice (engine_data.* vs. direct members, UnitState
// vs. ShaderUnit); these look like the old and new side of a diff — confirm.
void JitEngine::Run(const ShaderSetup& setup, UnitState& state) const {
ASSERT(setup.engine_data.cached_shader != nullptr);
void JitEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const {
ASSERT(setup.cached_shader != nullptr);
MICROPROFILE_SCOPE(GPU_Shader);
// The setup stores the cached shader as a pointer that needs a cast back to JitShader.
const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader);
shader->Run(setup, state, setup.engine_data.entry_point);
const JitShader* shader = static_cast<const JitShader*>(setup.cached_shader);
shader->Run(setup, state, setup.entry_point);
}
} // namespace Pica::Shader

View file

@ -22,7 +22,7 @@ public:
~JitEngine() override;
void SetupBatch(ShaderSetup& setup, u32 entry_point) override;
void Run(const ShaderSetup& setup, UnitState& state) const override;
void Run(const ShaderSetup& setup, ShaderUnit& state) const override;
private:
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;

View file

@ -15,9 +15,8 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/vector_math.h"
#include "video_core/pica_state.h"
#include "video_core/pica/shader_unit.h"
#include "video_core/pica_types.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_a64_compiler.h"
using namespace Common::A64;
@ -174,11 +173,11 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegiste
break;
case RegisterType::Input:
src_ptr = STATE;
src_offset = UnitState::InputOffset(src_reg.GetIndex());
src_offset = ShaderUnit::InputOffset(src_reg.GetIndex());
break;
case RegisterType::Temporary:
src_ptr = STATE;
src_offset = UnitState::TemporaryOffset(src_reg.GetIndex());
src_offset = ShaderUnit::TemporaryOffset(src_reg.GetIndex());
break;
default:
UNREACHABLE_MSG("Encountered unknown source register type: {}", src_reg.GetRegisterType());
@ -317,10 +316,10 @@ void JitShader::Compile_DestEnable(Instruction instr, QReg src) {
std::size_t dest_offset_disp;
switch (dest.GetRegisterType()) {
case RegisterType::Output:
dest_offset_disp = UnitState::OutputOffset(dest.GetIndex());
dest_offset_disp = ShaderUnit::OutputOffset(dest.GetIndex());
break;
case RegisterType::Temporary:
dest_offset_disp = UnitState::TemporaryOffset(dest.GetIndex());
dest_offset_disp = ShaderUnit::TemporaryOffset(dest.GetIndex());
break;
default:
UNREACHABLE_MSG("Encountered unknown destination register type: {}",
@ -628,13 +627,13 @@ void JitShader::Compile_NOP(Instruction instr) {}
void JitShader::Compile_END(Instruction instr) {
// Save conditional code
STRB(COND0.toW(), STATE, u32(offsetof(UnitState, conditional_code[0])));
STRB(COND1.toW(), STATE, u32(offsetof(UnitState, conditional_code[1])));
STRB(COND0.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[0])));
STRB(COND1.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[1])));
// Save address/loop registers
STP(ADDROFFS_REG_0.toW(), ADDROFFS_REG_1.toW(), STATE,
u32(offsetof(UnitState, address_registers)));
STR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(UnitState, address_registers[2])));
u32(offsetof(ShaderUnit, address_registers)));
STR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(ShaderUnit, address_registers[2])));
ABI_PopRegisters(*this, ABI_ALL_CALLEE_SAVED, 16);
RET();
@ -804,14 +803,14 @@ void JitShader::Compile_JMP(Instruction instr) {
}
}
/// Free-function wrapper that forwards to `emitter->Emit()` with the pointed-to array of 16
/// output vectors; it is the target of `CallFarFunction(*this, Emit)` in Compile_EMIT.
// NOTE(review): the two signatures (GSEmitter vs. GeometryEmitter) look like the old and new
// side of a diff — confirm which one belongs to the final file.
static void Emit(GSEmitter* emitter, Common::Vec4<f24> (*output)[16]) {
static void Emit(GeometryEmitter* emitter, Common::Vec4<f24> (*output)[16]) {
emitter->Emit(*output);
}
void JitShader::Compile_EMIT(Instruction instr) {
Label have_emitter, end;
LDR(XSCRATCH0, STATE, u32(offsetof(UnitState, emitter_ptr)));
LDR(XSCRATCH0, STATE, u32(offsetof(ShaderUnit, emitter_ptr)));
CBNZ(XSCRATCH0, have_emitter);
ABI_PushRegisters(*this, PersistentCallerSavedRegs());
@ -824,7 +823,7 @@ void JitShader::Compile_EMIT(Instruction instr) {
ABI_PushRegisters(*this, PersistentCallerSavedRegs());
MOV(ABI_PARAM1, XSCRATCH0);
MOV(ABI_PARAM2, STATE);
ADD(ABI_PARAM2, ABI_PARAM2, u32(offsetof(UnitState, registers.output)));
ADD(ABI_PARAM2, ABI_PARAM2, u32(offsetof(ShaderUnit, output)));
CallFarFunction(*this, Emit);
ABI_PopRegisters(*this, PersistentCallerSavedRegs());
l(end);
@ -833,7 +832,7 @@ void JitShader::Compile_EMIT(Instruction instr) {
void JitShader::Compile_SETE(Instruction instr) {
Label have_emitter, end;
LDR(XSCRATCH0, STATE, u32(offsetof(UnitState, emitter_ptr)));
LDR(XSCRATCH0, STATE, u32(offsetof(ShaderUnit, emitter_ptr)));
CBNZ(XSCRATCH0, have_emitter);
@ -846,11 +845,11 @@ void JitShader::Compile_SETE(Instruction instr) {
l(have_emitter);
MOV(XSCRATCH1.toW(), instr.setemit.vertex_id);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GSEmitter, vertex_id)));
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, vertex_id)));
MOV(XSCRATCH1.toW(), instr.setemit.prim_emit);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GSEmitter, prim_emit)));
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, prim_emit)));
MOV(XSCRATCH1.toW(), instr.setemit.winding);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GSEmitter, winding)));
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, winding)));
l(end);
}
@ -943,12 +942,12 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
// Load address/loop registers
LDP(ADDROFFS_REG_0.toW(), ADDROFFS_REG_1.toW(), STATE,
u32(offsetof(UnitState, address_registers)));
LDR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(UnitState, address_registers[2])));
u32(offsetof(ShaderUnit, address_registers)));
LDR(LOOPCOUNT_REG.toW(), STATE, u32(offsetof(ShaderUnit, address_registers[2])));
//// Load conditional code
LDRB(COND0.toW(), STATE, u32(offsetof(UnitState, conditional_code[0])));
LDRB(COND1.toW(), STATE, u32(offsetof(UnitState, conditional_code[1])));
LDRB(COND0.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[0])));
LDRB(COND1.toW(), STATE, u32(offsetof(ShaderUnit, conditional_code[1])));
// Used to set a register to one
FMOV(ONE.S4(), FImm8(false, 7, 0));

View file

@ -17,13 +17,17 @@
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
#include "video_core/pica/shader_setup.h"
using nihstro::Instruction;
using nihstro::OpCode;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
namespace Pica {
struct ShaderUnit;
}
namespace Pica::Shader {
/// Memory allocated for each compiled shader
@ -37,7 +41,7 @@ class JitShader : private oaknut::CodeBlock, public oaknut::CodeGenerator {
public:
JitShader();
/// Invokes the compiled `program` with a pointer to the setup's uniforms, a pointer to the unit
/// state, and the code address of the instruction label at index `offset`.
// NOTE(review): the two signatures (UnitState vs. ShaderUnit) look like the old and new side of
// a diff — confirm. `offset` indexes instruction_labels with no bounds check in this method.
void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const {
void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const {
program(&setup.uniforms, &state, instruction_labels[offset].ptr<const std::byte*>());
}

View file

@ -5,9 +5,6 @@
#include "common/arch.h"
#if CITRA_ARCH(x86_64)
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <nihstro/shader_bytecode.h>
#include <smmintrin.h>
#include <xbyak/xbyak_util.h>
@ -18,9 +15,8 @@
#include "common/x64/cpu_detect.h"
#include "common/x64/xbyak_abi.h"
#include "common/x64/xbyak_util.h"
#include "video_core/pica_state.h"
#include "video_core/pica/shader_unit.h"
#include "video_core/pica_types.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_x64_compiler.h"
using namespace Common::X64;
@ -125,7 +121,7 @@ constexpr Reg32 LOOPINC = edi;
constexpr Reg64 COND0 = r13;
/// Result of the previous CMP instruction for the Y-component comparison
constexpr Reg64 COND1 = r14;
/// Pointer to the UnitState instance for the current VS unit
/// Pointer to the ShaderUnit instance for the current VS unit
constexpr Reg64 STATE = r15;
/// SIMD scratch register
constexpr Xmm SCRATCH = xmm0;
@ -198,11 +194,11 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, u32 src_num, SourceRegiste
break;
case RegisterType::Input:
src_ptr = STATE;
src_offset = UnitState::InputOffset(src_reg.GetIndex());
src_offset = ShaderUnit::InputOffset(src_reg.GetIndex());
break;
case RegisterType::Temporary:
src_ptr = STATE;
src_offset = UnitState::TemporaryOffset(src_reg.GetIndex());
src_offset = ShaderUnit::TemporaryOffset(src_reg.GetIndex());
break;
default:
UNREACHABLE_MSG("Encountered unknown source register type: {}", src_reg.GetRegisterType());
@ -312,10 +308,10 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
std::size_t dest_offset_disp;
switch (dest.GetRegisterType()) {
case RegisterType::Output:
dest_offset_disp = UnitState::OutputOffset(dest.GetIndex());
dest_offset_disp = ShaderUnit::OutputOffset(dest.GetIndex());
break;
case RegisterType::Temporary:
dest_offset_disp = UnitState::TemporaryOffset(dest.GetIndex());
dest_offset_disp = ShaderUnit::TemporaryOffset(dest.GetIndex());
break;
default:
UNREACHABLE_MSG("Encountered unknown destination register type: {}",
@ -669,13 +665,13 @@ void JitShader::Compile_NOP(Instruction instr) {}
void JitShader::Compile_END(Instruction instr) {
// Save conditional code
mov(byte[STATE + offsetof(UnitState, conditional_code[0])], COND0.cvt8());
mov(byte[STATE + offsetof(UnitState, conditional_code[1])], COND1.cvt8());
mov(byte[STATE + offsetof(ShaderUnit, conditional_code[0])], COND0.cvt8());
mov(byte[STATE + offsetof(ShaderUnit, conditional_code[1])], COND1.cvt8());
// Save address/loop registers
mov(dword[STATE + offsetof(UnitState, address_registers[0])], ADDROFFS_REG_0.cvt32());
mov(dword[STATE + offsetof(UnitState, address_registers[1])], ADDROFFS_REG_1.cvt32());
mov(dword[STATE + offsetof(UnitState, address_registers[2])], LOOPCOUNT_REG);
mov(dword[STATE + offsetof(ShaderUnit, address_registers[0])], ADDROFFS_REG_0.cvt32());
mov(dword[STATE + offsetof(ShaderUnit, address_registers[1])], ADDROFFS_REG_1.cvt32());
mov(dword[STATE + offsetof(ShaderUnit, address_registers[2])], LOOPCOUNT_REG);
ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
ret();
@ -870,13 +866,13 @@ void JitShader::Compile_JMP(Instruction instr) {
}
}
/// Free-function wrapper that forwards to `emitter->Emit()` with the pointed-to array of 16
/// output vectors; it is the target of `CallFarFunction(*this, Emit)` in Compile_EMIT.
// NOTE(review): the two signatures (GSEmitter vs. GeometryEmitter) look like the old and new
// side of a diff — confirm which one belongs to the final file.
static void Emit(GSEmitter* emitter, Common::Vec4<f24> (*output)[16]) {
static void Emit(GeometryEmitter* emitter, Common::Vec4<f24> (*output)[16]) {
emitter->Emit(*output);
}
void JitShader::Compile_EMIT(Instruction instr) {
Label have_emitter, end;
mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
mov(rax, qword[STATE + offsetof(ShaderUnit, emitter_ptr)]);
test(rax, rax);
jnz(have_emitter);
@ -890,7 +886,7 @@ void JitShader::Compile_EMIT(Instruction instr) {
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, rax);
mov(ABI_PARAM2, STATE);
add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output)));
add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(ShaderUnit, output)));
CallFarFunction(*this, Emit);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
L(end);
@ -898,7 +894,7 @@ void JitShader::Compile_EMIT(Instruction instr) {
void JitShader::Compile_SETE(Instruction instr) {
Label have_emitter, end;
mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
mov(rax, qword[STATE + offsetof(ShaderUnit, emitter_ptr)]);
test(rax, rax);
jnz(have_emitter);
@ -909,9 +905,9 @@ void JitShader::Compile_SETE(Instruction instr) {
jmp(end);
L(have_emitter);
mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id);
mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit);
mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding);
mov(byte[rax + offsetof(GeometryEmitter, vertex_id)], instr.setemit.vertex_id);
mov(byte[rax + offsetof(GeometryEmitter, prim_emit)], instr.setemit.prim_emit);
mov(byte[rax + offsetof(GeometryEmitter, winding)], instr.setemit.winding);
L(end);
}
@ -1001,13 +997,13 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
mov(STATE, ABI_PARAM2);
// Load address/loop registers
movsxd(ADDROFFS_REG_0, dword[STATE + offsetof(UnitState, address_registers[0])]);
movsxd(ADDROFFS_REG_1, dword[STATE + offsetof(UnitState, address_registers[1])]);
mov(LOOPCOUNT_REG, dword[STATE + offsetof(UnitState, address_registers[2])]);
movsxd(ADDROFFS_REG_0, dword[STATE + offsetof(ShaderUnit, address_registers[0])]);
movsxd(ADDROFFS_REG_1, dword[STATE + offsetof(ShaderUnit, address_registers[1])]);
mov(LOOPCOUNT_REG, dword[STATE + offsetof(ShaderUnit, address_registers[2])]);
// Load conditional code
mov(COND0, byte[STATE + offsetof(UnitState, conditional_code[0])]);
mov(COND1, byte[STATE + offsetof(UnitState, conditional_code[1])]);
mov(COND0, byte[STATE + offsetof(ShaderUnit, conditional_code[0])]);
mov(COND1, byte[STATE + offsetof(ShaderUnit, conditional_code[1])]);
// Used to set a register to one
static const __m128 one = {1.f, 1.f, 1.f, 1.f};

View file

@ -10,19 +10,21 @@
#include <array>
#include <bitset>
#include <cstddef>
#include <optional>
#include <utility>
#include <vector>
#include <nihstro/shader_bytecode.h>
#include <xbyak/xbyak.h>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
#include "video_core/pica/shader_setup.h"
using nihstro::Instruction;
using nihstro::OpCode;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
namespace Pica {
struct ShaderUnit;
}
namespace Pica::Shader {
/// Memory allocated for each compiled shader
@ -36,7 +38,7 @@ class JitShader : public Xbyak::CodeGenerator {
public:
JitShader();
/// Invokes the compiled `program` with a pointer to the setup's uniforms, a pointer to the unit
/// state, and the address of the instruction label at index `offset`.
// NOTE(review): the two signatures (UnitState vs. ShaderUnit) look like the old and new side of
// a diff — confirm. `offset` indexes instruction_labels with no bounds check in this method.
void Run(const ShaderSetup& setup, UnitState& state, u32 offset) const {
void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const {
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
}