mirror of
https://github.com/PabloMK7/citra.git
synced 2025-11-03 15:18:47 +00:00
video_core: Abstract shader generators. (#6990)
* video_core: Abstract shader generators. * shader: Extract common generator structures and move generators to specific namespaces. * shader: Minor fixes and clean-up.
This commit is contained in:
parent
1492d73ccb
commit
50f22d1f59
35 changed files with 1374 additions and 3344 deletions
|
|
@ -168,7 +168,7 @@ void Driver::CheckExtensionSupport() {
|
|||
arb_clear_texture = GLAD_GL_ARB_clear_texture;
|
||||
arb_get_texture_sub_image = GLAD_GL_ARB_get_texture_sub_image;
|
||||
arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc;
|
||||
ext_clip_cull_distance = GLAD_GL_EXT_clip_cull_distance;
|
||||
clip_cull_distance = !is_gles || GLAD_GL_EXT_clip_cull_distance;
|
||||
ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc;
|
||||
shader_framebuffer_fetch =
|
||||
GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
|
||||
|
|
|
|||
|
|
@ -100,9 +100,9 @@ public:
|
|||
return arb_get_texture_sub_image;
|
||||
}
|
||||
|
||||
/// Returns true if the implementation supports EXT_clip_cull_distance
|
||||
bool HasExtClipCullDistance() const {
|
||||
return ext_clip_cull_distance;
|
||||
/// Returns true if the implementation supports shader-defined clipping planes
|
||||
bool HasClipCullDistance() const {
|
||||
return clip_cull_distance;
|
||||
}
|
||||
|
||||
/// Returns true if the implementation supports (EXT/ARM)_shader_framebuffer_fetch
|
||||
|
|
@ -132,7 +132,7 @@ private:
|
|||
bool arb_buffer_storage{};
|
||||
bool arb_clear_texture{};
|
||||
bool arb_get_texture_sub_image{};
|
||||
bool ext_clip_cull_distance{};
|
||||
bool clip_cull_distance{};
|
||||
bool ext_texture_compression_s3tc{};
|
||||
bool arb_texture_compression_bptc{};
|
||||
bool shader_framebuffer_fetch{};
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@
|
|||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/renderer_opengl/pica_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/shader/generator/glsl_shader_gen.h"
|
||||
#include "video_core/texture/texture_decode.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
|
|
@ -28,6 +28,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
|||
MICROPROFILE_DEFINE(OpenGL_Display, "OpenGL", "Display", MP_RGB(128, 128, 192));
|
||||
|
||||
using VideoCore::SurfaceType;
|
||||
using namespace Pica::Shader::Generator;
|
||||
|
||||
constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr std::size_t INDEX_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
|
|
@ -95,10 +96,12 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory,
|
|||
hw_vao.Create();
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
||||
uniform_size_aligned_vs_pica =
|
||||
Common::AlignUp<std::size_t>(sizeof(VSPicaUniformData), uniform_buffer_alignment);
|
||||
uniform_size_aligned_vs =
|
||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
|
||||
Common::AlignUp<std::size_t>(sizeof(VSUniformData), uniform_buffer_alignment);
|
||||
uniform_size_aligned_fs =
|
||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
||||
Common::AlignUp<std::size_t>(sizeof(FSUniformData), uniform_buffer_alignment);
|
||||
|
||||
// Set vertex attributes for software shader path
|
||||
state.draw.vertex_array = sw_vao.handle;
|
||||
|
|
@ -405,16 +408,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
|||
|
||||
// Update scissor uniforms
|
||||
const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
|
||||
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
|
||||
uniform_block_data.data.scissor_x2 != scissor_x2 ||
|
||||
uniform_block_data.data.scissor_y1 != scissor_y1 ||
|
||||
uniform_block_data.data.scissor_y2 != scissor_y2) {
|
||||
if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 ||
|
||||
fs_uniform_block_data.data.scissor_x2 != scissor_x2 ||
|
||||
fs_uniform_block_data.data.scissor_y1 != scissor_y1 ||
|
||||
fs_uniform_block_data.data.scissor_y2 != scissor_y2) {
|
||||
|
||||
uniform_block_data.data.scissor_x1 = scissor_x1;
|
||||
uniform_block_data.data.scissor_x2 = scissor_x2;
|
||||
uniform_block_data.data.scissor_y1 = scissor_y1;
|
||||
uniform_block_data.data.scissor_y2 = scissor_y2;
|
||||
uniform_block_data.dirty = true;
|
||||
fs_uniform_block_data.data.scissor_x1 = scissor_x1;
|
||||
fs_uniform_block_data.data.scissor_x2 = scissor_x2;
|
||||
fs_uniform_block_data.data.scissor_y1 = scissor_y1;
|
||||
fs_uniform_block_data.data.scissor_y2 = scissor_y2;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
// Sync and bind the texture surfaces
|
||||
|
|
@ -831,9 +834,9 @@ void RasterizerOpenGL::SyncBlendColor() {
|
|||
state.blend.color.blue = blend_color[2];
|
||||
state.blend.color.alpha = blend_color[3];
|
||||
|
||||
if (blend_color != uniform_block_data.data.blend_color) {
|
||||
uniform_block_data.data.blend_color = blend_color;
|
||||
uniform_block_data.dirty = true;
|
||||
if (blend_color != fs_uniform_block_data.data.blend_color) {
|
||||
fs_uniform_block_data.data.blend_color = blend_color;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -921,7 +924,7 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() {
|
|||
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
|
||||
sizeof(Common::Vec2f) * 128; // fog
|
||||
|
||||
if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
|
||||
if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -931,9 +934,9 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() {
|
|||
texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
|
||||
|
||||
// Sync the lighting luts
|
||||
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
|
||||
for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) {
|
||||
if (uniform_block_data.lighting_lut_dirty[index] || invalidate) {
|
||||
if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) {
|
||||
for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) {
|
||||
if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) {
|
||||
std::array<Common::Vec2f, 256> new_data;
|
||||
const auto& source_lut = Pica::g_state.lighting.luts[index];
|
||||
std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
|
||||
|
|
@ -945,19 +948,19 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() {
|
|||
lighting_lut_data[index] = new_data;
|
||||
std::memcpy(buffer + bytes_used, new_data.data(),
|
||||
new_data.size() * sizeof(Common::Vec2f));
|
||||
uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
|
||||
fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
|
||||
static_cast<GLint>((offset + bytes_used) / sizeof(Common::Vec2f));
|
||||
uniform_block_data.dirty = true;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
bytes_used += new_data.size() * sizeof(Common::Vec2f);
|
||||
}
|
||||
uniform_block_data.lighting_lut_dirty[index] = false;
|
||||
fs_uniform_block_data.lighting_lut_dirty[index] = false;
|
||||
}
|
||||
}
|
||||
uniform_block_data.lighting_lut_dirty_any = false;
|
||||
fs_uniform_block_data.lighting_lut_dirty_any = false;
|
||||
}
|
||||
|
||||
// Sync the fog lut
|
||||
if (uniform_block_data.fog_lut_dirty || invalidate) {
|
||||
if (fs_uniform_block_data.fog_lut_dirty || invalidate) {
|
||||
std::array<Common::Vec2f, 128> new_data;
|
||||
|
||||
std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
|
||||
|
|
@ -969,12 +972,12 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() {
|
|||
fog_lut_data = new_data;
|
||||
std::memcpy(buffer + bytes_used, new_data.data(),
|
||||
new_data.size() * sizeof(Common::Vec2f));
|
||||
uniform_block_data.data.fog_lut_offset =
|
||||
fs_uniform_block_data.data.fog_lut_offset =
|
||||
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
|
||||
uniform_block_data.dirty = true;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
bytes_used += new_data.size() * sizeof(Common::Vec2f);
|
||||
}
|
||||
uniform_block_data.fog_lut_dirty = false;
|
||||
fs_uniform_block_data.fog_lut_dirty = false;
|
||||
}
|
||||
|
||||
texture_lf_buffer.Unmap(bytes_used);
|
||||
|
|
@ -986,10 +989,10 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
|
|||
sizeof(Common::Vec4f) * 256 + // proctex
|
||||
sizeof(Common::Vec4f) * 256; // proctex diff
|
||||
|
||||
if (!uniform_block_data.proctex_noise_lut_dirty &&
|
||||
!uniform_block_data.proctex_color_map_dirty &&
|
||||
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
|
||||
!uniform_block_data.proctex_diff_lut_dirty) {
|
||||
if (!fs_uniform_block_data.proctex_noise_lut_dirty &&
|
||||
!fs_uniform_block_data.proctex_color_map_dirty &&
|
||||
!fs_uniform_block_data.proctex_alpha_map_dirty &&
|
||||
!fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1012,34 +1015,34 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
|
|||
std::memcpy(buffer + bytes_used, new_data.data(),
|
||||
new_data.size() * sizeof(Common::Vec2f));
|
||||
lut_offset = static_cast<GLint>((offset + bytes_used) / sizeof(Common::Vec2f));
|
||||
uniform_block_data.dirty = true;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
bytes_used += new_data.size() * sizeof(Common::Vec2f);
|
||||
}
|
||||
};
|
||||
|
||||
// Sync the proctex noise lut
|
||||
if (uniform_block_data.proctex_noise_lut_dirty || invalidate) {
|
||||
if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) {
|
||||
sync_proc_tex_value_lut(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
|
||||
uniform_block_data.data.proctex_noise_lut_offset);
|
||||
uniform_block_data.proctex_noise_lut_dirty = false;
|
||||
fs_uniform_block_data.data.proctex_noise_lut_offset);
|
||||
fs_uniform_block_data.proctex_noise_lut_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex color map
|
||||
if (uniform_block_data.proctex_color_map_dirty || invalidate) {
|
||||
if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) {
|
||||
sync_proc_tex_value_lut(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
|
||||
uniform_block_data.data.proctex_color_map_offset);
|
||||
uniform_block_data.proctex_color_map_dirty = false;
|
||||
fs_uniform_block_data.data.proctex_color_map_offset);
|
||||
fs_uniform_block_data.proctex_color_map_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex alpha map
|
||||
if (uniform_block_data.proctex_alpha_map_dirty || invalidate) {
|
||||
if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) {
|
||||
sync_proc_tex_value_lut(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
|
||||
uniform_block_data.data.proctex_alpha_map_offset);
|
||||
uniform_block_data.proctex_alpha_map_dirty = false;
|
||||
fs_uniform_block_data.data.proctex_alpha_map_offset);
|
||||
fs_uniform_block_data.proctex_alpha_map_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex lut
|
||||
if (uniform_block_data.proctex_lut_dirty || invalidate) {
|
||||
if (fs_uniform_block_data.proctex_lut_dirty || invalidate) {
|
||||
std::array<Common::Vec4f, 256> new_data;
|
||||
|
||||
std::transform(Pica::g_state.proctex.color_table.begin(),
|
||||
|
|
@ -1053,16 +1056,16 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
|
|||
proctex_lut_data = new_data;
|
||||
std::memcpy(buffer + bytes_used, new_data.data(),
|
||||
new_data.size() * sizeof(Common::Vec4f));
|
||||
uniform_block_data.data.proctex_lut_offset =
|
||||
fs_uniform_block_data.data.proctex_lut_offset =
|
||||
static_cast<GLint>((offset + bytes_used) / sizeof(Common::Vec4f));
|
||||
uniform_block_data.dirty = true;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
bytes_used += new_data.size() * sizeof(Common::Vec4f);
|
||||
}
|
||||
uniform_block_data.proctex_lut_dirty = false;
|
||||
fs_uniform_block_data.proctex_lut_dirty = false;
|
||||
}
|
||||
|
||||
// Sync the proctex difference lut
|
||||
if (uniform_block_data.proctex_diff_lut_dirty || invalidate) {
|
||||
if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) {
|
||||
std::array<Common::Vec4f, 256> new_data;
|
||||
|
||||
std::transform(Pica::g_state.proctex.color_diff_table.begin(),
|
||||
|
|
@ -1076,12 +1079,12 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
|
|||
proctex_diff_lut_data = new_data;
|
||||
std::memcpy(buffer + bytes_used, new_data.data(),
|
||||
new_data.size() * sizeof(Common::Vec4f));
|
||||
uniform_block_data.data.proctex_diff_lut_offset =
|
||||
fs_uniform_block_data.data.proctex_diff_lut_offset =
|
||||
static_cast<GLint>((offset + bytes_used) / sizeof(Common::Vec4f));
|
||||
uniform_block_data.dirty = true;
|
||||
fs_uniform_block_data.dirty = true;
|
||||
bytes_used += new_data.size() * sizeof(Common::Vec4f);
|
||||
}
|
||||
uniform_block_data.proctex_diff_lut_dirty = false;
|
||||
fs_uniform_block_data.proctex_diff_lut_dirty = false;
|
||||
}
|
||||
|
||||
texture_buffer.Unmap(bytes_used);
|
||||
|
|
@ -1092,38 +1095,47 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) {
|
|||
state.draw.uniform_buffer = uniform_buffer.GetHandle();
|
||||
state.Apply();
|
||||
|
||||
const bool sync_vs = accelerate_draw;
|
||||
const bool sync_fs = uniform_block_data.dirty;
|
||||
if (!sync_vs && !sync_fs) {
|
||||
const bool sync_vs_pica = accelerate_draw;
|
||||
const bool sync_vs = vs_uniform_block_data.dirty;
|
||||
const bool sync_fs = fs_uniform_block_data.dirty;
|
||||
if (!sync_vs_pica && !sync_vs && !sync_fs) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::size_t uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
|
||||
std::size_t uniform_size =
|
||||
uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs;
|
||||
std::size_t used_bytes = 0;
|
||||
|
||||
const auto [uniforms, offset, invalidate] =
|
||||
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
|
||||
|
||||
if (sync_vs) {
|
||||
Pica::Shader::VSUniformData vs_uniforms;
|
||||
vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs);
|
||||
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(Pica::Shader::UniformBindings::VS),
|
||||
uniform_buffer.GetHandle(), offset + used_bytes,
|
||||
sizeof(Pica::Shader::VSUniformData));
|
||||
if (sync_vs || invalidate) {
|
||||
std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data,
|
||||
sizeof(vs_uniform_block_data.data));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSData, uniform_buffer.GetHandle(),
|
||||
offset + used_bytes, sizeof(vs_uniform_block_data.data));
|
||||
vs_uniform_block_data.dirty = false;
|
||||
used_bytes += uniform_size_aligned_vs;
|
||||
}
|
||||
|
||||
if (sync_fs || invalidate) {
|
||||
std::memcpy(uniforms + used_bytes, &uniform_block_data.data,
|
||||
sizeof(Pica::Shader::UniformData));
|
||||
glBindBufferRange(
|
||||
GL_UNIFORM_BUFFER, static_cast<GLuint>(Pica::Shader::UniformBindings::Common),
|
||||
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(Pica::Shader::UniformData));
|
||||
uniform_block_data.dirty = false;
|
||||
std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data,
|
||||
sizeof(fs_uniform_block_data.data));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::FSData, uniform_buffer.GetHandle(),
|
||||
offset + used_bytes, sizeof(fs_uniform_block_data.data));
|
||||
fs_uniform_block_data.dirty = false;
|
||||
used_bytes += uniform_size_aligned_fs;
|
||||
}
|
||||
|
||||
if (sync_vs_pica) {
|
||||
VSPicaUniformData vs_uniforms;
|
||||
vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs);
|
||||
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSPicaData,
|
||||
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms));
|
||||
used_bytes += uniform_size_aligned_vs_pica;
|
||||
}
|
||||
|
||||
uniform_buffer.Unmap(used_bytes);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ private:
|
|||
OGLStreamBuffer texture_buffer;
|
||||
OGLStreamBuffer texture_lf_buffer;
|
||||
GLint uniform_buffer_alignment;
|
||||
std::size_t uniform_size_aligned_vs_pica;
|
||||
std::size_t uniform_size_aligned_vs;
|
||||
std::size_t uniform_size_aligned_fs;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,972 +0,0 @@
|
|||
// Copyright 2017 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <fmt/format.h>
|
||||
#include <nihstro/shader_bytecode.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
|
||||
namespace OpenGL::ShaderDecompiler {
|
||||
|
||||
using nihstro::DestRegister;
|
||||
using nihstro::Instruction;
|
||||
using nihstro::OpCode;
|
||||
using nihstro::RegisterType;
|
||||
using nihstro::SourceRegister;
|
||||
using nihstro::SwizzlePattern;
|
||||
|
||||
constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH;
|
||||
|
||||
/// Exception thrown by the control flow analysis when a shader program cannot be decompiled
/// (e.g. the program does not always end, or a recursive function is detected).
class DecompileFail : public std::runtime_error {
public:
    // Inherit the message-taking constructors of std::runtime_error unchanged.
    using std::runtime_error::runtime_error;
};
|
||||
|
||||
/// Describes how the code path between a given entry point and a return point terminates.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs while analyzing a JMP loop.
    AlwaysReturn, ///< All code paths reach the return point.
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
    AlwaysEnd,    ///< All code paths reach an END instruction.
};
|
||||
|
||||
/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction.
|
||||
struct Subroutine {
|
||||
/// Generates a name suitable for GLSL source code.
|
||||
std::string GetName() const {
|
||||
return "sub_" + std::to_string(begin) + "_" + std::to_string(end);
|
||||
}
|
||||
|
||||
u32 begin; ///< Entry point of the subroutine.
|
||||
u32 end; ///< Return point of the subroutine.
|
||||
ExitMethod exit_method; ///< Exit method of the subroutine.
|
||||
std::set<u32> labels; ///< Addresses refereced by JMP instructions.
|
||||
|
||||
bool operator<(const Subroutine& rhs) const {
|
||||
return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
|
||||
}
|
||||
};
|
||||
|
||||
/// Analyzes shader code and produces a set of subroutines.
|
||||
class ControlFlowAnalyzer {
|
||||
public:
|
||||
ControlFlowAnalyzer(const Pica::Shader::ProgramCode& program_code, u32 main_offset)
|
||||
: program_code(program_code) {
|
||||
|
||||
// Recursively finds all subroutines.
|
||||
const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
|
||||
if (program_main.exit_method != ExitMethod::AlwaysEnd)
|
||||
throw DecompileFail("Program does not always end");
|
||||
}
|
||||
|
||||
std::set<Subroutine> MoveSubroutines() {
|
||||
return std::move(subroutines);
|
||||
}
|
||||
|
||||
private:
|
||||
const Pica::Shader::ProgramCode& program_code;
|
||||
std::set<Subroutine> subroutines;
|
||||
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
|
||||
|
||||
/// Adds and analyzes a new subroutine if it is not added yet.
|
||||
const Subroutine& AddSubroutine(u32 begin, u32 end) {
|
||||
auto iter = subroutines.find(Subroutine{begin, end});
|
||||
if (iter != subroutines.end())
|
||||
return *iter;
|
||||
|
||||
Subroutine subroutine{begin, end};
|
||||
subroutine.exit_method = Scan(begin, end, subroutine.labels);
|
||||
if (subroutine.exit_method == ExitMethod::Undetermined)
|
||||
throw DecompileFail("Recursive function detected");
|
||||
return *subroutines.insert(std::move(subroutine)).first;
|
||||
}
|
||||
|
||||
/// Merges exit method of two parallel branches.
|
||||
static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
|
||||
if (a == ExitMethod::Undetermined) {
|
||||
return b;
|
||||
}
|
||||
if (b == ExitMethod::Undetermined) {
|
||||
return a;
|
||||
}
|
||||
if (a == b) {
|
||||
return a;
|
||||
}
|
||||
return ExitMethod::Conditional;
|
||||
}
|
||||
|
||||
/// Cascades exit method of two blocks of code.
|
||||
static ExitMethod SeriesExit(ExitMethod a, ExitMethod b) {
|
||||
// This should be handled before evaluating b.
|
||||
DEBUG_ASSERT(a != ExitMethod::AlwaysEnd);
|
||||
|
||||
if (a == ExitMethod::Undetermined) {
|
||||
return ExitMethod::Undetermined;
|
||||
}
|
||||
|
||||
if (a == ExitMethod::AlwaysReturn) {
|
||||
return b;
|
||||
}
|
||||
|
||||
if (b == ExitMethod::Undetermined || b == ExitMethod::AlwaysEnd) {
|
||||
return ExitMethod::AlwaysEnd;
|
||||
}
|
||||
|
||||
return ExitMethod::Conditional;
|
||||
}
|
||||
|
||||
/// Scans a range of code for labels and determines the exit method.
|
||||
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
|
||||
auto [iter, inserted] =
|
||||
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
|
||||
ExitMethod& exit_method = iter->second;
|
||||
if (!inserted)
|
||||
return exit_method;
|
||||
|
||||
for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
|
||||
const Instruction instr = {program_code[offset]};
|
||||
switch (instr.opcode.Value()) {
|
||||
case OpCode::Id::END: {
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
}
|
||||
case OpCode::Id::JMPC:
|
||||
case OpCode::Id::JMPU: {
|
||||
labels.insert(instr.flow_control.dest_offset);
|
||||
ExitMethod no_jmp = Scan(offset + 1, end, labels);
|
||||
ExitMethod jmp = Scan(instr.flow_control.dest_offset, end, labels);
|
||||
return exit_method = ParallelExit(no_jmp, jmp);
|
||||
}
|
||||
case OpCode::Id::CALL: {
|
||||
auto& call = AddSubroutine(instr.flow_control.dest_offset,
|
||||
instr.flow_control.dest_offset +
|
||||
instr.flow_control.num_instructions);
|
||||
if (call.exit_method == ExitMethod::AlwaysEnd)
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
ExitMethod after_call = Scan(offset + 1, end, labels);
|
||||
return exit_method = SeriesExit(call.exit_method, after_call);
|
||||
}
|
||||
case OpCode::Id::LOOP: {
|
||||
auto& loop = AddSubroutine(offset + 1, instr.flow_control.dest_offset + 1);
|
||||
if (loop.exit_method == ExitMethod::AlwaysEnd)
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
ExitMethod after_loop = Scan(instr.flow_control.dest_offset + 1, end, labels);
|
||||
return exit_method = SeriesExit(loop.exit_method, after_loop);
|
||||
}
|
||||
case OpCode::Id::CALLC:
|
||||
case OpCode::Id::CALLU: {
|
||||
auto& call = AddSubroutine(instr.flow_control.dest_offset,
|
||||
instr.flow_control.dest_offset +
|
||||
instr.flow_control.num_instructions);
|
||||
ExitMethod after_call = Scan(offset + 1, end, labels);
|
||||
return exit_method = SeriesExit(
|
||||
ParallelExit(call.exit_method, ExitMethod::AlwaysReturn), after_call);
|
||||
}
|
||||
case OpCode::Id::IFU:
|
||||
case OpCode::Id::IFC: {
|
||||
auto& if_sub = AddSubroutine(offset + 1, instr.flow_control.dest_offset);
|
||||
ExitMethod else_method;
|
||||
if (instr.flow_control.num_instructions != 0) {
|
||||
auto& else_sub = AddSubroutine(instr.flow_control.dest_offset,
|
||||
instr.flow_control.dest_offset +
|
||||
instr.flow_control.num_instructions);
|
||||
else_method = else_sub.exit_method;
|
||||
} else {
|
||||
else_method = ExitMethod::AlwaysReturn;
|
||||
}
|
||||
|
||||
ExitMethod both = ParallelExit(if_sub.exit_method, else_method);
|
||||
if (both == ExitMethod::AlwaysEnd)
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
ExitMethod after_call =
|
||||
Scan(instr.flow_control.dest_offset + instr.flow_control.num_instructions, end,
|
||||
labels);
|
||||
return exit_method = SeriesExit(both, after_call);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return exit_method = ExitMethod::AlwaysReturn;
|
||||
}
|
||||
};
|
||||
|
||||
class ShaderWriter {
|
||||
public:
|
||||
// Forwards all arguments directly to libfmt.
|
||||
// Note that all formatting requirements for fmt must be
|
||||
// obeyed when using this function. (e.g. {{ must be used
|
||||
// printing the character '{' is desirable. Ditto for }} and '}',
|
||||
// etc).
|
||||
template <typename... Args>
|
||||
void AddLine(fmt::format_string<Args...> text, Args&&... args) {
|
||||
AddExpression(fmt::format(text, std::forward<Args>(args)...));
|
||||
AddNewLine();
|
||||
}
|
||||
|
||||
void AddNewLine() {
|
||||
DEBUG_ASSERT(scope >= 0);
|
||||
shader_source += '\n';
|
||||
}
|
||||
|
||||
std::string MoveResult() {
|
||||
return std::move(shader_source);
|
||||
}
|
||||
|
||||
int scope = 0;
|
||||
|
||||
private:
|
||||
void AddExpression(std::string_view text) {
|
||||
if (!text.empty()) {
|
||||
shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
|
||||
}
|
||||
shader_source += text;
|
||||
}
|
||||
|
||||
std::string shader_source;
|
||||
};
|
||||
|
||||
/// An adaptor for getting swizzle pattern string from nihstro interfaces.
|
||||
template <SwizzlePattern::Selector (SwizzlePattern::*getter)(int) const>
|
||||
std::string GetSelectorSrc(const SwizzlePattern& pattern) {
|
||||
std::string out;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
switch ((pattern.*getter)(i)) {
|
||||
case SwizzlePattern::Selector::x:
|
||||
out += 'x';
|
||||
break;
|
||||
case SwizzlePattern::Selector::y:
|
||||
out += 'y';
|
||||
break;
|
||||
case SwizzlePattern::Selector::z:
|
||||
out += 'z';
|
||||
break;
|
||||
case SwizzlePattern::Selector::w:
|
||||
out += 'w';
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
constexpr auto GetSelectorSrc1 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc1>;
|
||||
constexpr auto GetSelectorSrc2 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc2>;
|
||||
constexpr auto GetSelectorSrc3 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc3>;
|
||||
|
||||
class GLSLGenerator {
|
||||
public:
|
||||
GLSLGenerator(const std::set<Subroutine>& subroutines,
|
||||
const Pica::Shader::ProgramCode& program_code,
|
||||
const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset,
|
||||
const RegGetter& inputreg_getter, const RegGetter& outputreg_getter,
|
||||
bool sanitize_mul)
|
||||
: subroutines(subroutines), program_code(program_code), swizzle_data(swizzle_data),
|
||||
main_offset(main_offset), inputreg_getter(inputreg_getter),
|
||||
outputreg_getter(outputreg_getter), sanitize_mul(sanitize_mul) {
|
||||
|
||||
Generate();
|
||||
}
|
||||
|
||||
std::string MoveShaderCode() {
|
||||
return shader.MoveResult();
|
||||
}
|
||||
|
||||
private:
|
||||
/// Gets the Subroutine object corresponding to the specified address.
|
||||
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
|
||||
auto iter = subroutines.find(Subroutine{begin, end});
|
||||
ASSERT(iter != subroutines.end());
|
||||
return *iter;
|
||||
}
|
||||
|
||||
/// Generates condition evaluation code for the flow control instruction.
|
||||
static std::string EvaluateCondition(Instruction::FlowControlType flow_control) {
|
||||
using Op = Instruction::FlowControlType::Op;
|
||||
|
||||
const std::string_view result_x =
|
||||
flow_control.refx.Value() ? "conditional_code.x" : "!conditional_code.x";
|
||||
const std::string_view result_y =
|
||||
flow_control.refy.Value() ? "conditional_code.y" : "!conditional_code.y";
|
||||
|
||||
switch (flow_control.op) {
|
||||
case Op::JustX:
|
||||
return std::string(result_x);
|
||||
case Op::JustY:
|
||||
return std::string(result_y);
|
||||
case Op::Or:
|
||||
case Op::And: {
|
||||
const std::string_view and_or = flow_control.op == Op::Or ? "any" : "all";
|
||||
std::string bvec;
|
||||
if (flow_control.refx.Value() && flow_control.refy.Value()) {
|
||||
bvec = "conditional_code";
|
||||
} else if (!flow_control.refx.Value() && !flow_control.refy.Value()) {
|
||||
bvec = "not(conditional_code)";
|
||||
} else {
|
||||
bvec = fmt::format("bvec2({}, {})", result_x, result_y);
|
||||
}
|
||||
return fmt::format("{}({})", and_or, bvec);
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates code representing a source register.
|
||||
std::string GetSourceRegister(const SourceRegister& source_reg,
|
||||
u32 address_register_index) const {
|
||||
const u32 index = static_cast<u32>(source_reg.GetIndex());
|
||||
|
||||
switch (source_reg.GetRegisterType()) {
|
||||
case RegisterType::Input:
|
||||
return inputreg_getter(index);
|
||||
case RegisterType::Temporary:
|
||||
return fmt::format("reg_tmp{}", index);
|
||||
case RegisterType::FloatUniform:
|
||||
if (address_register_index != 0) {
|
||||
return fmt::format("get_offset_register({}, address_registers.{})", index,
|
||||
"xyz"[address_register_index - 1]);
|
||||
}
|
||||
return fmt::format("uniforms.f[{}]", index);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates code representing a destination register.
|
||||
std::string GetDestRegister(const DestRegister& dest_reg) const {
|
||||
const u32 index = static_cast<u32>(dest_reg.GetIndex());
|
||||
|
||||
switch (dest_reg.GetRegisterType()) {
|
||||
case RegisterType::Output:
|
||||
return outputreg_getter(index);
|
||||
case RegisterType::Temporary:
|
||||
return fmt::format("reg_tmp{}", index);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates code representing a bool uniform
|
||||
std::string GetUniformBool(u32 index) const {
|
||||
return fmt::format("uniforms.b[{}]", index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds code that calls a subroutine.
|
||||
* @param subroutine the subroutine to call.
|
||||
*/
|
||||
void CallSubroutine(const Subroutine& subroutine) {
|
||||
if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
|
||||
shader.AddLine("{}();", subroutine.GetName());
|
||||
shader.AddLine("return true;");
|
||||
} else if (subroutine.exit_method == ExitMethod::Conditional) {
|
||||
shader.AddLine("if ({}()) {{ return true; }}", subroutine.GetName());
|
||||
} else {
|
||||
shader.AddLine("{}();", subroutine.GetName());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes code that does an assignment operation.
|
||||
* @param swizzle the swizzle data of the current instruction.
|
||||
* @param reg the destination register code.
|
||||
* @param value the code representing the value to assign.
|
||||
* @param dest_num_components number of components of the destination register.
|
||||
* @param value_num_components number of components of the value to assign.
|
||||
*/
|
||||
void SetDest(const SwizzlePattern& swizzle, std::string_view reg, std::string_view value,
|
||||
u32 dest_num_components, u32 value_num_components) {
|
||||
u32 dest_mask_num_components = 0;
|
||||
std::string dest_mask_swizzle = ".";
|
||||
|
||||
for (u32 i = 0; i < dest_num_components; ++i) {
|
||||
if (swizzle.DestComponentEnabled(static_cast<int>(i))) {
|
||||
dest_mask_swizzle += "xyzw"[i];
|
||||
++dest_mask_num_components;
|
||||
}
|
||||
}
|
||||
|
||||
if (reg.empty() || dest_mask_num_components == 0) {
|
||||
return;
|
||||
}
|
||||
DEBUG_ASSERT(value_num_components >= dest_num_components || value_num_components == 1);
|
||||
|
||||
const std::string dest =
|
||||
fmt::format("{}{}", reg, dest_num_components != 1 ? dest_mask_swizzle : "");
|
||||
|
||||
std::string src{value};
|
||||
if (value_num_components == 1) {
|
||||
if (dest_mask_num_components != 1) {
|
||||
src = fmt::format("vec{}({})", dest_mask_num_components, value);
|
||||
}
|
||||
} else if (value_num_components != dest_mask_num_components) {
|
||||
src = fmt::format("({}){}", value, dest_mask_swizzle);
|
||||
}
|
||||
|
||||
shader.AddLine("{} = {};", dest, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles a single instruction from PICA to GLSL.
|
||||
* @param offset the offset of the PICA shader instruction.
|
||||
* @return the offset of the next instruction to execute. Usually it is the current offset + 1.
|
||||
* If the current instruction is IF or LOOP, the next instruction is after the IF or LOOP block.
|
||||
* If the current instruction always terminates the program, returns PROGRAM_END.
|
||||
*/
|
||||
u32 CompileInstr(u32 offset) {
|
||||
const Instruction instr = {program_code[offset]};
|
||||
|
||||
std::size_t swizzle_offset =
|
||||
instr.opcode.Value().GetInfo().type == OpCode::Type::MultiplyAdd
|
||||
? instr.mad.operand_desc_id
|
||||
: instr.common.operand_desc_id;
|
||||
const SwizzlePattern swizzle = {swizzle_data[swizzle_offset]};
|
||||
|
||||
shader.AddLine("// {}: {}", offset, instr.opcode.Value().GetInfo().name);
|
||||
|
||||
switch (instr.opcode.Value().GetInfo().type) {
|
||||
case OpCode::Type::Arithmetic: {
|
||||
const bool is_inverted =
|
||||
(0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
|
||||
|
||||
std::string src1 = swizzle.negate_src1 ? "-" : "";
|
||||
src1 += GetSourceRegister(instr.common.GetSrc1(is_inverted),
|
||||
!is_inverted * instr.common.address_register_index);
|
||||
src1 += "." + GetSelectorSrc1(swizzle);
|
||||
|
||||
std::string src2 = swizzle.negate_src2 ? "-" : "";
|
||||
src2 += GetSourceRegister(instr.common.GetSrc2(is_inverted),
|
||||
is_inverted * instr.common.address_register_index);
|
||||
src2 += "." + GetSelectorSrc2(swizzle);
|
||||
|
||||
std::string dest_reg = GetDestRegister(instr.common.dest.Value());
|
||||
|
||||
switch (instr.opcode.Value().EffectiveOpCode()) {
|
||||
case OpCode::Id::ADD: {
|
||||
SetDest(swizzle, dest_reg, fmt::format("{} + {}", src1, src2), 4, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MUL: {
|
||||
if (sanitize_mul) {
|
||||
SetDest(swizzle, dest_reg, fmt::format("sanitize_mul({}, {})", src1, src2), 4,
|
||||
4);
|
||||
} else {
|
||||
SetDest(swizzle, dest_reg, fmt::format("{} * {}", src1, src2), 4, 4);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::FLR: {
|
||||
SetDest(swizzle, dest_reg, fmt::format("floor({})", src1), 4, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MAX: {
|
||||
if (sanitize_mul) {
|
||||
SetDest(swizzle, dest_reg,
|
||||
fmt::format("mix({1}, {0}, greaterThan({0}, {1}))", src1, src2), 4, 4);
|
||||
} else {
|
||||
SetDest(swizzle, dest_reg, fmt::format("max({}, {})", src1, src2), 4, 4);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MIN: {
|
||||
if (sanitize_mul) {
|
||||
SetDest(swizzle, dest_reg,
|
||||
fmt::format("mix({1}, {0}, lessThan({0}, {1}))", src1, src2), 4, 4);
|
||||
} else {
|
||||
SetDest(swizzle, dest_reg, fmt::format("min({}, {})", src1, src2), 4, 4);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::DP3:
|
||||
case OpCode::Id::DP4:
|
||||
case OpCode::Id::DPH:
|
||||
case OpCode::Id::DPHI: {
|
||||
OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode();
|
||||
std::string dot;
|
||||
if (opcode == OpCode::Id::DP3) {
|
||||
if (sanitize_mul) {
|
||||
dot = fmt::format("dot(vec3(sanitize_mul({}, {})), vec3(1.0))", src1, src2);
|
||||
} else {
|
||||
dot = fmt::format("dot(vec3({}), vec3({}))", src1, src2);
|
||||
}
|
||||
} else {
|
||||
if (sanitize_mul) {
|
||||
const std::string src1_ =
|
||||
(opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
|
||||
? fmt::format("vec4({}.xyz, 1.0)", src1)
|
||||
: std::move(src1);
|
||||
|
||||
dot = fmt::format("dot(sanitize_mul({}, {}), vec4(1.0))", src1_, src2);
|
||||
} else {
|
||||
dot = fmt::format("dot({}, {})", src1, src2);
|
||||
}
|
||||
}
|
||||
|
||||
SetDest(swizzle, dest_reg, dot, 4, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::RCP: {
|
||||
if (!sanitize_mul) {
|
||||
// When accurate multiplication is OFF, NaN are not really handled. This is a
|
||||
// workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
|
||||
shader.AddLine("if ({}.x != 0.0)", src1);
|
||||
}
|
||||
SetDest(swizzle, dest_reg, fmt::format("(1.0 / {}.x)", src1), 4, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::RSQ: {
|
||||
if (!sanitize_mul) {
|
||||
// When accurate multiplication is OFF, NaN are not really handled. This is a
|
||||
// workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
|
||||
shader.AddLine("if ({}.x > 0.0)", src1);
|
||||
}
|
||||
SetDest(swizzle, dest_reg, fmt::format("inversesqrt({}.x)", src1), 4, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MOVA: {
|
||||
SetDest(swizzle, "address_registers", fmt::format("ivec2({})", src1), 2, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::MOV: {
|
||||
SetDest(swizzle, dest_reg, src1, 4, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::SGE:
|
||||
case OpCode::Id::SGEI: {
|
||||
SetDest(swizzle, dest_reg,
|
||||
fmt::format("vec4(greaterThanEqual({}, {}))", src1, src2), 4, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::SLT:
|
||||
case OpCode::Id::SLTI: {
|
||||
SetDest(swizzle, dest_reg, fmt::format("vec4(lessThan({}, {}))", src1, src2), 4, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::CMP: {
|
||||
using CompareOp = Instruction::Common::CompareOpType::Op;
|
||||
const std::map<CompareOp, std::pair<std::string_view, std::string_view>> cmp_ops{
|
||||
{CompareOp::Equal, {"==", "equal"}},
|
||||
{CompareOp::NotEqual, {"!=", "notEqual"}},
|
||||
{CompareOp::LessThan, {"<", "lessThan"}},
|
||||
{CompareOp::LessEqual, {"<=", "lessThanEqual"}},
|
||||
{CompareOp::GreaterThan, {">", "greaterThan"}},
|
||||
{CompareOp::GreaterEqual, {">=", "greaterThanEqual"}},
|
||||
};
|
||||
|
||||
const CompareOp op_x = instr.common.compare_op.x.Value();
|
||||
const CompareOp op_y = instr.common.compare_op.y.Value();
|
||||
|
||||
if (cmp_ops.find(op_x) == cmp_ops.end()) {
|
||||
LOG_ERROR(HW_GPU, "Unknown compare mode {:x}", op_x);
|
||||
} else if (cmp_ops.find(op_y) == cmp_ops.end()) {
|
||||
LOG_ERROR(HW_GPU, "Unknown compare mode {:x}", op_y);
|
||||
} else if (op_x != op_y) {
|
||||
shader.AddLine("conditional_code.x = {}.x {} {}.x;", src1,
|
||||
cmp_ops.find(op_x)->second.first, src2);
|
||||
shader.AddLine("conditional_code.y = {}.y {} {}.y;", src1,
|
||||
cmp_ops.find(op_y)->second.first, src2);
|
||||
} else {
|
||||
shader.AddLine("conditional_code = {}(vec2({}), vec2({}));",
|
||||
cmp_ops.find(op_x)->second.second, src1, src2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::EX2: {
|
||||
SetDest(swizzle, dest_reg, fmt::format("exp2({}.x)", src1), 4, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::LG2: {
|
||||
SetDest(swizzle, dest_reg, fmt::format("log2({}.x)", src1), 4, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x{:02x} ({}): 0x{:08x}",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(),
|
||||
instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
throw DecompileFail("Unhandled instruction");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Type::MultiplyAdd: {
|
||||
if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
|
||||
(instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
|
||||
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
|
||||
|
||||
std::string src1 = swizzle.negate_src1 ? "-" : "";
|
||||
src1 += GetSourceRegister(instr.mad.GetSrc1(is_inverted), 0);
|
||||
src1 += "." + GetSelectorSrc1(swizzle);
|
||||
|
||||
std::string src2 = swizzle.negate_src2 ? "-" : "";
|
||||
src2 += GetSourceRegister(instr.mad.GetSrc2(is_inverted),
|
||||
!is_inverted * instr.mad.address_register_index);
|
||||
src2 += "." + GetSelectorSrc2(swizzle);
|
||||
|
||||
std::string src3 = swizzle.negate_src3 ? "-" : "";
|
||||
src3 += GetSourceRegister(instr.mad.GetSrc3(is_inverted),
|
||||
is_inverted * instr.mad.address_register_index);
|
||||
src3 += "." + GetSelectorSrc3(swizzle);
|
||||
|
||||
std::string dest_reg =
|
||||
(instr.mad.dest.Value() < 0x10)
|
||||
? outputreg_getter(static_cast<u32>(instr.mad.dest.Value().GetIndex()))
|
||||
: (instr.mad.dest.Value() < 0x20)
|
||||
? "reg_tmp" + std::to_string(instr.mad.dest.Value().GetIndex())
|
||||
: "";
|
||||
|
||||
if (sanitize_mul) {
|
||||
SetDest(swizzle, dest_reg,
|
||||
fmt::format("sanitize_mul({}, {}) + {}", src1, src2, src3), 4, 4);
|
||||
} else {
|
||||
SetDest(swizzle, dest_reg, fmt::format("{} * {} + {}", src1, src2, src3), 4, 4);
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x{:02x} ({}): 0x{:08x}",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(),
|
||||
instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
throw DecompileFail("Unhandled instruction");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
switch (instr.opcode.Value()) {
|
||||
case OpCode::Id::END: {
|
||||
shader.AddLine("return true;");
|
||||
offset = PROGRAM_END - 1;
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::JMPC:
|
||||
case OpCode::Id::JMPU: {
|
||||
std::string condition;
|
||||
if (instr.opcode.Value() == OpCode::Id::JMPC) {
|
||||
condition = EvaluateCondition(instr.flow_control);
|
||||
} else {
|
||||
bool invert_test = instr.flow_control.num_instructions & 1;
|
||||
condition = (invert_test ? "!" : "") +
|
||||
GetUniformBool(instr.flow_control.bool_uniform_id);
|
||||
}
|
||||
|
||||
shader.AddLine("if ({}) {{", condition);
|
||||
++shader.scope;
|
||||
shader.AddLine("{{ jmp_to = {}u; break; }}",
|
||||
instr.flow_control.dest_offset.Value());
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}");
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::CALL:
|
||||
case OpCode::Id::CALLC:
|
||||
case OpCode::Id::CALLU: {
|
||||
std::string condition;
|
||||
if (instr.opcode.Value() == OpCode::Id::CALLC) {
|
||||
condition = EvaluateCondition(instr.flow_control);
|
||||
} else if (instr.opcode.Value() == OpCode::Id::CALLU) {
|
||||
condition = GetUniformBool(instr.flow_control.bool_uniform_id);
|
||||
}
|
||||
|
||||
if (condition.empty()) {
|
||||
shader.AddLine("{{");
|
||||
} else {
|
||||
shader.AddLine("if ({}) {{", condition);
|
||||
}
|
||||
++shader.scope;
|
||||
|
||||
auto& call_sub = GetSubroutine(instr.flow_control.dest_offset,
|
||||
instr.flow_control.dest_offset +
|
||||
instr.flow_control.num_instructions);
|
||||
|
||||
CallSubroutine(call_sub);
|
||||
if (instr.opcode.Value() == OpCode::Id::CALL &&
|
||||
call_sub.exit_method == ExitMethod::AlwaysEnd) {
|
||||
offset = PROGRAM_END - 1;
|
||||
}
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}");
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::NOP: {
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::IFC:
|
||||
case OpCode::Id::IFU: {
|
||||
std::string condition;
|
||||
if (instr.opcode.Value() == OpCode::Id::IFC) {
|
||||
condition = EvaluateCondition(instr.flow_control);
|
||||
} else {
|
||||
condition = GetUniformBool(instr.flow_control.bool_uniform_id);
|
||||
}
|
||||
|
||||
const u32 if_offset = offset + 1;
|
||||
const u32 else_offset = instr.flow_control.dest_offset;
|
||||
const u32 endif_offset =
|
||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions;
|
||||
|
||||
shader.AddLine("if ({}) {{", condition);
|
||||
++shader.scope;
|
||||
|
||||
auto& if_sub = GetSubroutine(if_offset, else_offset);
|
||||
CallSubroutine(if_sub);
|
||||
offset = else_offset - 1;
|
||||
|
||||
if (instr.flow_control.num_instructions != 0) {
|
||||
--shader.scope;
|
||||
shader.AddLine("}} else {{");
|
||||
++shader.scope;
|
||||
|
||||
auto& else_sub = GetSubroutine(else_offset, endif_offset);
|
||||
CallSubroutine(else_sub);
|
||||
offset = endif_offset - 1;
|
||||
|
||||
if (if_sub.exit_method == ExitMethod::AlwaysEnd &&
|
||||
else_sub.exit_method == ExitMethod::AlwaysEnd) {
|
||||
offset = PROGRAM_END - 1;
|
||||
}
|
||||
}
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}");
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::LOOP: {
|
||||
const std::string int_uniform =
|
||||
fmt::format("uniforms.i[{}]", instr.flow_control.int_uniform_id.Value());
|
||||
|
||||
shader.AddLine("address_registers.z = int({}.y);", int_uniform);
|
||||
|
||||
const std::string loop_var = fmt::format("loop{}", offset);
|
||||
shader.AddLine(
|
||||
"for (uint {} = 0u; {} <= {}.x; address_registers.z += int({}.z), ++{}) {{",
|
||||
loop_var, loop_var, int_uniform, int_uniform, loop_var);
|
||||
++shader.scope;
|
||||
|
||||
auto& loop_sub = GetSubroutine(offset + 1, instr.flow_control.dest_offset + 1);
|
||||
CallSubroutine(loop_sub);
|
||||
offset = instr.flow_control.dest_offset;
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}");
|
||||
|
||||
if (loop_sub.exit_method == ExitMethod::AlwaysEnd) {
|
||||
offset = PROGRAM_END - 1;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::EMIT:
|
||||
case OpCode::Id::SETEMIT:
|
||||
LOG_ERROR(HW_GPU, "Geometry shader operation detected in vertex shader");
|
||||
break;
|
||||
|
||||
default: {
|
||||
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x{:02x} ({}): 0x{:08x}",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(),
|
||||
instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
throw DecompileFail("Unhandled instruction");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
return offset + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles a range of instructions from PICA to GLSL.
|
||||
* @param begin the offset of the starting instruction.
|
||||
* @param end the offset where the compilation should stop (exclusive).
|
||||
* @return the offset of the next instruction to compile. PROGRAM_END if the program terminates.
|
||||
*/
|
||||
u32 CompileRange(u32 begin, u32 end) {
|
||||
u32 program_counter;
|
||||
for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
|
||||
program_counter = CompileInstr(program_counter);
|
||||
}
|
||||
return program_counter;
|
||||
}
|
||||
|
||||
void Generate() {
|
||||
if (sanitize_mul) {
|
||||
#ifdef ANDROID
|
||||
// Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here
|
||||
// This seems to be sufficient at least for Ocarina of Time and Attack on Titan accurate
|
||||
// multiplication bugs
|
||||
shader.AddLine(
|
||||
"#define sanitize_mul(lhs, rhs) mix(lhs * rhs, vec4(0.0), isnan(lhs * rhs))");
|
||||
#else
|
||||
shader.AddLine("vec4 sanitize_mul(vec4 lhs, vec4 rhs) {{");
|
||||
++shader.scope;
|
||||
shader.AddLine("vec4 product = lhs * rhs;");
|
||||
shader.AddLine("return mix(product, mix(mix(vec4(0.0), product, isnan(rhs)), product, "
|
||||
"isnan(lhs)), isnan(product));");
|
||||
--shader.scope;
|
||||
shader.AddLine("}}\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
shader.AddLine("vec4 get_offset_register(int base_index, int offset) {{");
|
||||
++shader.scope;
|
||||
shader.AddLine("int fixed_offset = offset >= -128 && offset <= 127 ? offset : 0;");
|
||||
shader.AddLine("uint index = uint((base_index + fixed_offset) & 0x7F);");
|
||||
shader.AddLine("return index < 96u ? uniforms.f[index] : vec4(1.0);");
|
||||
--shader.scope;
|
||||
shader.AddLine("}}\n");
|
||||
|
||||
// Add declarations for registers
|
||||
shader.AddLine("bvec2 conditional_code = bvec2(false);");
|
||||
shader.AddLine("ivec3 address_registers = ivec3(0);");
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
shader.AddLine("vec4 reg_tmp{} = vec4(0.0, 0.0, 0.0, 1.0);", i);
|
||||
}
|
||||
shader.AddNewLine();
|
||||
|
||||
// Add declarations for all subroutines
|
||||
for (const auto& subroutine : subroutines) {
|
||||
shader.AddLine("bool {}();", subroutine.GetName());
|
||||
}
|
||||
shader.AddNewLine();
|
||||
|
||||
// Add the main entry point
|
||||
shader.AddLine("bool exec_shader() {{");
|
||||
++shader.scope;
|
||||
CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
|
||||
--shader.scope;
|
||||
shader.AddLine("}}\n");
|
||||
|
||||
// Add definitions for all subroutines
|
||||
for (const auto& subroutine : subroutines) {
|
||||
std::set<u32> labels = subroutine.labels;
|
||||
|
||||
shader.AddLine("bool {}() {{", subroutine.GetName());
|
||||
++shader.scope;
|
||||
|
||||
if (labels.empty()) {
|
||||
if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
|
||||
shader.AddLine("return false;");
|
||||
}
|
||||
} else {
|
||||
labels.insert(subroutine.begin);
|
||||
shader.AddLine("uint jmp_to = {}u;", subroutine.begin);
|
||||
shader.AddLine("while (true) {{");
|
||||
++shader.scope;
|
||||
|
||||
shader.AddLine("switch (jmp_to) {{");
|
||||
|
||||
for (auto label : labels) {
|
||||
shader.AddLine("case {}u: {{", label);
|
||||
++shader.scope;
|
||||
|
||||
auto next_it = labels.lower_bound(label + 1);
|
||||
u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
|
||||
|
||||
u32 compile_end = CompileRange(label, next_label);
|
||||
if (compile_end > next_label && compile_end != PROGRAM_END) {
|
||||
// This happens only when there is a label inside a IF/LOOP block
|
||||
shader.AddLine("{{ jmp_to = {}u; break; }}", compile_end);
|
||||
labels.emplace(compile_end);
|
||||
}
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}");
|
||||
}
|
||||
|
||||
shader.AddLine("default: return false;");
|
||||
shader.AddLine("}}");
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}");
|
||||
|
||||
shader.AddLine("return false;");
|
||||
}
|
||||
|
||||
--shader.scope;
|
||||
shader.AddLine("}}\n");
|
||||
|
||||
DEBUG_ASSERT(shader.scope == 0);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const std::set<Subroutine>& subroutines;
|
||||
const Pica::Shader::ProgramCode& program_code;
|
||||
const Pica::Shader::SwizzleData& swizzle_data;
|
||||
const u32 main_offset;
|
||||
const RegGetter& inputreg_getter;
|
||||
const RegGetter& outputreg_getter;
|
||||
const bool sanitize_mul;
|
||||
|
||||
ShaderWriter shader;
|
||||
};
|
||||
|
||||
/// Returns the GLSL declarations shared by every decompiled program: the `pica_uniforms`
/// struct layout and the prototype of the generated `exec_shader()` entry point.
std::string GetCommonDeclarations() {
    static constexpr std::string_view declarations = R"(
struct pica_uniforms {
    bool b[16];
    uvec4 i[4];
    vec4 f[96];
};

bool exec_shader();

)";
    return std::string{declarations};
}
|
||||
|
||||
/**
 * Decompiles a PICA shader program into GLSL.
 *
 * Runs control flow analysis from `main_offset`, then generates the GLSL code for the resulting
 * subroutines.
 *
 * @return the generated code, or std::nullopt if a DecompileFail was raised along the way.
 */
std::optional<ProgramResult> DecompileProgram(const Pica::Shader::ProgramCode& program_code,
                                              const Pica::Shader::SwizzleData& swizzle_data,
                                              u32 main_offset, const RegGetter& inputreg_getter,
                                              const RegGetter& outputreg_getter,
                                              bool sanitize_mul) {
    std::optional<ProgramResult> decompiled;

    try {
        // Must outlive the generator, which is handed a reference to it.
        auto subroutines = ControlFlowAnalyzer(program_code, main_offset).MoveSubroutines();
        GLSLGenerator generator(subroutines, program_code, swizzle_data, main_offset,
                                inputreg_getter, outputreg_getter, sanitize_mul);
        decompiled = ProgramResult{generator.MoveShaderCode()};
    } catch (const DecompileFail& exception) {
        LOG_INFO(HW_GPU, "Shader decompilation failed: {}", exception.what());
        decompiled = std::nullopt;
    }

    return decompiled;
}
|
||||
|
||||
} // namespace OpenGL::ShaderDecompiler
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
// Copyright 2017 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
// Public interface of the PICA -> GLSL shader decompiler.
namespace OpenGL::ShaderDecompiler {
|
||||
|
||||
/// Callback that turns a register index into the GLSL expression naming that register.
using RegGetter = std::function<std::string(u32)>;
|
||||
|
||||
/// Result of a successful decompilation.
struct ProgramResult {
|
||||
/// Generated shader source text.
std::string code;
|
||||
};
|
||||
|
||||
/// Returns GLSL text declaring the `pica_uniforms` struct and the `exec_shader()` prototype.
std::string GetCommonDeclarations();
|
||||
|
||||
/**
 * Decompiles a PICA shader program into GLSL.
 * @param program_code the PICA instruction words
 * @param swizzle_data the operand descriptor (swizzle) table referenced by the instructions
 * @param main_offset offset of the first instruction of the program's entry point
 * @param inputreg_getter names the GLSL expression for a given input register index
 * @param outputreg_getter names the GLSL expression for a given output register index
 * @param sanitize_mul selects the code path that routes multiplications through sanitize_mul()
 * @returns the generated code, or std::nullopt when decompilation fails
 */
std::optional<ProgramResult> DecompileProgram(const Pica::Shader::ProgramCode& program_code,
|
||||
const Pica::Shader::SwizzleData& swizzle_data,
|
||||
u32 main_offset, const RegGetter& inputreg_getter,
|
||||
const RegGetter& outputreg_getter, bool sanitize_mul);
|
||||
|
||||
} // namespace OpenGL::ShaderDecompiler
|
||||
|
|
@ -297,35 +297,33 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCache::LoadDecompiledEntry()
|
|||
}
|
||||
|
||||
ShaderDiskCacheDecompiled entry;
|
||||
entry.result.code = std::move(code);
|
||||
entry.code = std::move(code);
|
||||
entry.sanitize_mul = sanitize_mul;
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
void ShaderDiskCache::SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier,
|
||||
const ShaderDecompiler::ProgramResult& result,
|
||||
bool sanitize_mul) {
|
||||
const std::string& code, bool sanitize_mul) {
|
||||
if (!IsUsable())
|
||||
return;
|
||||
|
||||
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
|
||||
file.WriteObject(unique_identifier) != 1 || file.WriteObject(sanitize_mul) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(result.code.size())) != 1 ||
|
||||
file.WriteArray(result.code.data(), result.code.size()) != result.code.size()) {
|
||||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
|
||||
file.WriteArray(code.data(), code.size()) != code.size()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to save decompiled cache entry - removing");
|
||||
file.Close();
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
}
|
||||
|
||||
bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier,
|
||||
const ShaderDecompiler::ProgramResult& result,
|
||||
bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier, const std::string& code,
|
||||
bool sanitize_mul) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
|
||||
!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(result.code.size())) ||
|
||||
!SaveArrayToPrecompiled(result.code.data(), result.code.size())) {
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
|
||||
!SaveArrayToPrecompiled(code.data(), code.size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -374,8 +372,7 @@ void ShaderDiskCache::SaveRaw(const ShaderDiskCacheRaw& entry) {
|
|||
transferable_file.Flush();
|
||||
}
|
||||
|
||||
void ShaderDiskCache::SaveDecompiled(u64 unique_identifier,
|
||||
const ShaderDecompiler::ProgramResult& code,
|
||||
void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, const std::string& code,
|
||||
bool sanitize_mul) {
|
||||
if (!IsUsable())
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -20,8 +20,7 @@
|
|||
#include "common/common_types.h"
|
||||
#include "common/file_util.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/shader/generator/glsl_shader_gen.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
|
|
@ -38,6 +37,7 @@ struct ShaderDiskCacheDump;
|
|||
|
||||
using RawShaderConfig = Pica::Regs;
|
||||
using ProgramCode = std::vector<u32>;
|
||||
using ProgramType = Pica::Shader::Generator::ProgramType;
|
||||
using ShaderDecompiledMap = std::unordered_map<u64, ShaderDiskCacheDecompiled>;
|
||||
using ShaderDumpsMap = std::unordered_map<u64, ShaderDiskCacheDump>;
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ private:
|
|||
|
||||
/// Contains decompiled data from a shader
|
||||
struct ShaderDiskCacheDecompiled {
|
||||
ShaderDecompiler::ProgramResult result;
|
||||
std::string code;
|
||||
bool sanitize_mul;
|
||||
};
|
||||
|
||||
|
|
@ -109,8 +109,7 @@ public:
|
|||
void SaveRaw(const ShaderDiskCacheRaw& entry);
|
||||
|
||||
/// Saves a decompiled entry to the precompiled file. Does not check for collisions.
|
||||
void SaveDecompiled(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code,
|
||||
bool sanitize_mul);
|
||||
void SaveDecompiled(u64 unique_identifier, const std::string& code, bool sanitize_mul);
|
||||
|
||||
/// Saves a dump entry to the precompiled file. Does not check for collisions.
|
||||
void SaveDump(u64 unique_identifier, GLuint program);
|
||||
|
|
@ -132,11 +131,10 @@ private:
|
|||
|
||||
/// Saves a decompiled entry to the passed file. Does not check for collisions.
|
||||
void SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier,
|
||||
const ShaderDecompiler::ProgramResult& code, bool sanitize_mul);
|
||||
const std::string& code, bool sanitize_mul);
|
||||
|
||||
/// Saves a decompiled entry to the virtual precompiled cache. Does not check for collisions.
|
||||
bool SaveDecompiledToCache(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code,
|
||||
bool sanitize_mul);
|
||||
bool SaveDecompiledToCache(u64 unique_identifier, const std::string& code, bool sanitize_mul);
|
||||
|
||||
/// Returns if the cache can be used
|
||||
bool IsUsable() const;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,266 +0,0 @@
|
|||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Driver;
|
||||
|
||||
namespace ShaderDecompiler {
|
||||
struct ProgramResult;
|
||||
}
|
||||
|
||||
/// Kind of shader program. Stored as u32; enumerators are VS = 0, GS = 1, FS = 2.
/// NOTE(review): presumably vertex / geometry / fragment stage - confirm against usage.
enum class ProgramType : u32 { VS, GS, FS };
|
||||
|
||||
// Unscoped enum; enumerators take consecutive values from ATTRIBUTE_POSITION == 0 up to
// ATTRIBUTE_VIEW == 7.
// NOTE(review): presumably the vertex attribute slots used by the generated shaders - confirm
// against the code that binds them.
enum Attributes {
|
||||
ATTRIBUTE_POSITION,
|
||||
ATTRIBUTE_COLOR,
|
||||
ATTRIBUTE_TEXCOORD0,
|
||||
ATTRIBUTE_TEXCOORD1,
|
||||
ATTRIBUTE_TEXCOORD2,
|
||||
ATTRIBUTE_TEXCOORD0_W,
|
||||
ATTRIBUTE_NORMQUAT,
|
||||
ATTRIBUTE_VIEW,
|
||||
};
|
||||
|
||||
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
|
||||
struct TevStageConfigRaw {
|
||||
u32 sources_raw;
|
||||
u32 modifiers_raw;
|
||||
u32 ops_raw;
|
||||
u32 scales_raw;
|
||||
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
|
||||
Pica::TexturingRegs::TevStageConfig stage;
|
||||
stage.sources_raw = sources_raw;
|
||||
stage.modifiers_raw = modifiers_raw;
|
||||
stage.ops_raw = ops_raw;
|
||||
stage.const_color = 0;
|
||||
stage.scales_raw = scales_raw;
|
||||
return stage;
|
||||
}
|
||||
};
|
||||
|
||||
// Plain-data snapshot of the state a generated fragment shader depends on. PicaFSConfig wraps
// it in Common::HashableStruct, so the member layout is part of the hash key: do not reorder.
struct PicaFSConfigState {
|
||||
Pica::FramebufferRegs::CompareFunc alpha_test_func;
|
||||
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
|
||||
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
|
||||
bool texture2_use_coord1;
|
||||
// One raw entry per TEV stage (six stages).
std::array<TevStageConfigRaw, 6> tev_stages;
|
||||
// Bit mask: bits 0-3 are read by PicaFSConfig::TevStageUpdatesCombinerBufferColor, bits 4-7
// by PicaFSConfig::TevStageUpdatesCombinerBufferAlpha (one bit per stage index 0-3).
u8 combiner_buffer_input;
|
||||
|
||||
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
|
||||
Pica::TexturingRegs::FogMode fog_mode;
|
||||
bool fog_flip;
|
||||
bool alphablend_enable;
|
||||
Pica::FramebufferRegs::LogicOp logic_op;
|
||||
|
||||
// Lighting state.
struct {
|
||||
// Per-light settings; the array has room for eight entries.
struct {
|
||||
unsigned num;
|
||||
bool directional;
|
||||
bool two_sided_diffuse;
|
||||
bool dist_atten_enable;
|
||||
bool spot_atten_enable;
|
||||
bool geometric_factor_0;
|
||||
bool geometric_factor_1;
|
||||
bool shadow_enable;
|
||||
} light[8];
|
||||
|
||||
bool enable;
|
||||
// NOTE(review): presumably the number of valid entries in light[] - confirm in
// BuildFromRegs().
unsigned src_num;
|
||||
Pica::LightingRegs::LightingBumpMode bump_mode;
|
||||
unsigned bump_selector;
|
||||
bool bump_renorm;
|
||||
bool clamp_highlights;
|
||||
|
||||
Pica::LightingRegs::LightingConfig config;
|
||||
bool enable_primary_alpha;
|
||||
bool enable_secondary_alpha;
|
||||
|
||||
bool enable_shadow;
|
||||
bool shadow_primary;
|
||||
bool shadow_secondary;
|
||||
bool shadow_invert;
|
||||
bool shadow_alpha;
|
||||
unsigned shadow_selector;
|
||||
|
||||
// Settings of one lighting lookup table; seven tables share this layout.
struct {
|
||||
bool enable;
|
||||
bool abs_input;
|
||||
Pica::LightingRegs::LightingLutInput type;
|
||||
float scale;
|
||||
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
|
||||
} lighting;
|
||||
|
||||
// Procedural texture state.
struct {
|
||||
bool enable;
|
||||
u32 coord;
|
||||
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
|
||||
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
|
||||
bool separate_alpha;
|
||||
bool noise_enable;
|
||||
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
|
||||
u32 lut_width;
|
||||
u32 lut_offset0;
|
||||
u32 lut_offset1;
|
||||
u32 lut_offset2;
|
||||
u32 lut_offset3;
|
||||
u32 lod_min;
|
||||
u32 lod_max;
|
||||
Pica::TexturingRegs::ProcTexFilter lut_filter;
|
||||
} proctex;
|
||||
|
||||
// Blend settings, kept separately for the RGB and the alpha channel.
struct {
|
||||
bool emulate_blending;
|
||||
Pica::FramebufferRegs::BlendEquation eq;
|
||||
Pica::FramebufferRegs::BlendFactor src_factor;
|
||||
Pica::FramebufferRegs::BlendFactor dst_factor;
|
||||
} rgb_blend, alpha_blend;
|
||||
|
||||
bool shadow_rendering;
|
||||
bool shadow_texture_orthographic;
|
||||
bool use_custom_normal_map;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains all state used to generate the GLSL fragment shader that emulates the
|
||||
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
|
||||
* programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by
|
||||
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
|
||||
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
|
||||
* two separate shaders sharing the same key.
|
||||
*/
|
||||
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
|
||||
|
||||
/// Construct a PicaFSConfig with the given Pica register configuration.
|
||||
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor,
|
||||
bool use_normal = false);
|
||||
|
||||
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
|
||||
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
|
||||
}
|
||||
|
||||
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
|
||||
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains common information to identify a GL vertex/geometry shader generated from
|
||||
* PICA vertex/geometry shader.
|
||||
*/
|
||||
// Plain-data key shared by the vertex/geometry shader configs; PicaVSConfig wraps it in
// Common::HashableStruct.
struct PicaShaderConfigCommon {
|
||||
// Fills the fields below from the shader registers and setup; defined outside this header.
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
|
||||
|
||||
// NOTE(review): presumably hashes of the program code and of the swizzle data - confirm in
// Init().
u64 program_hash;
|
||||
u64 swizzle_hash;
|
||||
u32 main_offset;
|
||||
bool sanitize_mul;
|
||||
|
||||
u32 num_outputs;
|
||||
|
||||
// output_map[output register index] -> output attribute index
|
||||
std::array<u32, 16> output_map;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a GL vertex shader generated from PICA vertex
|
||||
* shader.
|
||||
*/
|
||||
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
|
||||
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
state.Init(regs, setup);
|
||||
}
|
||||
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
|
||||
state = conf;
|
||||
}
|
||||
};
|
||||
|
||||
// Plain-data key for the fixed geometry shader; PicaFixedGSConfig wraps it in
// Common::HashableStruct.
struct PicaGSConfigCommonRaw {
|
||||
// Fills the fields below from the Pica registers; defined outside this header.
void Init(const Pica::Regs& regs);
|
||||
|
||||
u32 vs_output_attributes;
|
||||
u32 gs_output_attributes;
|
||||
|
||||
// Location of one semantic inside the GS outputs: which attribute and which component.
struct SemanticMap {
|
||||
u32 attribute_index;
|
||||
u32 component_index;
|
||||
};
|
||||
|
||||
// semantic_maps[semantic name] -> GS output attribute index + component index
|
||||
std::array<SemanticMap, 24> semantic_maps;
|
||||
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
|
||||
* shader pipeline
|
||||
*/
|
||||
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
|
||||
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
|
||||
state.Init(regs);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
|
||||
* and directly passes them to the fragment shader.
|
||||
* @param separable_shader generates shader that can be used for separate shader object
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader);
|
||||
|
||||
/**
|
||||
* Generates the GLSL vertex shader program source code for the given VS program
|
||||
* @returns String of the shader source code; boost::none on failure
|
||||
*/
|
||||
std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader(
|
||||
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader);
|
||||
|
||||
/*
|
||||
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config,
|
||||
bool separable_shader);
|
||||
|
||||
/**
|
||||
* Generates the GLSL fragment shader program source code for the current Pica state
|
||||
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
|
||||
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
|
||||
* @param separable_shader generates shader that can be used for separate shader object
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
|
||||
bool separable_shader);
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<OpenGL::PicaFSConfig> {
|
||||
std::size_t operator()(const OpenGL::PicaFSConfig& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::PicaVSConfig> {
|
||||
std::size_t operator()(const OpenGL::PicaVSConfig& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::PicaFixedGSConfig> {
|
||||
std::size_t operator()(const OpenGL::PicaFixedGSConfig& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
|
@ -14,9 +14,11 @@
|
|||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
#include "video_core/shader/generator/shader_uniforms.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
using namespace Pica::Shader::Generator;
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) {
|
||||
|
|
@ -74,7 +76,7 @@ static std::set<GLenum> GetSupportedFormats() {
|
|||
}
|
||||
|
||||
static std::tuple<PicaVSConfig, Pica::Shader::ShaderSetup> BuildVSConfigFromRaw(
|
||||
const ShaderDiskCacheRaw& raw) {
|
||||
const ShaderDiskCacheRaw& raw, const Driver& driver) {
|
||||
Pica::Shader::ProgramCode program_code{};
|
||||
Pica::Shader::SwizzleData swizzle_data{};
|
||||
std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH,
|
||||
|
|
@ -84,7 +86,8 @@ static std::tuple<PicaVSConfig, Pica::Shader::ShaderSetup> BuildVSConfigFromRaw(
|
|||
Pica::Shader::ShaderSetup setup;
|
||||
setup.program_code = program_code;
|
||||
setup.swizzle_data = swizzle_data;
|
||||
return {PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup};
|
||||
return {PicaVSConfig{raw.GetRawShaderConfig(), setup, driver.HasClipCullDistance(), true},
|
||||
setup};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -130,8 +133,10 @@ private:
|
|||
|
||||
class TrivialVertexShader {
|
||||
public:
|
||||
explicit TrivialVertexShader(bool separable) : program(separable) {
|
||||
program.Create(GenerateTrivialVertexShader(separable).code.c_str(), GL_VERTEX_SHADER);
|
||||
explicit TrivialVertexShader(const Driver& driver, bool separable) : program(separable) {
|
||||
const auto code =
|
||||
GLSL::GenerateTrivialVertexShader(driver.HasClipCullDistance(), separable);
|
||||
program.Create(code.c_str(), GL_VERTEX_SHADER);
|
||||
}
|
||||
GLuint Get() const {
|
||||
return program.GetHandle();
|
||||
|
|
@ -141,20 +146,18 @@ private:
|
|||
OGLShaderStage program;
|
||||
};
|
||||
|
||||
template <typename KeyConfigType,
|
||||
ShaderDecompiler::ProgramResult (*CodeGenerator)(const KeyConfigType&, bool),
|
||||
template <typename KeyConfigType, std::string (*CodeGenerator)(const KeyConfigType&, bool),
|
||||
GLenum ShaderType>
|
||||
class ShaderCache {
|
||||
public:
|
||||
explicit ShaderCache(bool separable) : separable(separable) {}
|
||||
std::tuple<GLuint, std::optional<ShaderDecompiler::ProgramResult>> Get(
|
||||
const KeyConfigType& config) {
|
||||
std::tuple<GLuint, std::optional<std::string>> Get(const KeyConfigType& config) {
|
||||
auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable});
|
||||
OGLShaderStage& cached_shader = iter->second;
|
||||
std::optional<ShaderDecompiler::ProgramResult> result{};
|
||||
std::optional<std::string> result{};
|
||||
if (new_shader) {
|
||||
result = CodeGenerator(config, separable);
|
||||
cached_shader.Create(result->code.c_str(), ShaderType);
|
||||
cached_shader.Create(result->c_str(), ShaderType);
|
||||
}
|
||||
return {cached_shader.GetHandle(), std::move(result)};
|
||||
}
|
||||
|
|
@ -180,29 +183,27 @@ private:
|
|||
// program buffer from the previous shader, which is hashed into the config, resulting in several
|
||||
// different config values from the same shader program.
|
||||
template <typename KeyConfigType,
|
||||
std::optional<ShaderDecompiler::ProgramResult> (*CodeGenerator)(
|
||||
const Pica::Shader::ShaderSetup&, const KeyConfigType&, bool),
|
||||
std::string (*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyConfigType&,
|
||||
bool),
|
||||
GLenum ShaderType>
|
||||
class ShaderDoubleCache {
|
||||
public:
|
||||
explicit ShaderDoubleCache(bool separable) : separable(separable) {}
|
||||
std::tuple<GLuint, std::optional<ShaderDecompiler::ProgramResult>> Get(
|
||||
const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) {
|
||||
std::optional<ShaderDecompiler::ProgramResult> result{};
|
||||
std::tuple<GLuint, std::optional<std::string>> Get(const KeyConfigType& key,
|
||||
const Pica::Shader::ShaderSetup& setup) {
|
||||
std::optional<std::string> result{};
|
||||
auto map_it = shader_map.find(key);
|
||||
if (map_it == shader_map.end()) {
|
||||
auto program_opt = CodeGenerator(setup, key, separable);
|
||||
if (!program_opt) {
|
||||
auto program = CodeGenerator(setup, key, separable);
|
||||
if (program.empty()) {
|
||||
shader_map[key] = nullptr;
|
||||
return {0, std::nullopt};
|
||||
}
|
||||
|
||||
std::string& program = program_opt->code;
|
||||
auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{separable});
|
||||
OGLShaderStage& cached_shader = iter->second;
|
||||
if (new_shader) {
|
||||
result.emplace();
|
||||
result->code = program;
|
||||
result = program;
|
||||
cached_shader.Create(program.c_str(), ShaderType);
|
||||
}
|
||||
shader_map[key] = &cached_shader;
|
||||
|
|
@ -237,18 +238,19 @@ private:
|
|||
};
|
||||
|
||||
using ProgrammableVertexShaders =
|
||||
ShaderDoubleCache<PicaVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
|
||||
ShaderDoubleCache<PicaVSConfig, &GLSL::GenerateVertexShader, GL_VERTEX_SHADER>;
|
||||
|
||||
using FixedGeometryShaders =
|
||||
ShaderCache<PicaFixedGSConfig, &GenerateFixedGeometryShader, GL_GEOMETRY_SHADER>;
|
||||
ShaderCache<PicaFixedGSConfig, &GLSL::GenerateFixedGeometryShader, GL_GEOMETRY_SHADER>;
|
||||
|
||||
using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
|
||||
using FragmentShaders =
|
||||
ShaderCache<PicaFSConfig, &GLSL::GenerateFragmentShader, GL_FRAGMENT_SHADER>;
|
||||
|
||||
class ShaderProgramManager::Impl {
|
||||
public:
|
||||
explicit Impl(bool separable)
|
||||
explicit Impl(const Driver& driver, bool separable)
|
||||
: separable(separable), programmable_vertex_shaders(separable),
|
||||
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
|
||||
trivial_vertex_shader(driver, separable), fixed_geometry_shaders(separable),
|
||||
fragment_shaders(separable), disk_cache(separable) {
|
||||
if (separable)
|
||||
pipeline.Create();
|
||||
|
|
@ -299,13 +301,13 @@ ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, con
|
|||
bool separable)
|
||||
: emu_window{emu_window_}, driver{driver_},
|
||||
strict_context_required{emu_window.StrictContextRequired()}, impl{std::make_unique<Impl>(
|
||||
separable)} {}
|
||||
driver_, separable)} {}
|
||||
|
||||
ShaderProgramManager::~ShaderProgramManager() = default;
|
||||
|
||||
bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs,
|
||||
Pica::Shader::ShaderSetup& setup) {
|
||||
PicaVSConfig config{regs.vs, setup};
|
||||
PicaVSConfig config{regs, setup, driver.HasClipCullDistance(), true};
|
||||
auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup);
|
||||
if (handle == 0)
|
||||
return false;
|
||||
|
|
@ -333,7 +335,7 @@ void ShaderProgramManager::UseTrivialVertexShader() {
|
|||
}
|
||||
|
||||
void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) {
|
||||
PicaFixedGSConfig gs_config(regs);
|
||||
PicaFixedGSConfig gs_config(regs, driver.HasClipCullDistance());
|
||||
auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config);
|
||||
impl->current.gs = handle;
|
||||
impl->current.gs_hash = gs_config.Hash();
|
||||
|
|
@ -345,8 +347,8 @@ void ShaderProgramManager::UseTrivialGeometryShader() {
|
|||
}
|
||||
|
||||
void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, bool use_normal) {
|
||||
PicaFSConfig config =
|
||||
PicaFSConfig::BuildFromRegs(regs, driver.HasBlendMinMaxFactor(), use_normal);
|
||||
PicaFSConfig config(regs, false, driver.IsOpenGLES(), false, driver.HasBlendMinMaxFactor(),
|
||||
use_normal);
|
||||
auto [handle, result] = impl->fragment_shaders.Get(config);
|
||||
impl->current.fs = handle;
|
||||
impl->current.fs_hash = config.Hash();
|
||||
|
|
@ -463,13 +465,13 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
|
|||
// we have both the binary shader and the decompiled, so inject it into the
|
||||
// cache
|
||||
if (raw.GetProgramType() == ProgramType::VS) {
|
||||
auto [conf, setup] = BuildVSConfigFromRaw(raw);
|
||||
auto [conf, setup] = BuildVSConfigFromRaw(raw, driver);
|
||||
std::scoped_lock lock(mutex);
|
||||
impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code,
|
||||
impl->programmable_vertex_shaders.Inject(conf, decomp->second.code,
|
||||
std::move(shader));
|
||||
} else if (raw.GetProgramType() == ProgramType::FS) {
|
||||
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(),
|
||||
driver.HasBlendMinMaxFactor());
|
||||
PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false,
|
||||
driver.HasBlendMinMaxFactor());
|
||||
std::scoped_lock lock(mutex);
|
||||
impl->fragment_shaders.Inject(conf, std::move(shader));
|
||||
} else {
|
||||
|
|
@ -566,24 +568,24 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
|
|||
|
||||
bool sanitize_mul = false;
|
||||
GLuint handle{0};
|
||||
std::optional<ShaderDecompiler::ProgramResult> result;
|
||||
std::string code;
|
||||
// Otherwise decompile and build the shader at boot and save the result to the
|
||||
// precompiled file
|
||||
if (raw.GetProgramType() == ProgramType::VS) {
|
||||
auto [conf, setup] = BuildVSConfigFromRaw(raw);
|
||||
result = GenerateVertexShader(setup, conf, impl->separable);
|
||||
auto [conf, setup] = BuildVSConfigFromRaw(raw, driver);
|
||||
code = GLSL::GenerateVertexShader(setup, conf, impl->separable);
|
||||
OGLShaderStage stage{impl->separable};
|
||||
stage.Create(result->code.c_str(), GL_VERTEX_SHADER);
|
||||
stage.Create(code.c_str(), GL_VERTEX_SHADER);
|
||||
handle = stage.GetHandle();
|
||||
sanitize_mul = conf.state.sanitize_mul;
|
||||
std::scoped_lock lock(mutex);
|
||||
impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage));
|
||||
impl->programmable_vertex_shaders.Inject(conf, code, std::move(stage));
|
||||
} else if (raw.GetProgramType() == ProgramType::FS) {
|
||||
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(),
|
||||
driver.HasBlendMinMaxFactor());
|
||||
result = GenerateFragmentShader(conf, impl->separable);
|
||||
PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false,
|
||||
driver.HasBlendMinMaxFactor());
|
||||
code = GLSL::GenerateFragmentShader(conf, impl->separable);
|
||||
OGLShaderStage stage{impl->separable};
|
||||
stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER);
|
||||
stage.Create(code.c_str(), GL_FRAGMENT_SHADER);
|
||||
handle = stage.GetHandle();
|
||||
std::scoped_lock lock(mutex);
|
||||
impl->fragment_shaders.Inject(conf, std::move(stage));
|
||||
|
|
@ -602,8 +604,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
|
|||
|
||||
std::scoped_lock lock(mutex);
|
||||
// If this is a new separable shader, add it to the precompiled cache
|
||||
if (result) {
|
||||
disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul);
|
||||
if (!code.empty()) {
|
||||
disk_cache.SaveDecompiled(unique_identifier, code, sanitize_mul);
|
||||
disk_cache.SaveDump(unique_identifier, handle);
|
||||
precompiled_cache_altered = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,12 @@ namespace OpenGL {
|
|||
class Driver;
|
||||
class OpenGLState;
|
||||
|
||||
enum UniformBindings {
|
||||
VSPicaData = 0,
|
||||
VSData = 1,
|
||||
FSData = 2,
|
||||
};
|
||||
|
||||
/// A class that manages different shader stages and configures them with given config data.
|
||||
class ShaderProgramManager {
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@
|
|||
namespace OpenGL {
|
||||
|
||||
GLuint LoadShader(std::string_view source, GLenum type) {
|
||||
const std::string version = GLES ? R"(#version 320 es
|
||||
|
||||
#define CITRA_GLES
|
||||
std::string preamble;
|
||||
if (GLES) {
|
||||
preamble = R"(#version 320 es
|
||||
|
||||
#if defined(GL_ANDROID_extension_pack_es31a)
|
||||
#extension GL_ANDROID_extension_pack_es31a : enable
|
||||
|
|
@ -25,8 +25,10 @@ GLuint LoadShader(std::string_view source, GLenum type) {
|
|||
#if defined(GL_EXT_clip_cull_distance)
|
||||
#extension GL_EXT_clip_cull_distance : enable
|
||||
#endif // defined(GL_EXT_clip_cull_distance)
|
||||
)"
|
||||
: "#version 430 core\n";
|
||||
)";
|
||||
} else {
|
||||
preamble = "#version 430 core\n";
|
||||
}
|
||||
|
||||
std::string_view debug_type;
|
||||
switch (type) {
|
||||
|
|
@ -43,8 +45,8 @@ GLuint LoadShader(std::string_view source, GLenum type) {
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
std::array<const GLchar*, 2> src_arr{version.data(), source.data()};
|
||||
std::array<GLint, 2> lengths{static_cast<GLint>(version.size()),
|
||||
std::array<const GLchar*, 2> src_arr{preamble.data(), source.data()};
|
||||
std::array<GLint, 2> lengths{static_cast<GLint>(preamble.size()),
|
||||
static_cast<GLint>(source.size())};
|
||||
GLuint shader_id = glCreateShader(type);
|
||||
glShaderSource(shader_id, static_cast<GLsizei>(src_arr.size()), src_arr.data(), lengths.data());
|
||||
|
|
|
|||
|
|
@ -9,21 +9,6 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead.
|
||||
static constexpr char fragment_shader_precision_OES[] = R"(
|
||||
#ifdef GL_FRAGMENT_PRECISION_HIGH
|
||||
precision highp int;
|
||||
precision highp float;
|
||||
precision highp samplerBuffer;
|
||||
precision highp uimage2D;
|
||||
#else
|
||||
precision mediump int;
|
||||
precision mediump float;
|
||||
precision mediump samplerBuffer;
|
||||
precision mediump uimage2D;
|
||||
#endif // GL_FRAGMENT_PRECISION_HIGH
|
||||
)";
|
||||
|
||||
/**
|
||||
* Utility function to create and compile an OpenGL GLSL shader
|
||||
* @param source String of the GLSL shader program
|
||||
|
|
|
|||
|
|
@ -11,12 +11,12 @@
|
|||
#include "core/hw/hw.h"
|
||||
#include "core/hw/lcd.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_mailbox.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/renderer_opengl/post_processing_opengl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/shader/generator/glsl_shader_gen.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
#include "video_core/host_shaders/opengl_present_anaglyph_frag.h"
|
||||
|
|
@ -387,11 +387,7 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||
|
||||
void RendererOpenGL::ReloadShader() {
|
||||
// Link shaders and get variable locations
|
||||
std::string shader_data;
|
||||
if (GLES) {
|
||||
shader_data += fragment_shader_precision_OES;
|
||||
}
|
||||
|
||||
std::string shader_data = fragment_shader_precision_OES;
|
||||
if (Settings::values.render_3d.GetValue() == Settings::StereoRenderOption::Anaglyph) {
|
||||
if (Settings::values.anaglyph_shader_name.GetValue() == "dubois (builtin)") {
|
||||
shader_data += HostShaders::OPENGL_PRESENT_ANAGLYPH_FRAG;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue