From 781912e854863d6f565eab8b53b5b0869ab27ab9 Mon Sep 17 00:00:00 2001
From: wwylele <wwylele@gmail.com>
Date: Wed, 11 Apr 2018 15:47:02 +0300
Subject: [PATCH] gl_rasterize: implement shadow mapping using image load/store

---
 .../renderer_opengl/gl_rasterizer.cpp         | 174 +++++++++++++--
 .../renderer_opengl/gl_rasterizer.h           |   5 +
 .../renderer_opengl/gl_rasterizer_cache.cpp   |   5 +
 .../renderer_opengl/gl_shader_gen.cpp         | 207 +++++++++++++++++-
 .../renderer_opengl/gl_shader_gen.h           |   4 +
 .../renderer_opengl/gl_shader_manager.cpp     |  15 ++
 .../renderer_opengl/gl_shader_manager.h       |   4 +-
 src/video_core/renderer_opengl/gl_state.cpp   |  58 +++++
 src/video_core/renderer_opengl/gl_state.h     |  19 ++
 9 files changed, 464 insertions(+), 27 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b19c04f6f..1318a332e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -38,6 +38,15 @@ RasterizerOpenGL::RasterizerOpenGL()
     : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE),
       index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) {
+
+    allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
+                   GLAD_GL_ARB_framebuffer_no_attachments;
+    if (!allow_shadow) {
+        NGLOG_WARNING(
+            Render_OpenGL,
+            "Shadow might not be able to render because of unsupported OpenGL extensions.");
+    }
+
     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
     state.clip_distance[0] = true;
 
@@ -237,6 +246,7 @@ void RasterizerOpenGL::SyncEntireState() {
 
     SyncFogColor();
     SyncProcTexNoise();
+    SyncShadowBias();
 }
 
 /**
@@ -533,12 +543,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
     const auto& regs = Pica::g_state.regs;
 
+    bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
+                            Pica::FramebufferRegs::FragmentOperationMode::Shadow;
+
     const bool has_stencil =
         regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
 
-    const bool write_color_fb =
-        state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
-        state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
+    const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE ||
+                                state.color_mask.green_enabled == GL_TRUE ||
+                                state.color_mask.blue_enabled == GL_TRUE ||
+                                state.color_mask.alpha_enabled == GL_TRUE;
 
     const bool write_depth_fb =
         (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
@@ -547,7 +561,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
     const bool using_color_fb =
         regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
     const bool using_depth_fb =
-        regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
+        !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
         (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
          (has_stencil && state.stencil.test_enabled));
 
@@ -591,24 +605,39 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
     state.draw.draw_framebuffer = framebuffer.handle;
     state.Apply();
 
-    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
-                           color_surface != nullptr ? color_surface->texture.handle : 0, 0);
-    if (depth_surface != nullptr) {
-        if (has_stencil) {
-            // attach both depth and stencil
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->texture.handle, 0);
-        } else {
-            // attach depth
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->texture.handle, 0);
-            // clear stencil attachment
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+    if (shadow_rendering) {
+        if (!allow_shadow || color_surface == nullptr) {
+            return true;
         }
-    } else {
-        // clear both depth and stencil attachment
+        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH,
+                                color_surface->width * color_surface->res_scale);
+        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT,
+                                color_surface->height * color_surface->res_scale);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                                0);
+        state.image_shadow_buffer = color_surface->texture.handle;
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                               color_surface != nullptr ? color_surface->texture.handle : 0, 0);
+        if (depth_surface != nullptr) {
+            if (has_stencil) {
+                // attach both depth and stencil
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                       GL_TEXTURE_2D, depth_surface->texture.handle, 0);
+            } else {
+                // attach depth
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                       depth_surface->texture.handle, 0);
+                // clear stencil attachment
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                                       0);
+            }
+        } else {
+            // clear both depth and stencil attachment
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+        }
     }
 
     // Sync the viewport
@@ -658,6 +687,82 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
             if (texture_index == 0) {
                 using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
                 switch (texture.config.type.Value()) {
+                case TextureType::Shadow2D: {
+                    if (!allow_shadow)
+                        continue;
+
+                    Surface surface = res_cache.GetTextureSurface(texture);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_px = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_px = 0;
+                    }
+                    continue;
+                }
+                case TextureType::ShadowCube: {
+                    if (!allow_shadow)
+                        continue;
+                    Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(
+                        texture.config, texture.format);
+                    Surface surface;
+
+                    using CubeFace = Pica::TexturingRegs::CubeFace;
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_px = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_px = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_nx = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_nx = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_py = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_py = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_ny = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_ny = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_pz = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_pz = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        state.image_shadow_texture_nz = surface->texture.handle;
+                    } else {
+                        state.image_shadow_texture_nz = 0;
+                    }
+
+                    continue;
+                }
                 case TextureType::TextureCube:
                     using CubeFace = Pica::TexturingRegs::CubeFace;
                     TextureCubeConfig config;
@@ -791,8 +896,22 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
         state.texture_units[texture_index].texture_2d = 0;
     }
     state.texture_cube_unit.texture_cube = 0;
+    if (allow_shadow) {
+        state.image_shadow_texture_px = 0;
+        state.image_shadow_texture_nx = 0;
+        state.image_shadow_texture_py = 0;
+        state.image_shadow_texture_ny = 0;
+        state.image_shadow_texture_pz = 0;
+        state.image_shadow_texture_nz = 0;
+        state.image_shadow_buffer = 0;
+    }
     state.Apply();
 
+    if (shadow_rendering) {
+        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
+                        GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
+    }
+
     // Mark framebuffer surfaces as dirty
     MathUtil::Rectangle<u32> draw_rect_unscaled{
         draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
@@ -951,6 +1070,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         SyncColorWriteMask();
         break;
 
+    case PICA_REG_INDEX(framebuffer.shadow):
+        SyncShadowBias();
+        break;
+
     // Scissor test
     case PICA_REG_INDEX(rasterizer.scissor_test.mode):
         shader_dirty = true;
@@ -1926,6 +2049,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
     }
 }
 
+void RasterizerOpenGL::SyncShadowBias() {
+    const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
+    GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
+    GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
+
+    if (constant != uniform_block_data.data.shadow_bias_constant ||
+        linear != uniform_block_data.data.shadow_bias_linear) {
+        uniform_block_data.data.shadow_bias_constant = constant;
+        uniform_block_data.data.shadow_bias_linear = linear;
+        uniform_block_data.dirty = true;
+    }
+}
+
 void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) {
     // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
     // first
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 547b73aae..34058796b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -217,6 +217,9 @@ private:
     /// Syncs the specified light's distance attenuation scale to match the PICA register
     void SyncLightDistanceAttenuationScale(int light_index);
 
+    /// Syncs the shadow rendering bias to match the PICA register
+    void SyncShadowBias();
+
     /// Upload the uniform blocks to the uniform buffer object
     void UploadUniforms(bool accelerate_draw, bool use_gs);
 
@@ -315,4 +318,6 @@ private:
     OGLBuffer proctex_diff_lut_buffer;
     OGLTexture proctex_diff_lut;
     std::array<GLvec4, 256> proctex_diff_lut_data{};
+
+    bool allow_shadow;
 };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 8c6558813..f2f1d73c5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -303,6 +303,11 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
         buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
     }
 
+    // TODO (wwylele): use GL_NEAREST for shadow map texture
+    // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but
+    // doing linear intepolation componentwise would cause incorrect value. However, for a
+    // well-programmed game this code path should be rarely executed for shadow map with
+    // inconsistent scale.
     glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
                       dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
                       buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index cea74a36f..0ae9794de 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -49,6 +49,8 @@ layout (std140) uniform shader_data {
     int alphatest_ref;
     float depth_scale;
     float depth_offset;
+    float shadow_bias_constant;
+    float shadow_bias_linear;
     int scissor_x1;
     int scissor_y1;
     int scissor_x2;
@@ -220,6 +222,12 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
         state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
     }
 
+    state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
+                             Pica::FramebufferRegs::FragmentOperationMode::Shadow;
+
+    state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
+    state.shadow_texture_bias = regs.texturing.shadow.bias << 1;
+
     return res;
 }
 
@@ -300,10 +308,9 @@ static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_un
         case TexturingRegs::TextureConfig::TextureCube:
             return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
         case TexturingRegs::TextureConfig::Shadow2D:
+            return "shadowTexture(texcoord0, texcoord0_w)";
         case TexturingRegs::TextureConfig::ShadowCube:
-            NGLOG_CRITICAL(HW_GPU, "Unhandled shadow texture");
-            UNIMPLEMENTED();
-            return "vec4(1.0)"; // stubbed to avoid rendering with wrong shadow
+            return "shadowTextureCube(texcoord0, texcoord0_w)";
         default:
             LOG_CRITICAL(HW_GPU, "Unhandled texture type %x",
                          static_cast<int>(state.texture0_type));
@@ -1181,7 +1188,13 @@ float ProcTexNoiseCoef(vec2 x) {
 std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) {
     const auto& state = config.state;
 
-    std::string out = "#version 330 core\n";
+    std::string out = R"(
+#version 330 core
+#extension GL_ARB_shader_image_load_store : enable
+#extension GL_ARB_shader_image_size : enable
+#define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size))
+)";
+
     if (separable_shader) {
         out += "#extension GL_ARB_separate_shader_objects : enable\n";
     }
@@ -1204,6 +1217,16 @@ uniform samplerBuffer proctex_color_map;
 uniform samplerBuffer proctex_alpha_map;
 uniform samplerBuffer proctex_lut;
 uniform samplerBuffer proctex_diff_lut;
+
+#if ALLOW_SHADOW
+layout(r32ui) uniform readonly uimage2D shadow_texture_px;
+layout(r32ui) uniform readonly uimage2D shadow_texture_nx;
+layout(r32ui) uniform readonly uimage2D shadow_texture_py;
+layout(r32ui) uniform readonly uimage2D shadow_texture_ny;
+layout(r32ui) uniform readonly uimage2D shadow_texture_pz;
+layout(r32ui) uniform readonly uimage2D shadow_texture_nz;
+layout(r32ui) uniform uimage2D shadow_buffer;
+#endif
 )";
 
     out += UniformBlockDef;
@@ -1248,6 +1271,147 @@ vec4 byteround(vec4 x) {
     return round(x * 255.0) * (1.0 / 255.0);
 }
 
+#if ALLOW_SHADOW
+
+uvec2 DecodeShadow(uint pixel) {
+    return uvec2(pixel >> 8, pixel & 0xFFu);
+}
+
+uint EncodeShadow(uvec2 pixel) {
+    return (pixel.x << 8) | pixel.y;
+}
+
+float CompareShadow(uint pixel, uint z) {
+    uvec2 p = DecodeShadow(pixel);
+    return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z);
+}
+
+float SampleShadow2D(ivec2 uv, uint z) {
+    if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) )))
+        return 1.0;
+    return CompareShadow(imageLoad(shadow_texture_px, uv).x, z);
+}
+
+float mix2(vec4 s, vec2 a) {
+    vec2 t = mix(s.xy, s.zw, a.yy);
+    return mix(t.x, t.y, a.x);
+}
+
+vec4 shadowTexture(vec2 uv, float w) {
+)";
+    if (!config.state.shadow_texture_orthographic) {
+        out += "uv /= w;";
+    }
+    out += "uint z = uint(max(0, int(min(abs(w), 1.0) * 0xFFFFFF) - " +
+           std::to_string(state.shadow_texture_bias) + "));";
+    out += R"(
+    vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5);
+    vec2 coord_floor = floor(coord);
+    vec2 f = coord - coord_floor;
+    ivec2 i = ivec2(coord_floor);
+    vec4 s = vec4(
+        SampleShadow2D(i              , z),
+        SampleShadow2D(i + ivec2(1, 0), z),
+        SampleShadow2D(i + ivec2(0, 1), z),
+        SampleShadow2D(i + ivec2(1, 1), z));
+    return vec4(mix2(s, f));
+}
+
+vec4 shadowTextureCube(vec2 uv, float w) {
+    ivec2 size = imageSize(shadow_texture_px);
+    vec3 c = vec3(uv, w);
+    vec3 a = abs(c);
+    if (a.x > a.y && a.x > a.z) {
+        w = a.x;
+        uv = -c.zy;
+        if (c.x < 0.0) uv.x = -uv.x;
+    } else if (a.y > a.z) {
+        w = a.y;
+        uv = c.xz;
+        if (c.y < 0.0) uv.y = -uv.y;
+    } else {
+        w = a.z;
+        uv = -c.xy;
+        if (c.z > 0.0) uv.x = -uv.x;
+    }
+)";
+    out += "uint z = uint(max(0, int(min(w, 1.0) * 0xFFFFFF) - " +
+           std::to_string(state.shadow_texture_bias) + "));";
+    out += R"(
+    vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
+    vec2 coord_floor = floor(coord);
+    vec2 f = coord - coord_floor;
+    ivec2 i00 = ivec2(coord_floor);
+    ivec2 i10 = i00 + ivec2(1, 0);
+    ivec2 i01 = i00 + ivec2(0, 1);
+    ivec2 i11 = i00 + ivec2(1, 1);
+    ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1);
+    i00 = clamp(i00, cmin, cmax);
+    i10 = clamp(i10, cmin, cmax);
+    i01 = clamp(i01, cmin, cmax);
+    i11 = clamp(i11, cmin, cmax);
+    uvec4 pixels;
+    // This part should have been refactored into functions,
+    // but many drivers don't like passing uimage2D as parameters
+    if (a.x > a.y && a.x > a.z) {
+        if (c.x > 0.0)
+            pixels = uvec4(
+                imageLoad(shadow_texture_px, i00).r,
+                imageLoad(shadow_texture_px, i10).r,
+                imageLoad(shadow_texture_px, i01).r,
+                imageLoad(shadow_texture_px, i11).r);
+        else
+            pixels = uvec4(
+                imageLoad(shadow_texture_nx, i00).r,
+                imageLoad(shadow_texture_nx, i10).r,
+                imageLoad(shadow_texture_nx, i01).r,
+                imageLoad(shadow_texture_nx, i11).r);
+    } else if (a.y > a.z) {
+        if (c.y > 0.0)
+            pixels = uvec4(
+                imageLoad(shadow_texture_py, i00).r,
+                imageLoad(shadow_texture_py, i10).r,
+                imageLoad(shadow_texture_py, i01).r,
+                imageLoad(shadow_texture_py, i11).r);
+        else
+            pixels = uvec4(
+                imageLoad(shadow_texture_ny, i00).r,
+                imageLoad(shadow_texture_ny, i10).r,
+                imageLoad(shadow_texture_ny, i01).r,
+                imageLoad(shadow_texture_ny, i11).r);
+    } else {
+        if (c.z > 0.0)
+            pixels = uvec4(
+                imageLoad(shadow_texture_pz, i00).r,
+                imageLoad(shadow_texture_pz, i10).r,
+                imageLoad(shadow_texture_pz, i01).r,
+                imageLoad(shadow_texture_pz, i11).r);
+        else
+            pixels = uvec4(
+                imageLoad(shadow_texture_nz, i00).r,
+                imageLoad(shadow_texture_nz, i10).r,
+                imageLoad(shadow_texture_nz, i01).r,
+                imageLoad(shadow_texture_nz, i11).r);
+    }
+    vec4 s = vec4(
+        CompareShadow(pixels.x, z),
+        CompareShadow(pixels.y, z),
+        CompareShadow(pixels.z, z),
+        CompareShadow(pixels.w, z));
+    return vec4(mix2(s, f));
+}
+
+#else
+
+vec4 shadowTexture(vec2 uv, float w) {
+    return vec4(1.0);
+}
+
+vec4 shadowTextureCube(vec2 uv, float w) {
+    return vec4(1.0);
+}
+
+#endif
 )";
 
     if (config.state.proctex.enable)
@@ -1331,9 +1495,38 @@ vec4 secondary_fragment_color = vec4(0.0);
         return out;
     }
 
-    out += "gl_FragDepth = depth;\n";
-    // Round the final fragment color to maintain the PICA's 8 bits of precision
-    out += "color = byteround(last_tex_env_out);\n";
+    if (state.shadow_rendering) {
+        out += R"(
+#if ALLOW_SHADOW
+uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF);
+uint s = uint(last_tex_env_out.g * 0xFF);
+ivec2 image_coord = ivec2(gl_FragCoord.xy);
+
+uint old = imageLoad(shadow_buffer, image_coord).x;
+uint new;
+uint old2;
+do {
+    old2 = old;
+
+    uvec2 ref = DecodeShadow(old);
+    if (d < ref.x) {
+        if (s == 0u) {
+            ref.x = d;
+        } else {
+            s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
+            ref.y = min(s, ref.y);
+        }
+    }
+    new = EncodeShadow(ref);
+
+} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2);
+#endif // ALLOW_SHADOW
+)";
+    } else {
+        out += "gl_FragDepth = depth;\n";
+        // Round the final fragment color to maintain the PICA's 8 bits of precision
+        out += "color = byteround(last_tex_env_out);\n";
+    }
 
     out += "}";
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ada4060e2..e3f3fd5bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -110,6 +110,10 @@ struct PicaFSConfigState {
         u32 lut_offset;
         Pica::TexturingRegs::ProcTexFilter lut_filter;
     } proctex;
+
+    bool shadow_rendering;
+    bool shadow_texture_orthographic;
+    u32 shadow_texture_bias;
 };
 
 /**
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 71acdc5ff..e70b5d87a 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -36,6 +36,13 @@ static void SetShaderSamplerBinding(GLuint shader, const char* name,
     }
 }
 
+static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) {
+    GLint uniform_tex = glGetUniformLocation(shader, name);
+    if (uniform_tex != -1) {
+        glUniform1i(uniform_tex, static_cast<GLint>(binding));
+    }
+}
+
 static void SetShaderSamplerBindings(GLuint shader) {
     OpenGLState cur_state = OpenGLState::GetCurState();
     GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
@@ -56,6 +63,14 @@ static void SetShaderSamplerBindings(GLuint shader) {
     SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT);
     SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT);
 
+    SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer);
+    SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX);
+    SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX);
+    SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY);
+    SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY);
+    SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ);
+    SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ);
+
     cur_state.draw.shader_program = old_program;
     cur_state.Apply();
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 364b8090c..73acc3297 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -32,6 +32,8 @@ struct UniformData {
     GLint alphatest_ref;
     GLfloat depth_scale;
     GLfloat depth_offset;
+    GLfloat shadow_bias_constant;
+    GLfloat shadow_bias_linear;
     GLint scissor_x1;
     GLint scissor_y1;
     GLint scissor_x2;
@@ -48,7 +50,7 @@ struct UniformData {
 };
 
 static_assert(
-    sizeof(UniformData) == 0x460,
+    sizeof(UniformData) == 0x470,
     "The size of the UniformData structure has changed, update the structure in the shader");
 static_assert(sizeof(UniformData) < 16384,
               "UniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 124a41cd9..95dbd591b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -65,6 +65,14 @@ OpenGLState::OpenGLState() {
     proctex_alpha_map.texture_buffer = 0;
     proctex_noise_lut.texture_buffer = 0;
 
+    image_shadow_buffer = 0;
+    image_shadow_texture_px = 0;
+    image_shadow_texture_nx = 0;
+    image_shadow_texture_py = 0;
+    image_shadow_texture_ny = 0;
+    image_shadow_texture_pz = 0;
+    image_shadow_texture_nz = 0;
+
     draw.read_framebuffer = 0;
     draw.draw_framebuffer = 0;
     draw.vertex_array = 0;
@@ -255,6 +263,42 @@ void OpenGLState::Apply() const {
         glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer);
     }
 
+    // Shadow Images
+    if (image_shadow_buffer != cur_state.image_shadow_buffer) {
+        glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0,
+                           GL_READ_WRITE, GL_R32UI);
+    }
+
+    if (image_shadow_texture_px != cur_state.image_shadow_texture_px) {
+        glBindImageTexture(ImageUnits::ShadowTexturePX, image_shadow_texture_px, 0, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_R32UI);
+    }
+
+    if (image_shadow_texture_nx != cur_state.image_shadow_texture_nx) {
+        glBindImageTexture(ImageUnits::ShadowTextureNX, image_shadow_texture_nx, 0, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_R32UI);
+    }
+
+    if (image_shadow_texture_py != cur_state.image_shadow_texture_py) {
+        glBindImageTexture(ImageUnits::ShadowTexturePY, image_shadow_texture_py, 0, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_R32UI);
+    }
+
+    if (image_shadow_texture_ny != cur_state.image_shadow_texture_ny) {
+        glBindImageTexture(ImageUnits::ShadowTextureNY, image_shadow_texture_ny, 0, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_R32UI);
+    }
+
+    if (image_shadow_texture_pz != cur_state.image_shadow_texture_pz) {
+        glBindImageTexture(ImageUnits::ShadowTexturePZ, image_shadow_texture_pz, 0, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_R32UI);
+    }
+
+    if (image_shadow_texture_nz != cur_state.image_shadow_texture_nz) {
+        glBindImageTexture(ImageUnits::ShadowTextureNZ, image_shadow_texture_nz, 0, GL_FALSE, 0,
+                           GL_READ_ONLY, GL_R32UI);
+    }
+
     // Framebuffer
     if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -344,6 +388,20 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
         proctex_lut.texture_buffer = 0;
     if (proctex_diff_lut.texture_buffer == handle)
         proctex_diff_lut.texture_buffer = 0;
+    if (image_shadow_buffer == handle)
+        image_shadow_buffer = 0;
+    if (image_shadow_texture_px == handle)
+        image_shadow_texture_px = 0;
+    if (image_shadow_texture_nx == handle)
+        image_shadow_texture_nx = 0;
+    if (image_shadow_texture_py == handle)
+        image_shadow_texture_py = 0;
+    if (image_shadow_texture_ny == handle)
+        image_shadow_texture_ny = 0;
+    if (image_shadow_texture_pz == handle)
+        image_shadow_texture_pz = 0;
+    if (image_shadow_texture_nz == handle)
+        image_shadow_texture_nz = 0;
     return *this;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 29a0aabb5..ebc217349 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -31,6 +31,16 @@ constexpr TextureUnit TextureCube{10};
 
 } // namespace TextureUnits
 
+namespace ImageUnits {
+constexpr GLuint ShadowBuffer = 0;
+constexpr GLuint ShadowTexturePX = 1;
+constexpr GLuint ShadowTextureNX = 2;
+constexpr GLuint ShadowTexturePY = 3;
+constexpr GLuint ShadowTextureNY = 4;
+constexpr GLuint ShadowTexturePZ = 5;
+constexpr GLuint ShadowTextureNZ = 6;
+} // namespace ImageUnits
+
 class OpenGLState {
 public:
     struct {
@@ -121,6 +131,15 @@ public:
         GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
     } proctex_diff_lut;
 
+    // GL_IMAGE_BINDING_NAME
+    GLuint image_shadow_buffer;
+    GLuint image_shadow_texture_px;
+    GLuint image_shadow_texture_nx;
+    GLuint image_shadow_texture_py;
+    GLuint image_shadow_texture_ny;
+    GLuint image_shadow_texture_pz;
+    GLuint image_shadow_texture_nz;
+
     struct {
         GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
         GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING