mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	gl_rasterize: implement shadow mapping using image load/store
This commit is contained in:
		
							parent
							
								
									08b119153d
								
							
						
					
					
						commit
						781912e854
					
				
					 9 changed files with 464 additions and 27 deletions
				
			
		|  | @ -38,6 +38,15 @@ RasterizerOpenGL::RasterizerOpenGL() | |||
|     : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), | ||||
|       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), | ||||
|       index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { | ||||
| 
 | ||||
|     allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && | ||||
|                    GLAD_GL_ARB_framebuffer_no_attachments; | ||||
|     if (!allow_shadow) { | ||||
|         NGLOG_WARNING( | ||||
|             Render_OpenGL, | ||||
|             "Shadow might not be able to render because of unsupported OpenGL extensions."); | ||||
|     } | ||||
| 
 | ||||
|     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 | ||||
|     state.clip_distance[0] = true; | ||||
| 
 | ||||
|  | @ -237,6 +246,7 @@ void RasterizerOpenGL::SyncEntireState() { | |||
| 
 | ||||
|     SyncFogColor(); | ||||
|     SyncProcTexNoise(); | ||||
|     SyncShadowBias(); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -533,12 +543,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | |||
|     MICROPROFILE_SCOPE(OpenGL_Drawing); | ||||
|     const auto& regs = Pica::g_state.regs; | ||||
| 
 | ||||
|     bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == | ||||
|                             Pica::FramebufferRegs::FragmentOperationMode::Shadow; | ||||
| 
 | ||||
|     const bool has_stencil = | ||||
|         regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; | ||||
| 
 | ||||
|     const bool write_color_fb = | ||||
|         state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || | ||||
|         state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; | ||||
|     const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE || | ||||
|                                 state.color_mask.green_enabled == GL_TRUE || | ||||
|                                 state.color_mask.blue_enabled == GL_TRUE || | ||||
|                                 state.color_mask.alpha_enabled == GL_TRUE; | ||||
| 
 | ||||
|     const bool write_depth_fb = | ||||
|         (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || | ||||
|  | @ -547,7 +561,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | |||
|     const bool using_color_fb = | ||||
|         regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; | ||||
|     const bool using_depth_fb = | ||||
|         regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && | ||||
|         !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && | ||||
|         (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || | ||||
|          (has_stencil && state.stencil.test_enabled)); | ||||
| 
 | ||||
|  | @ -591,24 +605,39 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | |||
|     state.draw.draw_framebuffer = framebuffer.handle; | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||||
|                            color_surface != nullptr ? color_surface->texture.handle : 0, 0); | ||||
|     if (depth_surface != nullptr) { | ||||
|         if (has_stencil) { | ||||
|             // attach both depth and stencil
 | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    depth_surface->texture.handle, 0); | ||||
|         } else { | ||||
|             // attach depth
 | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    depth_surface->texture.handle, 0); | ||||
|             // clear stencil attachment
 | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||||
|     if (shadow_rendering) { | ||||
|         if (!allow_shadow || color_surface == nullptr) { | ||||
|             return true; | ||||
|         } | ||||
|     } else { | ||||
|         // clear both depth and stencil attachment
 | ||||
|         glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH, | ||||
|                                 color_surface->width * color_surface->res_scale); | ||||
|         glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT, | ||||
|                                 color_surface->height * color_surface->res_scale); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||||
|                                0); | ||||
|         state.image_shadow_buffer = color_surface->texture.handle; | ||||
|     } else { | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||||
|                                color_surface != nullptr ? color_surface->texture.handle : 0, 0); | ||||
|         if (depth_surface != nullptr) { | ||||
|             if (has_stencil) { | ||||
|                 // attach both depth and stencil
 | ||||
|                 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||||
|                                        GL_TEXTURE_2D, depth_surface->texture.handle, 0); | ||||
|             } else { | ||||
|                 // attach depth
 | ||||
|                 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                        depth_surface->texture.handle, 0); | ||||
|                 // clear stencil attachment
 | ||||
|                 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||||
|                                        0); | ||||
|             } | ||||
|         } else { | ||||
|             // clear both depth and stencil attachment
 | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Sync the viewport
 | ||||
|  | @ -658,6 +687,82 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | |||
|             if (texture_index == 0) { | ||||
|                 using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; | ||||
|                 switch (texture.config.type.Value()) { | ||||
|                 case TextureType::Shadow2D: { | ||||
|                     if (!allow_shadow) | ||||
|                         continue; | ||||
| 
 | ||||
|                     Surface surface = res_cache.GetTextureSurface(texture); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_px = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_px = 0; | ||||
|                     } | ||||
|                     continue; | ||||
|                 } | ||||
|                 case TextureType::ShadowCube: { | ||||
|                     if (!allow_shadow) | ||||
|                         continue; | ||||
|                     Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister( | ||||
|                         texture.config, texture.format); | ||||
|                     Surface surface; | ||||
| 
 | ||||
|                     using CubeFace = Pica::TexturingRegs::CubeFace; | ||||
|                     info.physical_address = | ||||
|                         regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); | ||||
|                     surface = res_cache.GetTextureSurface(info); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_px = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_px = 0; | ||||
|                     } | ||||
| 
 | ||||
|                     info.physical_address = | ||||
|                         regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); | ||||
|                     surface = res_cache.GetTextureSurface(info); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_nx = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_nx = 0; | ||||
|                     } | ||||
| 
 | ||||
|                     info.physical_address = | ||||
|                         regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); | ||||
|                     surface = res_cache.GetTextureSurface(info); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_py = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_py = 0; | ||||
|                     } | ||||
| 
 | ||||
|                     info.physical_address = | ||||
|                         regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); | ||||
|                     surface = res_cache.GetTextureSurface(info); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_ny = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_ny = 0; | ||||
|                     } | ||||
| 
 | ||||
|                     info.physical_address = | ||||
|                         regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); | ||||
|                     surface = res_cache.GetTextureSurface(info); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_pz = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_pz = 0; | ||||
|                     } | ||||
| 
 | ||||
|                     info.physical_address = | ||||
|                         regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); | ||||
|                     surface = res_cache.GetTextureSurface(info); | ||||
|                     if (surface != nullptr) { | ||||
|                         state.image_shadow_texture_nz = surface->texture.handle; | ||||
|                     } else { | ||||
|                         state.image_shadow_texture_nz = 0; | ||||
|                     } | ||||
| 
 | ||||
|                     continue; | ||||
|                 } | ||||
|                 case TextureType::TextureCube: | ||||
|                     using CubeFace = Pica::TexturingRegs::CubeFace; | ||||
|                     TextureCubeConfig config; | ||||
|  | @ -791,8 +896,22 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { | |||
|         state.texture_units[texture_index].texture_2d = 0; | ||||
|     } | ||||
|     state.texture_cube_unit.texture_cube = 0; | ||||
|     if (allow_shadow) { | ||||
|         state.image_shadow_texture_px = 0; | ||||
|         state.image_shadow_texture_nx = 0; | ||||
|         state.image_shadow_texture_py = 0; | ||||
|         state.image_shadow_texture_ny = 0; | ||||
|         state.image_shadow_texture_pz = 0; | ||||
|         state.image_shadow_texture_nz = 0; | ||||
|         state.image_shadow_buffer = 0; | ||||
|     } | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     if (shadow_rendering) { | ||||
|         glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | | ||||
|                         GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT); | ||||
|     } | ||||
| 
 | ||||
|     // Mark framebuffer surfaces as dirty
 | ||||
|     MathUtil::Rectangle<u32> draw_rect_unscaled{ | ||||
|         draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, | ||||
|  | @ -951,6 +1070,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { | |||
|         SyncColorWriteMask(); | ||||
|         break; | ||||
| 
 | ||||
|     case PICA_REG_INDEX(framebuffer.shadow): | ||||
|         SyncShadowBias(); | ||||
|         break; | ||||
| 
 | ||||
|     // Scissor test
 | ||||
|     case PICA_REG_INDEX(rasterizer.scissor_test.mode): | ||||
|         shader_dirty = true; | ||||
|  | @ -1926,6 +2049,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncShadowBias() { | ||||
|     const auto& shadow = Pica::g_state.regs.framebuffer.shadow; | ||||
|     GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); | ||||
|     GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); | ||||
| 
 | ||||
|     if (constant != uniform_block_data.data.shadow_bias_constant || | ||||
|         linear != uniform_block_data.data.shadow_bias_linear) { | ||||
|         uniform_block_data.data.shadow_bias_constant = constant; | ||||
|         uniform_block_data.data.shadow_bias_linear = linear; | ||||
|         uniform_block_data.dirty = true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { | ||||
|     // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
 | ||||
|     // first
 | ||||
|  |  | |||
|  | @ -217,6 +217,9 @@ private: | |||
|     /// Syncs the specified light's distance attenuation scale to match the PICA register
 | ||||
|     void SyncLightDistanceAttenuationScale(int light_index); | ||||
| 
 | ||||
|     /// Syncs the shadow rendering bias to match the PICA register
 | ||||
|     void SyncShadowBias(); | ||||
| 
 | ||||
|     /// Upload the uniform blocks to the uniform buffer object
 | ||||
|     void UploadUniforms(bool accelerate_draw, bool use_gs); | ||||
| 
 | ||||
|  | @ -315,4 +318,6 @@ private: | |||
|     OGLBuffer proctex_diff_lut_buffer; | ||||
|     OGLTexture proctex_diff_lut; | ||||
|     std::array<GLvec4, 256> proctex_diff_lut_data{}; | ||||
| 
 | ||||
|     bool allow_shadow; | ||||
| }; | ||||
|  |  | |||
|  | @ -303,6 +303,11 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec | |||
|         buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||||
|     } | ||||
| 
 | ||||
|     // TODO (wwylele): use GL_NEAREST for shadow map texture
 | ||||
|     // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but
 | ||||
|     // doing linear interpolation componentwise would produce incorrect values. However, for a
 | ||||
|     // well-programmed game this code path should be rarely executed for shadow map with
 | ||||
|     // inconsistent scale.
 | ||||
|     glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, | ||||
|                       dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, | ||||
|                       buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); | ||||
|  |  | |||
|  | @ -49,6 +49,8 @@ layout (std140) uniform shader_data { | |||
|     int alphatest_ref; | ||||
|     float depth_scale; | ||||
|     float depth_offset; | ||||
|     float shadow_bias_constant; | ||||
|     float shadow_bias_linear; | ||||
|     int scissor_x1; | ||||
|     int scissor_y1; | ||||
|     int scissor_x2; | ||||
|  | @ -220,6 +222,12 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { | |||
|         state.proctex.lut_filter = regs.texturing.proctex_lut.filter; | ||||
|     } | ||||
| 
 | ||||
|     state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == | ||||
|                              Pica::FramebufferRegs::FragmentOperationMode::Shadow; | ||||
| 
 | ||||
|     state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; | ||||
|     state.shadow_texture_bias = regs.texturing.shadow.bias << 1; | ||||
| 
 | ||||
|     return res; | ||||
| } | ||||
| 
 | ||||
|  | @ -300,10 +308,9 @@ static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_un | |||
|         case TexturingRegs::TextureConfig::TextureCube: | ||||
|             return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; | ||||
|         case TexturingRegs::TextureConfig::Shadow2D: | ||||
|             return "shadowTexture(texcoord0, texcoord0_w)"; | ||||
|         case TexturingRegs::TextureConfig::ShadowCube: | ||||
|             NGLOG_CRITICAL(HW_GPU, "Unhandled shadow texture"); | ||||
|             UNIMPLEMENTED(); | ||||
|             return "vec4(1.0)"; // stubbed to avoid rendering with wrong shadow
 | ||||
|             return "shadowTextureCube(texcoord0, texcoord0_w)"; | ||||
|         default: | ||||
|             LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", | ||||
|                          static_cast<int>(state.texture0_type)); | ||||
|  | @ -1181,7 +1188,13 @@ float ProcTexNoiseCoef(vec2 x) { | |||
| std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { | ||||
|     const auto& state = config.state; | ||||
| 
 | ||||
|     std::string out = "#version 330 core\n"; | ||||
|     std::string out = R"( | ||||
| #version 330 core | ||||
| #extension GL_ARB_shader_image_load_store : enable | ||||
| #extension GL_ARB_shader_image_size : enable | ||||
| #define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size)) | ||||
| )"; | ||||
| 
 | ||||
|     if (separable_shader) { | ||||
|         out += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||||
|     } | ||||
|  | @ -1204,6 +1217,16 @@ uniform samplerBuffer proctex_color_map; | |||
| uniform samplerBuffer proctex_alpha_map; | ||||
| uniform samplerBuffer proctex_lut; | ||||
| uniform samplerBuffer proctex_diff_lut; | ||||
| 
 | ||||
| #if ALLOW_SHADOW | ||||
| layout(r32ui) uniform readonly uimage2D shadow_texture_px; | ||||
| layout(r32ui) uniform readonly uimage2D shadow_texture_nx; | ||||
| layout(r32ui) uniform readonly uimage2D shadow_texture_py; | ||||
| layout(r32ui) uniform readonly uimage2D shadow_texture_ny; | ||||
| layout(r32ui) uniform readonly uimage2D shadow_texture_pz; | ||||
| layout(r32ui) uniform readonly uimage2D shadow_texture_nz; | ||||
| layout(r32ui) uniform uimage2D shadow_buffer; | ||||
| #endif | ||||
| )"; | ||||
| 
 | ||||
|     out += UniformBlockDef; | ||||
|  | @ -1248,6 +1271,147 @@ vec4 byteround(vec4 x) { | |||
|     return round(x * 255.0) * (1.0 / 255.0); | ||||
| } | ||||
| 
 | ||||
| #if ALLOW_SHADOW | ||||
| 
 | ||||
| uvec2 DecodeShadow(uint pixel) { | ||||
|     return uvec2(pixel >> 8, pixel & 0xFFu); | ||||
| } | ||||
| 
 | ||||
| uint EncodeShadow(uvec2 pixel) { | ||||
|     return (pixel.x << 8) | pixel.y; | ||||
| } | ||||
| 
 | ||||
| float CompareShadow(uint pixel, uint z) { | ||||
|     uvec2 p = DecodeShadow(pixel); | ||||
|     return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); | ||||
| } | ||||
| 
 | ||||
| float SampleShadow2D(ivec2 uv, uint z) { | ||||
|     if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) | ||||
|         return 1.0; | ||||
|     return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); | ||||
| } | ||||
| 
 | ||||
| float mix2(vec4 s, vec2 a) { | ||||
|     vec2 t = mix(s.xy, s.zw, a.yy); | ||||
|     return mix(t.x, t.y, a.x); | ||||
| } | ||||
| 
 | ||||
| vec4 shadowTexture(vec2 uv, float w) { | ||||
| )"; | ||||
|     if (!config.state.shadow_texture_orthographic) { | ||||
|         out += "uv /= w;"; | ||||
|     } | ||||
|     out += "uint z = uint(max(0, int(min(abs(w), 1.0) * 0xFFFFFF) - " + | ||||
|            std::to_string(state.shadow_texture_bias) + "));"; | ||||
|     out += R"( | ||||
|     vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); | ||||
|     vec2 coord_floor = floor(coord); | ||||
|     vec2 f = coord - coord_floor; | ||||
|     ivec2 i = ivec2(coord_floor); | ||||
|     vec4 s = vec4( | ||||
|         SampleShadow2D(i              , z), | ||||
|         SampleShadow2D(i + ivec2(1, 0), z), | ||||
|         SampleShadow2D(i + ivec2(0, 1), z), | ||||
|         SampleShadow2D(i + ivec2(1, 1), z)); | ||||
|     return vec4(mix2(s, f)); | ||||
| } | ||||
| 
 | ||||
| vec4 shadowTextureCube(vec2 uv, float w) { | ||||
|     ivec2 size = imageSize(shadow_texture_px); | ||||
|     vec3 c = vec3(uv, w); | ||||
|     vec3 a = abs(c); | ||||
|     if (a.x > a.y && a.x > a.z) { | ||||
|         w = a.x; | ||||
|         uv = -c.zy; | ||||
|         if (c.x < 0.0) uv.x = -uv.x; | ||||
|     } else if (a.y > a.z) { | ||||
|         w = a.y; | ||||
|         uv = c.xz; | ||||
|         if (c.y < 0.0) uv.y = -uv.y; | ||||
|     } else { | ||||
|         w = a.z; | ||||
|         uv = -c.xy; | ||||
|         if (c.z > 0.0) uv.x = -uv.x; | ||||
|     } | ||||
| )"; | ||||
|     out += "uint z = uint(max(0, int(min(w, 1.0) * 0xFFFFFF) - " + | ||||
|            std::to_string(state.shadow_texture_bias) + "));"; | ||||
|     out += R"( | ||||
|     vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); | ||||
|     vec2 coord_floor = floor(coord); | ||||
|     vec2 f = coord - coord_floor; | ||||
|     ivec2 i00 = ivec2(coord_floor); | ||||
|     ivec2 i10 = i00 + ivec2(1, 0); | ||||
|     ivec2 i01 = i00 + ivec2(0, 1); | ||||
|     ivec2 i11 = i00 + ivec2(1, 1); | ||||
|     ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); | ||||
|     i00 = clamp(i00, cmin, cmax); | ||||
|     i10 = clamp(i10, cmin, cmax); | ||||
|     i01 = clamp(i01, cmin, cmax); | ||||
|     i11 = clamp(i11, cmin, cmax); | ||||
|     uvec4 pixels; | ||||
|     // This part should have been refactored into functions,
 | ||||
|     // but many drivers don't like passing uimage2D as parameters
 | ||||
|     if (a.x > a.y && a.x > a.z) { | ||||
|         if (c.x > 0.0) | ||||
|             pixels = uvec4( | ||||
|                 imageLoad(shadow_texture_px, i00).r, | ||||
|                 imageLoad(shadow_texture_px, i10).r, | ||||
|                 imageLoad(shadow_texture_px, i01).r, | ||||
|                 imageLoad(shadow_texture_px, i11).r); | ||||
|         else | ||||
|             pixels = uvec4( | ||||
|                 imageLoad(shadow_texture_nx, i00).r, | ||||
|                 imageLoad(shadow_texture_nx, i10).r, | ||||
|                 imageLoad(shadow_texture_nx, i01).r, | ||||
|                 imageLoad(shadow_texture_nx, i11).r); | ||||
|     } else if (a.y > a.z) { | ||||
|         if (c.y > 0.0) | ||||
|             pixels = uvec4( | ||||
|                 imageLoad(shadow_texture_py, i00).r, | ||||
|                 imageLoad(shadow_texture_py, i10).r, | ||||
|                 imageLoad(shadow_texture_py, i01).r, | ||||
|                 imageLoad(shadow_texture_py, i11).r); | ||||
|         else | ||||
|             pixels = uvec4( | ||||
|                 imageLoad(shadow_texture_ny, i00).r, | ||||
|                 imageLoad(shadow_texture_ny, i10).r, | ||||
|                 imageLoad(shadow_texture_ny, i01).r, | ||||
|                 imageLoad(shadow_texture_ny, i11).r); | ||||
|     } else { | ||||
|         if (c.z > 0.0) | ||||
|             pixels = uvec4( | ||||
|                 imageLoad(shadow_texture_pz, i00).r, | ||||
|                 imageLoad(shadow_texture_pz, i10).r, | ||||
|                 imageLoad(shadow_texture_pz, i01).r, | ||||
|                 imageLoad(shadow_texture_pz, i11).r); | ||||
|         else | ||||
|             pixels = uvec4( | ||||
|                 imageLoad(shadow_texture_nz, i00).r, | ||||
|                 imageLoad(shadow_texture_nz, i10).r, | ||||
|                 imageLoad(shadow_texture_nz, i01).r, | ||||
|                 imageLoad(shadow_texture_nz, i11).r); | ||||
|     } | ||||
|     vec4 s = vec4( | ||||
|         CompareShadow(pixels.x, z), | ||||
|         CompareShadow(pixels.y, z), | ||||
|         CompareShadow(pixels.z, z), | ||||
|         CompareShadow(pixels.w, z)); | ||||
|     return vec4(mix2(s, f)); | ||||
| } | ||||
| 
 | ||||
| #else | ||||
| 
 | ||||
| vec4 shadowTexture(vec2 uv, float w) { | ||||
|     return vec4(1.0); | ||||
| } | ||||
| 
 | ||||
| vec4 shadowTextureCube(vec2 uv, float w) { | ||||
|     return vec4(1.0); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| )"; | ||||
| 
 | ||||
|     if (config.state.proctex.enable) | ||||
|  | @ -1331,9 +1495,38 @@ vec4 secondary_fragment_color = vec4(0.0); | |||
|         return out; | ||||
|     } | ||||
| 
 | ||||
|     out += "gl_FragDepth = depth;\n"; | ||||
|     // Round the final fragment color to maintain the PICA's 8 bits of precision
 | ||||
|     out += "color = byteround(last_tex_env_out);\n"; | ||||
|     if (state.shadow_rendering) { | ||||
|         out += R"( | ||||
| #if ALLOW_SHADOW | ||||
| uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF); | ||||
| uint s = uint(last_tex_env_out.g * 0xFF); | ||||
| ivec2 image_coord = ivec2(gl_FragCoord.xy); | ||||
| 
 | ||||
| uint old = imageLoad(shadow_buffer, image_coord).x; | ||||
| uint new; | ||||
| uint old2; | ||||
| do { | ||||
|     old2 = old; | ||||
| 
 | ||||
|     uvec2 ref = DecodeShadow(old); | ||||
|     if (d < ref.x) { | ||||
|         if (s == 0u) { | ||||
|             ref.x = d; | ||||
|         } else { | ||||
|             s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); | ||||
|             ref.y = min(s, ref.y); | ||||
|         } | ||||
|     } | ||||
|     new = EncodeShadow(ref); | ||||
| 
 | ||||
| } while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2); | ||||
| #endif // ALLOW_SHADOW
 | ||||
| )"; | ||||
|     } else { | ||||
|         out += "gl_FragDepth = depth;\n"; | ||||
|         // Round the final fragment color to maintain the PICA's 8 bits of precision
 | ||||
|         out += "color = byteround(last_tex_env_out);\n"; | ||||
|     } | ||||
| 
 | ||||
|     out += "}"; | ||||
| 
 | ||||
|  |  | |||
|  | @ -110,6 +110,10 @@ struct PicaFSConfigState { | |||
|         u32 lut_offset; | ||||
|         Pica::TexturingRegs::ProcTexFilter lut_filter; | ||||
|     } proctex; | ||||
| 
 | ||||
|     bool shadow_rendering; | ||||
|     bool shadow_texture_orthographic; | ||||
|     u32 shadow_texture_bias; | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  |  | |||
|  | @ -36,6 +36,13 @@ static void SetShaderSamplerBinding(GLuint shader, const char* name, | |||
|     } | ||||
| } | ||||
| 
 | ||||
| static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) { | ||||
|     GLint uniform_tex = glGetUniformLocation(shader, name); | ||||
|     if (uniform_tex != -1) { | ||||
|         glUniform1i(uniform_tex, static_cast<GLint>(binding)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void SetShaderSamplerBindings(GLuint shader) { | ||||
|     OpenGLState cur_state = OpenGLState::GetCurState(); | ||||
|     GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); | ||||
|  | @ -56,6 +63,14 @@ static void SetShaderSamplerBindings(GLuint shader) { | |||
|     SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); | ||||
|     SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); | ||||
| 
 | ||||
|     SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer); | ||||
|     SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX); | ||||
|     SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX); | ||||
|     SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY); | ||||
|     SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY); | ||||
|     SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ); | ||||
|     SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ); | ||||
| 
 | ||||
|     cur_state.draw.shader_program = old_program; | ||||
|     cur_state.Apply(); | ||||
| } | ||||
|  |  | |||
|  | @ -32,6 +32,8 @@ struct UniformData { | |||
|     GLint alphatest_ref; | ||||
|     GLfloat depth_scale; | ||||
|     GLfloat depth_offset; | ||||
|     GLfloat shadow_bias_constant; | ||||
|     GLfloat shadow_bias_linear; | ||||
|     GLint scissor_x1; | ||||
|     GLint scissor_y1; | ||||
|     GLint scissor_x2; | ||||
|  | @ -48,7 +50,7 @@ struct UniformData { | |||
| }; | ||||
| 
 | ||||
| static_assert( | ||||
|     sizeof(UniformData) == 0x460, | ||||
|     sizeof(UniformData) == 0x470, | ||||
|     "The size of the UniformData structure has changed, update the structure in the shader"); | ||||
| static_assert(sizeof(UniformData) < 16384, | ||||
|               "UniformData structure must be less than 16kb as per the OpenGL spec"); | ||||
|  |  | |||
|  | @ -65,6 +65,14 @@ OpenGLState::OpenGLState() { | |||
|     proctex_alpha_map.texture_buffer = 0; | ||||
|     proctex_noise_lut.texture_buffer = 0; | ||||
| 
 | ||||
|     image_shadow_buffer = 0; | ||||
|     image_shadow_texture_px = 0; | ||||
|     image_shadow_texture_nx = 0; | ||||
|     image_shadow_texture_py = 0; | ||||
|     image_shadow_texture_ny = 0; | ||||
|     image_shadow_texture_pz = 0; | ||||
|     image_shadow_texture_nz = 0; | ||||
| 
 | ||||
|     draw.read_framebuffer = 0; | ||||
|     draw.draw_framebuffer = 0; | ||||
|     draw.vertex_array = 0; | ||||
|  | @ -255,6 +263,42 @@ void OpenGLState::Apply() const { | |||
|         glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); | ||||
|     } | ||||
| 
 | ||||
|     // Shadow Images
 | ||||
|     if (image_shadow_buffer != cur_state.image_shadow_buffer) { | ||||
|         glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0, | ||||
|                            GL_READ_WRITE, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     if (image_shadow_texture_px != cur_state.image_shadow_texture_px) { | ||||
|         glBindImageTexture(ImageUnits::ShadowTexturePX, image_shadow_texture_px, 0, GL_FALSE, 0, | ||||
|                            GL_READ_ONLY, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     if (image_shadow_texture_nx != cur_state.image_shadow_texture_nx) { | ||||
|         glBindImageTexture(ImageUnits::ShadowTextureNX, image_shadow_texture_nx, 0, GL_FALSE, 0, | ||||
|                            GL_READ_ONLY, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     if (image_shadow_texture_py != cur_state.image_shadow_texture_py) { | ||||
|         glBindImageTexture(ImageUnits::ShadowTexturePY, image_shadow_texture_py, 0, GL_FALSE, 0, | ||||
|                            GL_READ_ONLY, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     if (image_shadow_texture_ny != cur_state.image_shadow_texture_ny) { | ||||
|         glBindImageTexture(ImageUnits::ShadowTextureNY, image_shadow_texture_ny, 0, GL_FALSE, 0, | ||||
|                            GL_READ_ONLY, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     if (image_shadow_texture_pz != cur_state.image_shadow_texture_pz) { | ||||
|         glBindImageTexture(ImageUnits::ShadowTexturePZ, image_shadow_texture_pz, 0, GL_FALSE, 0, | ||||
|                            GL_READ_ONLY, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     if (image_shadow_texture_nz != cur_state.image_shadow_texture_nz) { | ||||
|         glBindImageTexture(ImageUnits::ShadowTextureNZ, image_shadow_texture_nz, 0, GL_FALSE, 0, | ||||
|                            GL_READ_ONLY, GL_R32UI); | ||||
|     } | ||||
| 
 | ||||
|     // Framebuffer
 | ||||
|     if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { | ||||
|         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | ||||
|  | @ -344,6 +388,20 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { | |||
|         proctex_lut.texture_buffer = 0; | ||||
|     if (proctex_diff_lut.texture_buffer == handle) | ||||
|         proctex_diff_lut.texture_buffer = 0; | ||||
|     if (image_shadow_buffer == handle) | ||||
|         image_shadow_buffer = 0; | ||||
|     if (image_shadow_texture_px == handle) | ||||
|         image_shadow_texture_px = 0; | ||||
|     if (image_shadow_texture_nx == handle) | ||||
|         image_shadow_texture_nx = 0; | ||||
|     if (image_shadow_texture_py == handle) | ||||
|         image_shadow_texture_py = 0; | ||||
|     if (image_shadow_texture_ny == handle) | ||||
|         image_shadow_texture_ny = 0; | ||||
|     if (image_shadow_texture_pz == handle) | ||||
|         image_shadow_texture_pz = 0; | ||||
|     if (image_shadow_texture_nz == handle) | ||||
|         image_shadow_texture_nz = 0; | ||||
|     return *this; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -31,6 +31,16 @@ constexpr TextureUnit TextureCube{10}; | |||
| 
 | ||||
| } // namespace TextureUnits
 | ||||
| 
 | ||||
| namespace ImageUnits { | ||||
| constexpr GLuint ShadowBuffer = 0; | ||||
| constexpr GLuint ShadowTexturePX = 1; | ||||
| constexpr GLuint ShadowTextureNX = 2; | ||||
| constexpr GLuint ShadowTexturePY = 3; | ||||
| constexpr GLuint ShadowTextureNY = 4; | ||||
| constexpr GLuint ShadowTexturePZ = 5; | ||||
| constexpr GLuint ShadowTextureNZ = 6; | ||||
| } // namespace ImageUnits
 | ||||
| 
 | ||||
| class OpenGLState { | ||||
| public: | ||||
|     struct { | ||||
|  | @ -121,6 +131,15 @@ public: | |||
|         GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
 | ||||
|     } proctex_diff_lut; | ||||
| 
 | ||||
|     // GL_IMAGE_BINDING_NAME
 | ||||
|     GLuint image_shadow_buffer; | ||||
|     GLuint image_shadow_texture_px; | ||||
|     GLuint image_shadow_texture_nx; | ||||
|     GLuint image_shadow_texture_py; | ||||
|     GLuint image_shadow_texture_ny; | ||||
|     GLuint image_shadow_texture_pz; | ||||
|     GLuint image_shadow_texture_nz; | ||||
| 
 | ||||
|     struct { | ||||
|         GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
 | ||||
|         GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue