mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	renderer/vulkan: Emulate custom border colors in shaders when unavailable. (#6878)
This commit is contained in:
		
							parent
							
								
									f3d92dd3b8
								
							
						
					
					
						commit
						6ddf4b241f
					
				
					 10 changed files with 322 additions and 195 deletions
				
			
		|  | @ -599,6 +599,17 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { | |||
|         SyncTextureLodBias(2); | ||||
|         break; | ||||
| 
 | ||||
|     // Texture borders
 | ||||
|     case PICA_REG_INDEX(texturing.texture0.border_color): | ||||
|         SyncTextureBorderColor(0); | ||||
|         break; | ||||
|     case PICA_REG_INDEX(texturing.texture1.border_color): | ||||
|         SyncTextureBorderColor(1); | ||||
|         break; | ||||
|     case PICA_REG_INDEX(texturing.texture2.border_color): | ||||
|         SyncTextureBorderColor(2); | ||||
|         break; | ||||
| 
 | ||||
|     // Clipping plane
 | ||||
|     case PICA_REG_INDEX(rasterizer.clip_coef[0]): | ||||
|     case PICA_REG_INDEX(rasterizer.clip_coef[1]): | ||||
|  | @ -821,6 +832,16 @@ void RasterizerAccelerated::SyncTextureLodBias(int tex_index) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) { | ||||
|     const auto pica_textures = regs.texturing.GetTextures(); | ||||
|     const auto params = pica_textures[tex_index].config; | ||||
|     const Common::Vec4f border_color = ColorRGBA8(params.border_color.raw); | ||||
|     if (border_color != uniform_block_data.data.tex_border_color[tex_index]) { | ||||
|         uniform_block_data.data.tex_border_color[tex_index] = border_color; | ||||
|         uniform_block_data.dirty = true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerAccelerated::SyncClipCoef() { | ||||
|     const auto raw_clip_coef = regs.rasterizer.GetClipCoef(); | ||||
|     const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), | ||||
|  |  | |||
|  | @ -97,6 +97,9 @@ protected: | |||
|     /// Syncs the texture LOD bias to match the PICA register
 | ||||
|     void SyncTextureLodBias(int tex_index); | ||||
| 
 | ||||
|     /// Syncs the texture border color to match the PICA registers
 | ||||
|     void SyncTextureBorderColor(int tex_index); | ||||
| 
 | ||||
|     /// Syncs the clip coefficients to match the PICA register
 | ||||
|     void SyncClipCoef(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -409,7 +409,9 @@ bool Instance::CreateDevice() { | |||
|     const bool has_extended_dynamic_state = | ||||
|         add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, is_arm || is_qualcomm, | ||||
|                       "it is broken on Qualcomm and ARM drivers"); | ||||
|     const bool has_custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); | ||||
|     const bool has_custom_border_color = | ||||
|         add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm, | ||||
|                       "it is broken on most Qualcomm driver versions"); | ||||
|     const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); | ||||
|     const bool has_pipeline_creation_cache_control = | ||||
|         add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME); | ||||
|  |  | |||
|  | @ -69,6 +69,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) { | |||
| 
 | ||||
|     state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0); | ||||
| 
 | ||||
|     const auto pica_textures = regs.texturing.GetTextures(); | ||||
|     for (u32 tex_index = 0; tex_index < 3; tex_index++) { | ||||
|         const auto config = pica_textures[tex_index].config; | ||||
|         state.texture_border_color[tex_index].enable_s.Assign( | ||||
|             !instance.IsCustomBorderColorSupported() && | ||||
|             config.wrap_s == TexturingRegs::TextureConfig::WrapMode::ClampToBorder); | ||||
|         state.texture_border_color[tex_index].enable_t.Assign( | ||||
|             !instance.IsCustomBorderColorSupported() && | ||||
|             config.wrap_t == TexturingRegs::TextureConfig::WrapMode::ClampToBorder); | ||||
|     } | ||||
| 
 | ||||
|     // Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
 | ||||
|     const bool emulate_logic_op = instance.NeedsLogicOpEmulation() && | ||||
|                                   !Pica::g_state.regs.framebuffer.output_merger.alphablend_enable; | ||||
|  | @ -284,54 +295,6 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { | |||
|             stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); | ||||
| } | ||||
| 
 | ||||
| static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { | ||||
|     const auto& state = config.state; | ||||
|     switch (texture_unit) { | ||||
|     case 0: | ||||
|         // Only unit 0 respects the texturing type
 | ||||
|         switch (state.texture0_type) { | ||||
|         case TexturingRegs::TextureConfig::Texture2D: | ||||
|             return "textureLod(tex0, texcoord0, getLod(texcoord0 * " | ||||
|                    "vec2(textureSize(tex0, 0))) + tex_lod_bias[0])"; | ||||
|         case TexturingRegs::TextureConfig::Projection2D: | ||||
|             // TODO (wwylele): find the exact LOD formula for projection texture
 | ||||
|             return "textureProj(tex0, vec3(texcoord0, texcoord0_w))"; | ||||
|         case TexturingRegs::TextureConfig::TextureCube: | ||||
|             return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; | ||||
|         case TexturingRegs::TextureConfig::Shadow2D: | ||||
|             return "shadowTexture(texcoord0, texcoord0_w)"; | ||||
|         case TexturingRegs::TextureConfig::ShadowCube: | ||||
|             return "shadowTextureCube(texcoord0, texcoord0_w)"; | ||||
|         case TexturingRegs::TextureConfig::Disabled: | ||||
|             return "vec4(0.0)"; | ||||
|         default: | ||||
|             LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); | ||||
|             UNIMPLEMENTED(); | ||||
|             return "texture(tex0, texcoord0)"; | ||||
|         } | ||||
|     case 1: | ||||
|         return "textureLod(tex1, texcoord1, getLod(texcoord1 * " | ||||
|                "vec2(textureSize(tex1, 0))) + tex_lod_bias[1])"; | ||||
|     case 2: | ||||
|         if (state.texture2_use_coord1) | ||||
|             return "textureLod(tex2, texcoord1, getLod(texcoord1 * " | ||||
|                    "vec2(textureSize(tex2, 0))) + tex_lod_bias[2])"; | ||||
|         else | ||||
|             return "textureLod(tex2, texcoord2, getLod(texcoord2 * " | ||||
|                    "vec2(textureSize(tex2, 0))) + tex_lod_bias[2])"; | ||||
|     case 3: | ||||
|         if (state.proctex.enable) { | ||||
|             return "ProcTex()"; | ||||
|         } else { | ||||
|             LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it"); | ||||
|             return "vec4(0.0)"; | ||||
|         } | ||||
|     default: | ||||
|         UNREACHABLE(); | ||||
|         return ""; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// Writes the specified TEV stage source component(s)
 | ||||
| static void AppendSource(std::string& out, const PicaFSConfig& config, | ||||
|                          TevStageConfig::Source source, std::string_view index_name) { | ||||
|  | @ -347,16 +310,16 @@ static void AppendSource(std::string& out, const PicaFSConfig& config, | |||
|         out += "secondary_fragment_color"; | ||||
|         break; | ||||
|     case Source::Texture0: | ||||
|         out += SampleTexture(config, 0); | ||||
|         out += "sampleTexUnit0()"; | ||||
|         break; | ||||
|     case Source::Texture1: | ||||
|         out += SampleTexture(config, 1); | ||||
|         out += "sampleTexUnit1()"; | ||||
|         break; | ||||
|     case Source::Texture2: | ||||
|         out += SampleTexture(config, 2); | ||||
|         out += "sampleTexUnit2()"; | ||||
|         break; | ||||
|     case Source::Texture3: | ||||
|         out += SampleTexture(config, 3); | ||||
|         out += "sampleTexUnit3()"; | ||||
|         break; | ||||
|     case Source::PreviousBuffer: | ||||
|         out += "combiner_buffer"; | ||||
|  | @ -656,7 +619,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) { | |||
| 
 | ||||
|     // Compute fragment normals and tangents
 | ||||
|     const auto perturbation = [&] { | ||||
|         return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); | ||||
|         return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector); | ||||
|     }; | ||||
| 
 | ||||
|     switch (lighting.bump_mode) { | ||||
|  | @ -700,7 +663,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) { | |||
|            "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; | ||||
| 
 | ||||
|     if (lighting.enable_shadow) { | ||||
|         std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); | ||||
|         std::string shadow_texture = fmt::format("sampleTexUnit{}()", lighting.shadow_selector); | ||||
|         if (lighting.shadow_invert) { | ||||
|             out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); | ||||
|         } else { | ||||
|  | @ -1310,6 +1273,7 @@ float mix2(vec4 s, vec2 a) { | |||
| 
 | ||||
| vec4 shadowTexture(vec2 uv, float w) { | ||||
| )"; | ||||
| 
 | ||||
|     if (!config.state.shadow_texture_orthographic) { | ||||
|         out += "uv /= w;"; | ||||
|     } | ||||
|  | @ -1344,9 +1308,7 @@ vec4 shadowTextureCube(vec2 uv, float w) { | |||
|         uv = -c.xy; | ||||
|         if (c.z > 0.0) uv.x = -uv.x; | ||||
|     } | ||||
| )"; | ||||
|     out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; | ||||
|     out += R"( | ||||
|     uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); | ||||
|     vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); | ||||
|     vec2 coord_floor = floor(coord); | ||||
|     vec2 f = coord - coord_floor; | ||||
|  | @ -1409,10 +1371,92 @@ vec4 shadowTextureCube(vec2 uv, float w) { | |||
|         CompareShadow(pixels.w, z)); | ||||
|     return vec4(mix2(s, f)); | ||||
| } | ||||
| )"; | ||||
|     )"; | ||||
| 
 | ||||
|     if (config.state.proctex.enable) | ||||
|     if (config.state.proctex.enable) { | ||||
|         AppendProcTexSampler(out, config); | ||||
|     } | ||||
| 
 | ||||
|     for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) { | ||||
|         out += fmt::format("vec4 sampleTexUnit{}() {{", texture_unit); | ||||
|         if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { | ||||
|             out += "return vec4(0.0);}"; | ||||
|             continue; | ||||
|         } else if (texture_unit == 3) { | ||||
|             if (state.proctex.enable) { | ||||
|                 out += "return ProcTex();}"; | ||||
|             } else { | ||||
|                 out += "return vec4(0.0);}"; | ||||
|             } | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; | ||||
|         if (config.state.texture_border_color[texture_unit].enable_s) { | ||||
|             out += fmt::format(R"( | ||||
|             if (texcoord{}.x < 0 || texcoord{}.x > 1) {{ | ||||
|                 return tex_border_color[{}]; | ||||
|             }} | ||||
|             )", | ||||
|                                texcoord_num, texcoord_num, texture_unit); | ||||
|         } | ||||
|         if (config.state.texture_border_color[texture_unit].enable_t) { | ||||
|             out += fmt::format(R"( | ||||
|             if (texcoord{}.y < 0 || texcoord{}.y > 1) {{ | ||||
|                 return tex_border_color[{}]; | ||||
|             }} | ||||
|             )", | ||||
|                                texcoord_num, texcoord_num, texture_unit); | ||||
|         } | ||||
|         // TODO: 3D border?
 | ||||
| 
 | ||||
|         switch (texture_unit) { | ||||
|         case 0: | ||||
|             // Only unit 0 respects the texturing type
 | ||||
|             switch (state.texture0_type) { | ||||
|             case TexturingRegs::TextureConfig::Texture2D: | ||||
|                 out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * " | ||||
|                        "vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);"; | ||||
|                 break; | ||||
|             case TexturingRegs::TextureConfig::Projection2D: | ||||
|                 // TODO (wwylele): find the exact LOD formula for projection texture
 | ||||
|                 out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));"; | ||||
|                 break; | ||||
|             case TexturingRegs::TextureConfig::TextureCube: | ||||
|                 out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));"; | ||||
|                 break; | ||||
|             case TexturingRegs::TextureConfig::Shadow2D: | ||||
|                 out += "return shadowTexture(texcoord0, texcoord0_w);"; | ||||
|                 break; | ||||
|             case TexturingRegs::TextureConfig::ShadowCube: | ||||
|                 out += "return shadowTextureCube(texcoord0, texcoord0_w);"; | ||||
|                 break; | ||||
|             default: | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); | ||||
|                 UNIMPLEMENTED(); | ||||
|                 out += "return texture(tex0, texcoord0);"; | ||||
|                 break; | ||||
|             } | ||||
|         case 1: | ||||
|             out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, " | ||||
|                    "0))) + tex_lod_bias[1]);"; | ||||
|             break; | ||||
|         case 2: | ||||
|             if (state.texture2_use_coord1) { | ||||
|                 out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * " | ||||
|                        "vec2(textureSize(tex2, 0))) + tex_lod_bias[1]);"; | ||||
|             } else { | ||||
|                 out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * " | ||||
|                        "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; | ||||
|             } | ||||
|             break; | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         out += "}"; | ||||
|     } | ||||
| 
 | ||||
|     // We round the interpolated primary color to the nearest 1/255th
 | ||||
|     // This maintains the PICA's 8 bits of precision
 | ||||
|  |  | |||
|  | @ -57,6 +57,11 @@ struct PicaFSConfigState { | |||
|         BitField<28, 1, u32> shadow_texture_orthographic; | ||||
|     }; | ||||
| 
 | ||||
|     union { | ||||
|         BitField<0, 1, u32> enable_s; | ||||
|         BitField<1, 1, u32> enable_t; | ||||
|     } texture_border_color[3]; | ||||
| 
 | ||||
|     std::array<TevStageConfigRaw, 6> tev_stages; | ||||
| 
 | ||||
|     struct { | ||||
|  |  | |||
|  | @ -21,8 +21,8 @@ FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSC | |||
|     DefineArithmeticTypes(); | ||||
|     DefineUniformStructs(); | ||||
|     DefineInterface(); | ||||
|     if (config.state.proctex.enable) { | ||||
|         DefineProcTexSampler(); | ||||
|     for (u32 i = 0; i < NUM_TEX_UNITS; i++) { | ||||
|         DefineTexSampler(i); | ||||
|     } | ||||
|     DefineEntryPoint(); | ||||
| } | ||||
|  | @ -225,7 +225,8 @@ void FragmentModule::WriteLighting() { | |||
| 
 | ||||
|     // Compute fragment normals and tangents
 | ||||
|     const auto perturbation = [&]() -> Id { | ||||
|         const Id texel{SampleTexture(lighting.bump_selector)}; | ||||
|         const Id texel{ | ||||
|             OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.bump_selector])}; | ||||
|         const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)}; | ||||
|         const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))}; | ||||
|         return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f)); | ||||
|  | @ -284,7 +285,7 @@ void FragmentModule::WriteLighting() { | |||
| 
 | ||||
|     Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)}; | ||||
|     if (lighting.enable_shadow) { | ||||
|         shadow = SampleTexture(lighting.shadow_selector); | ||||
|         shadow = OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.shadow_selector]); | ||||
|         if (lighting.shadow_invert) { | ||||
|             shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow); | ||||
|         } | ||||
|  | @ -710,89 +711,6 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) | |||
|     } | ||||
| } | ||||
| 
 | ||||
/// Emits the SPIR-V instructions that sample the given texture unit and returns the Id of the
/// sampled 4-component value. Units 0-2 read tex0..tex2 (unit 0 additionally honours the
/// configured texture type); unit 3 calls the procedural texture function when it is enabled.
/// A zero vector is returned for disabled or unhandled configurations.
Id FragmentModule::SampleTexture(u32 texture_unit) {
    const PicaFSConfigState& state = config.state;
    const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};

    // PICA's LOD formula for 2D textures.
    // This LOD formula is the same as the LOD lower limit defined in OpenGL.
    // f(x, y) >= max{m_u, m_v, m_w}
    // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
    const auto sample_lod = [this, texture_unit](Id tex_id, Id texcoord_id) {
        const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
        const Id tex_image{OpImage(image2d_id, sampled_image)};
        const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
        const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
        // Scale the normalized coordinate by the level-0 size before taking derivatives.
        const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
        const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
        const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
        const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
        const Id dx_dy_max{
            OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
        const Id lod{OpLog2(f32_id, dx_dy_max)};
        // NOTE(review): member index 28 is presumably the per-unit LOD bias array of the
        // shader data block - confirm against the uniform struct definition.
        const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
        const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
        return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
                                        spv::ImageOperandsMask::Lod, biased_lod);
    };

    // Implicit-LOD sample addressed by (texcoord0.x, texcoord0.y, texcoord0_w); optionally a
    // projective sample. The image type is chosen by comparing tex_id against the cube sampler.
    const auto sample = [this](Id tex_id, bool projection) {
        const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
        const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
        const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
        const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
        const Id coord{OpCompositeConstruct(vec_ids.Get(3),
                                            OpCompositeExtract(f32_id, texcoord0, 0),
                                            OpCompositeExtract(f32_id, texcoord0, 1), texcoord0_w)};
        if (projection) {
            return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
        } else {
            return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
        }
    };

    switch (texture_unit) {
    case 0:
        // Only unit 0 respects the texturing type
        switch (state.texture0_type) {
        case Pica::TexturingRegs::TextureConfig::Texture2D:
            return sample_lod(tex0_id, texcoord0_id);
        case Pica::TexturingRegs::TextureConfig::Projection2D:
            return sample(tex0_id, true);
        case Pica::TexturingRegs::TextureConfig::TextureCube:
            return sample(tex_cube_id, false);
        case Pica::TexturingRegs::TextureConfig::Shadow2D:
            return SampleShadow();
        // ShadowCube has no case here, so it ends up in the default branch below.
        // case Pica::TexturingRegs::TextureConfig::ShadowCube:
        // return "shadowTextureCube(texcoord0, texcoord0_w)";
        case Pica::TexturingRegs::TextureConfig::Disabled:
            return zero_vec;
        default:
            LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
            UNIMPLEMENTED();
            return zero_vec;
        }
    case 1:
        return sample_lod(tex1_id, texcoord1_id);
    case 2:
        // Unit 2 can be configured to reuse the coordinates of unit 1.
        if (state.texture2_use_coord1) {
            return sample_lod(tex2_id, texcoord1_id);
        } else {
            return sample_lod(tex2_id, texcoord2_id);
        }
    case 3:
        if (state.proctex.enable) {
            return OpFunctionCall(vec_ids.Get(4), proctex_func);
        } else {
            LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
            return zero_vec;
        }
    default:
        UNREACHABLE();
        return void_id;
    }
}
| 
 | ||||
| Id FragmentModule::CompareShadow(Id pixel, Id z) { | ||||
|     const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))}; | ||||
|     const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))}; | ||||
|  | @ -802,7 +720,7 @@ Id FragmentModule::CompareShadow(Id pixel, Id z) { | |||
| } | ||||
| 
 | ||||
| Id FragmentModule::SampleShadow() { | ||||
|     const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)}; | ||||
|     const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord_id[0])}; | ||||
|     const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; | ||||
|     const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), ConstF32(1.f)), | ||||
|                               ConstF32(16777215.f))}; | ||||
|  | @ -941,11 +859,145 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id | |||
|     return ProcTexLookupLUT(offset, combined); | ||||
| } | ||||
| 
 | ||||
| void FragmentModule::DefineProcTexSampler() { | ||||
| void FragmentModule::DefineTexSampler(u32 texture_unit) { | ||||
|     const PicaFSConfigState& state = config.state; | ||||
| 
 | ||||
|     const Id func_type{TypeFunction(vec_ids.Get(4))}; | ||||
|     proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type); | ||||
|     sample_tex_unit_func[texture_unit] = | ||||
|         OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type); | ||||
|     AddLabel(OpLabel()); | ||||
| 
 | ||||
|     const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)}; | ||||
| 
 | ||||
|     if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { | ||||
|         OpReturnValue(zero_vec); | ||||
|         OpFunctionEnd(); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (texture_unit == 3) { | ||||
|         if (state.proctex.enable) { | ||||
|             OpReturnValue(ProcTexSampler()); | ||||
|         } else { | ||||
|             OpReturnValue(zero_vec); | ||||
|         } | ||||
|         OpFunctionEnd(); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     const Id border_label{OpLabel()}; | ||||
|     const Id not_border_label{OpLabel()}; | ||||
| 
 | ||||
|     u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; | ||||
|     const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])}; | ||||
| 
 | ||||
|     auto& texture_border_color = state.texture_border_color[texture_unit]; | ||||
|     if (texture_border_color.enable_s || texture_border_color.enable_t) { | ||||
|         const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)}; | ||||
|         const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)}; | ||||
| 
 | ||||
|         const Id s_lt_zero{OpFOrdLessThan(bool_id, texcoord_s, ConstF32(0.0f))}; | ||||
|         const Id s_gt_one{OpFOrdGreaterThan(bool_id, texcoord_s, ConstF32(1.0f))}; | ||||
|         const Id t_lt_zero{OpFOrdLessThan(bool_id, texcoord_t, ConstF32(0.0f))}; | ||||
|         const Id t_gt_one{OpFOrdGreaterThan(bool_id, texcoord_t, ConstF32(1.0f))}; | ||||
| 
 | ||||
|         Id cond{}; | ||||
|         if (texture_border_color.enable_s && texture_border_color.enable_t) { | ||||
|             cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(4), s_lt_zero, s_gt_one, | ||||
|                                                        t_lt_zero, t_gt_one)); | ||||
|         } else if (texture_border_color.enable_s) { | ||||
|             cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), s_lt_zero, s_gt_one)); | ||||
|         } else if (texture_border_color.enable_t) { | ||||
|             cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), t_lt_zero, t_gt_one)); | ||||
|         } | ||||
| 
 | ||||
|         OpSelectionMerge(not_border_label, spv::SelectionControlMask::MaskNone); | ||||
|         OpBranchConditional(cond, border_label, not_border_label); | ||||
| 
 | ||||
|         AddLabel(border_label); | ||||
|         const Id border_color{ | ||||
|             GetShaderDataMember(vec_ids.Get(4), ConstS32(29), ConstU32(texture_unit))}; | ||||
|         OpReturnValue(border_color); | ||||
| 
 | ||||
|         AddLabel(not_border_label); | ||||
|     } | ||||
| 
 | ||||
|     // PICA's LOD formula for 2D textures.
 | ||||
|     // This LOD formula is the same as the LOD lower limit defined in OpenGL.
 | ||||
|     // f(x, y) >= max{m_u, m_v, m_w}
 | ||||
|     // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
 | ||||
|     const auto sample_lod = [&](Id tex_id) { | ||||
|         const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)}; | ||||
|         const Id tex_image{OpImage(image2d_id, sampled_image)}; | ||||
|         const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))}; | ||||
|         const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))}; | ||||
|         const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))}; | ||||
|         const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))}; | ||||
|         const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)}; | ||||
|         const Id dx_dy_max{ | ||||
|             OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))}; | ||||
|         const Id lod{OpLog2(f32_id, dx_dy_max)}; | ||||
|         const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))}; | ||||
|         const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)}; | ||||
|         return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord, | ||||
|                                         spv::ImageOperandsMask::Lod, biased_lod); | ||||
|     }; | ||||
| 
 | ||||
|     const auto sample_3d = [&](Id tex_id, bool projection) { | ||||
|         const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id; | ||||
|         const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)}; | ||||
|         const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; | ||||
|         const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord, 0), | ||||
|                                             OpCompositeExtract(f32_id, texcoord, 1), texcoord0_w)}; | ||||
|         if (projection) { | ||||
|             return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord); | ||||
|         } else { | ||||
|             return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord); | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     Id ret_val{void_id}; | ||||
|     switch (texture_unit) { | ||||
|     case 0: | ||||
|         // Only unit 0 respects the texturing type
 | ||||
|         switch (state.texture0_type) { | ||||
|         case Pica::TexturingRegs::TextureConfig::Texture2D: | ||||
|             ret_val = sample_lod(tex0_id); | ||||
|             break; | ||||
|         case Pica::TexturingRegs::TextureConfig::Projection2D: | ||||
|             ret_val = sample_3d(tex0_id, true); | ||||
|             break; | ||||
|         case Pica::TexturingRegs::TextureConfig::TextureCube: | ||||
|             ret_val = sample_3d(tex_cube_id, false); | ||||
|             break; | ||||
|         case Pica::TexturingRegs::TextureConfig::Shadow2D: | ||||
|             ret_val = SampleShadow(); | ||||
|             // case Pica::TexturingRegs::TextureConfig::ShadowCube:
 | ||||
|             // return "shadowTextureCube(texcoord0, texcoord0_w)";
 | ||||
|             break; | ||||
|         default: | ||||
|             LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type); | ||||
|             UNIMPLEMENTED(); | ||||
|             ret_val = zero_vec; | ||||
|             break; | ||||
|         } | ||||
|         break; | ||||
|     case 1: | ||||
|         ret_val = sample_lod(tex1_id); | ||||
|         break; | ||||
|     case 2: | ||||
|         ret_val = sample_lod(tex2_id); | ||||
|         break; | ||||
|     default: | ||||
|         UNREACHABLE(); | ||||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     OpReturnValue(ret_val); | ||||
|     OpFunctionEnd(); | ||||
| } | ||||
| 
 | ||||
| Id FragmentModule::ProcTexSampler() { | ||||
|     // Define noise tables at the beginning of the function
 | ||||
|     if (config.state.proctex.noise_enable) { | ||||
|         noise1d_table = | ||||
|  | @ -957,24 +1009,11 @@ void FragmentModule::DefineProcTexSampler() { | |||
| 
 | ||||
|     Id uv{}; | ||||
|     if (config.state.proctex.coord < 3) { | ||||
|         Id texcoord_id{}; | ||||
|         switch (config.state.proctex.coord.Value()) { | ||||
|         case 0: | ||||
|             texcoord_id = texcoord0_id; | ||||
|             break; | ||||
|         case 1: | ||||
|             texcoord_id = texcoord1_id; | ||||
|             break; | ||||
|         case 2: | ||||
|             texcoord_id = texcoord2_id; | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)}; | ||||
|         const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])}; | ||||
|         uv = OpFAbs(vec_ids.Get(2), texcoord); | ||||
|     } else { | ||||
|         LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3"); | ||||
|         uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id)); | ||||
|         uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord_id[0])); | ||||
|     } | ||||
| 
 | ||||
|     // This LOD formula is the same as the LOD upper limit defined in OpenGL.
 | ||||
|  | @ -1058,8 +1097,7 @@ void FragmentModule::DefineProcTexSampler() { | |||
|         final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3); | ||||
|     } | ||||
| 
 | ||||
|     OpReturnValue(final_color); | ||||
|     OpFunctionEnd(); | ||||
|     return final_color; | ||||
| } | ||||
| 
 | ||||
| Id FragmentModule::Byteround(Id variable_id, u32 size) { | ||||
|  | @ -1226,13 +1264,13 @@ Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) { | |||
|     case Source::SecondaryFragmentColor: | ||||
|         return secondary_fragment_color; | ||||
|     case Source::Texture0: | ||||
|         return SampleTexture(0); | ||||
|         return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[0]); | ||||
|     case Source::Texture1: | ||||
|         return SampleTexture(1); | ||||
|         return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[1]); | ||||
|     case Source::Texture2: | ||||
|         return SampleTexture(2); | ||||
|         return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[2]); | ||||
|     case Source::Texture3: | ||||
|         return SampleTexture(3); | ||||
|         return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[3]); | ||||
|     case Source::PreviousBuffer: | ||||
|         return combiner_buffer; | ||||
|     case Source::Constant: | ||||
|  | @ -1428,9 +1466,9 @@ void FragmentModule::DefineEntryPoint() { | |||
| 
 | ||||
|     const Id main_type{TypeFunction(TypeVoid())}; | ||||
|     const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)}; | ||||
|     AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id, | ||||
|                   texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id, | ||||
|                   gl_frag_coord_id, gl_frag_depth_id); | ||||
|     AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, | ||||
|                   texcoord_id[0], texcoord_id[1], texcoord_id[2], texcoord0_w_id, normquat_id, | ||||
|                   view_id, color_id, gl_frag_coord_id, gl_frag_depth_id); | ||||
|     AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft); | ||||
|     AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing); | ||||
| } | ||||
|  | @ -1443,21 +1481,25 @@ void FragmentModule::DefineUniformStructs() { | |||
|     const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))}; | ||||
|     const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))}; | ||||
|     const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))}; | ||||
|     const Id border_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_NON_PROC_TEX_UNITS))}; | ||||
| 
 | ||||
|     const Id shader_data_struct_id{TypeStruct( | ||||
|         i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, i32_id, | ||||
|         i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id, | ||||
|         vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3), | ||||
|         light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(3), vec_ids.Get(4))}; | ||||
|     const Id shader_data_struct_id{ | ||||
|         TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, | ||||
|                    i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, | ||||
|                    lighting_lut_array_id, vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), | ||||
|                    vec_ids.Get(2), vec_ids.Get(3), light_src_array_id, const_color_array_id, | ||||
|                    vec_ids.Get(4), vec_ids.Get(3), border_color_array_id, vec_ids.Get(4))}; | ||||
| 
 | ||||
|     constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u}; | ||||
|     constexpr std::array shader_data_offsets{ | ||||
|         0u,  4u,  8u,  12u, 16u, 20u,  24u,  28u,  32u,  36u,  40u,  44u,   48u,   52u,   56u, | ||||
|         60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, 224u, 240u, 1136u, 1232u, 1248u, 1264u}; | ||||
|     constexpr std::array shader_data_offsets{0u,   4u,   8u,    12u,   16u,   20u,   24u,  28u, | ||||
|                                              32u,  36u,  40u,   44u,   48u,   52u,   56u,  60u, | ||||
|                                              64u,  68u,  72u,   80u,   176u,  192u,  200u, 208u, | ||||
|                                              224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u}; | ||||
| 
 | ||||
|     Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u); | ||||
|     Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u); | ||||
|     Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u); | ||||
|     Decorate(border_color_array_id, spv::Decoration::ArrayStride, 16u); | ||||
|     for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) { | ||||
|         MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]); | ||||
|     } | ||||
|  | @ -1475,9 +1517,9 @@ void FragmentModule::DefineUniformStructs() { | |||
| void FragmentModule::DefineInterface() { | ||||
|     // Define interface block
 | ||||
|     primary_color_id = DefineInput(vec_ids.Get(4), 1); | ||||
|     texcoord0_id = DefineInput(vec_ids.Get(2), 2); | ||||
|     texcoord1_id = DefineInput(vec_ids.Get(2), 3); | ||||
|     texcoord2_id = DefineInput(vec_ids.Get(2), 4); | ||||
|     texcoord_id[0] = DefineInput(vec_ids.Get(2), 2); | ||||
|     texcoord_id[1] = DefineInput(vec_ids.Get(2), 3); | ||||
|     texcoord_id[2] = DefineInput(vec_ids.Get(2), 4); | ||||
|     texcoord0_w_id = DefineInput(f32_id, 5); | ||||
|     normquat_id = DefineInput(vec_ids.Get(4), 6); | ||||
|     view_id = DefineInput(vec_ids.Get(3), 7); | ||||
|  |  | |||
|  | @ -30,6 +30,8 @@ class FragmentModule : public Sirit::Module { | |||
|     static constexpr u32 NUM_TEV_STAGES = 6; | ||||
|     static constexpr u32 NUM_LIGHTS = 8; | ||||
|     static constexpr u32 NUM_LIGHTING_SAMPLERS = 24; | ||||
|     static constexpr u32 NUM_TEX_UNITS = 4; | ||||
|     static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3; | ||||
| 
 | ||||
| public: | ||||
|     explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config); | ||||
|  | @ -57,15 +59,15 @@ private: | |||
|     /// Writes the code to emulate the specified TEV stage
 | ||||
|     void WriteTevStage(s32 index); | ||||
| 
 | ||||
|     /// Defines the tex3 proctex sampling function
 | ||||
|     void DefineProcTexSampler(); | ||||
|     /// Defines the basic texture sampling functions for a unit
 | ||||
|     void DefineTexSampler(u32 texture_unit); | ||||
| 
 | ||||
|     /// Function for sampling the procedurally generated texture unit.
 | ||||
|     Id ProcTexSampler(); | ||||
| 
 | ||||
|     /// Writes the if-statement condition used to evaluate alpha testing.
 | ||||
|     void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func); | ||||
| 
 | ||||
|     /// Samples the current fragment texel from the provided texture unit
 | ||||
|     [[nodiscard]] Id SampleTexture(u32 texture_unit); | ||||
| 
 | ||||
|     /// Samples the current fragment texel from shadow plane
 | ||||
|     [[nodiscard]] Id SampleShadow(); | ||||
| 
 | ||||
|  | @ -237,9 +239,7 @@ private: | |||
|     Id shader_data_id{}; | ||||
| 
 | ||||
|     Id primary_color_id{}; | ||||
|     Id texcoord0_id{}; | ||||
|     Id texcoord1_id{}; | ||||
|     Id texcoord2_id{}; | ||||
|     Id texcoord_id[NUM_NON_PROC_TEX_UNITS]{}; | ||||
|     Id texcoord0_w_id{}; | ||||
|     Id normquat_id{}; | ||||
|     Id view_id{}; | ||||
|  | @ -276,7 +276,7 @@ private: | |||
|     Id alpha_results_2{}; | ||||
|     Id alpha_results_3{}; | ||||
| 
 | ||||
|     Id proctex_func{}; | ||||
|     Id sample_tex_unit_func[NUM_TEX_UNITS]{}; | ||||
|     Id noise1d_table{}; | ||||
|     Id noise2d_table{}; | ||||
|     Id lut_offsets{}; | ||||
|  |  | |||
|  | @ -67,6 +67,7 @@ layout ({}std140) uniform shader_data {{ | |||
|     vec4 const_color[NUM_TEV_STAGES]; | ||||
|     vec4 tev_combiner_buffer_color; | ||||
|     vec3 tex_lod_bias; | ||||
|     vec4 tex_border_color[3]; | ||||
|     vec4 clip_coef; | ||||
| }}; | ||||
| )"; | ||||
|  |  | |||
|  | @ -64,10 +64,11 @@ struct UniformData { | |||
|     alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
 | ||||
|     alignas(16) Common::Vec4f tev_combiner_buffer_color; | ||||
|     alignas(16) Common::Vec3f tex_lod_bias; | ||||
|     alignas(16) Common::Vec4f tex_border_color[3]; | ||||
|     alignas(16) Common::Vec4f clip_coef; | ||||
| }; | ||||
| 
 | ||||
| static_assert(sizeof(UniformData) == 0x500, | ||||
| static_assert(sizeof(UniformData) == 0x530, | ||||
|               "The size of the UniformData does not match the structure in the shader"); | ||||
| static_assert(sizeof(UniformData) < 16384, | ||||
|               "UniformData structure must be less than 16kb as per the OpenGL spec"); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue