From a19166ddec72cd58e4cbaaf6bfbd10c01cfcc171 Mon Sep 17 00:00:00 2001 From: Vitor Kiguchi Date: Sat, 20 Jan 2024 19:51:18 -0300 Subject: [PATCH 1/4] pica_types: minor cleanup --- src/video_core/pica_types.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 4fd27f9fe..ad454555e 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -50,7 +51,7 @@ public: hex = sign; } - std::memcpy(&res.value, &hex, sizeof(float)); + res.value = std::bit_cast(hex); return res; } @@ -95,17 +96,17 @@ public: } constexpr Float& operator/=(const Float& flt) { - value /= flt.ToFloat32(); + value = operator/(flt).value; return *this; } constexpr Float& operator+=(const Float& flt) { - value += flt.ToFloat32(); + value = operator+(flt).value; return *this; } constexpr Float& operator-=(const Float& flt) { - value -= flt.ToFloat32(); + value = operator-(flt).value; return *this; } From d2af98673a636d9b86ea288e676a3e9b72c8eee5 Mon Sep 17 00:00:00 2001 From: Vitor Kiguchi Date: Sat, 20 Jan 2024 21:20:28 -0300 Subject: [PATCH 2/4] pica_types: float: truncate, flush to 0, and treat infinities for all values doing it at FromFloat32 results in this being applied for all float that aren't constructed from raw. Note: due to lack of compiler support for C++23 at the moment, the use of std::isnormal and std::abs results in Trunc not being constexpr, which required the changes to Zero, One, and operator-, to prevent FromFloat32 being used in constexpr contexts, and those specific changes may be reverted in the future. --- src/video_core/pica_types.h | 66 ++++++++++++++++++- .../renderer_software/sw_rasterizer.cpp | 5 +- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index ad454555e..043691e87 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -29,6 +30,41 @@ public: static constexpr Float FromFloat32(float val) { Float ret; ret.value = val; + return Trunc(ret); + } + + static constexpr Float MinNormal() { + Float ret; + // Mininum normal value = 1.0 / (1 << ((1 << (E - 1)) - 2)); + if constexpr (E == 5) { + ret.value = 0x1.p-14; + } else { + // E == 7 + ret.value = (0x1.p-62); + } + return ret; + } + + // these values are approximate, rounded up + static constexpr Float Max() { + Float ret; + if constexpr (E == 5) { + ret.value = 0x1.p16; + } else { + // E == 7 + ret.value = 0x1.p64; + } + return ret; + } + + // before C++23 std::isnormal and std::abs aren't considered constexpr so this function can't be + // used as constexpr until the compilers support that. + static constexpr Float Trunc(const Float& val) { + Float ret = val.Flushed().InfChecked(); + if (std::isnormal(val.ToFloat32())) { + u32 hex = std::bit_cast(ret.ToFloat32()) & (0xffffffff ^ ((1 << (23 - M)) - 1)); + ret.value = std::bit_cast(hex); + } return ret; } @@ -57,11 +93,15 @@ public: } static constexpr Float Zero() { - return FromFloat32(0.f); + Float ret; + ret.value = 0.f; + return ret; } static constexpr Float One() { - return FromFloat32(1.f); + Float ret; + ret.value = 1.f; + return ret; } // Not recommended for anything but logging @@ -69,6 +109,24 @@ public: return value; } + constexpr Float Flushed() const { + Float ret; + ret.value = value; + if (std::abs(value) < MinNormal().ToFloat32()) { + ret.value = 0; + } + return ret; + } + + constexpr Float InfChecked() const { + Float ret; + ret.value = value; + if (std::abs(value) > Max().ToFloat32()) { + ret.value = value * std::numeric_limits::infinity(); + } + return ret; + } + constexpr Float operator*(const Float& flt) const { float result = value * flt.ToFloat32(); // PICA gives 0 instead of NaN when multiplying by inf @@ -111,7 +169,9 @@ public: } constexpr Float operator-() const { - return Float::FromFloat32(-ToFloat32()); + Float ret; + ret.value = -value; + return ret; } constexpr bool operator<(const Float& flt) const { diff --git a/src/video_core/renderer_software/sw_rasterizer.cpp b/src/video_core/renderer_software/sw_rasterizer.cpp index f0f290ebe..6011400e7 100644 --- a/src/video_core/renderer_software/sw_rasterizer.cpp +++ b/src/video_core/renderer_software/sw_rasterizer.cpp @@ -125,9 +125,8 @@ void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::O auto* input_list = &buffer_b; // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. - // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest - // epsilon possible within f24 accuracy. - static constexpr f24 EPSILON = f24::FromFloat32(0.00001f); + // TODO: Not sure if this is a valid approach. + static constexpr f24 EPSILON = f24::MinNormal(); static constexpr f24 f0 = f24::Zero(); static constexpr f24 f1 = f24::One(); static constexpr std::array clipping_edges = {{ From 5a9bb045d72998c3d1a76e2afbae60119fb033da Mon Sep 17 00:00:00 2001 From: Vitor Kiguchi Date: Tue, 23 Jan 2024 00:56:57 -0300 Subject: [PATCH 3/4] tests: fix shader_jit_compiler and add pica_float The Uniform Read and Address Register Offset ran into precision differences between f32 and f24, which can be easily fixed and are not the point of the tests. As for the LG2 and EX2 tests that were failing, they were wrong. While it is true that 2^79.7 ~= 1e24, the value is bigger than the biggest representable value in f24, therefore both EX2 and LG2(?) should result in Inf. --- src/tests/CMakeLists.txt | 1 + src/tests/video_core/pica_float.cpp | 30 +++++++++++++++++++ .../video_core/shader/shader_jit_compiler.cpp | 10 ++++--- 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 src/tests/video_core/pica_float.cpp diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 590f07e78..741ba7ec0 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable(tests audio_core/audio_fixures.h audio_core/decoder_tests.cpp video_core/shader/shader_jit_compiler.cpp + video_core/pica_float.cpp audio_core/merryhime_3ds_audio/merry_audio/merry_audio.cpp audio_core/merryhime_3ds_audio/merry_audio/merry_audio.h audio_core/merryhime_3ds_audio/merry_audio/service_fixture.cpp diff --git a/src/tests/video_core/pica_float.cpp b/src/tests/video_core/pica_float.cpp new file mode 100644 index 000000000..394420c99 --- /dev/null +++ b/src/tests/video_core/pica_float.cpp @@ -0,0 +1,30 @@ +// Copyright 2024 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include "video_core/pica_types.h" + +using Pica::f24; + +TEST_CASE("Infinities", "[video_core][pica_float]") { + REQUIRE(std::isinf(f24::FromFloat32(INFINITY).ToFloat32())); + REQUIRE(std::isinf(f24::FromFloat32(1.e20f).ToFloat32())); + REQUIRE(std::isinf(f24::FromFloat32(-1.e20f).ToFloat32())); +} + +TEST_CASE("Subnormals", "[video_core][pica_float]") { + REQUIRE(f24::FromFloat32(1e-20f).ToFloat32() == 0.f); +} + +TEST_CASE("NaN", "[video_core][pica_float]") { + const auto inf = f24::FromFloat32(INFINITY); + const auto nan = f24::FromFloat32(NAN); + + REQUIRE(std::isnan(nan.ToFloat32())); + REQUIRE(std::isnan((nan * f24::Zero()).ToFloat32())); + REQUIRE(std::isnan((inf - inf).ToFloat32())); + REQUIRE((inf * f24::Zero()).ToFloat32() == 0.f); +} diff --git a/src/tests/video_core/shader/shader_jit_compiler.cpp b/src/tests/video_core/shader/shader_jit_compiler.cpp index 01698a2e1..579bf7e99 100644 --- a/src/tests/video_core/shader/shader_jit_compiler.cpp +++ b/src/tests/video_core/shader/shader_jit_compiler.cpp @@ -260,7 +260,7 @@ TEST_CASE("LG2", "[video_core][shader][shader_jit]") { REQUIRE(std::isinf(shader.Run(0.f).x)); REQUIRE(shader.Run(4.f).x == Catch::Approx(2.f)); REQUIRE(shader.Run(64.f).x == Catch::Approx(6.f)); - REQUIRE(shader.Run(1.e24f).x == Catch::Approx(79.7262742773f)); + // REQUIRE(std::isinf(shader.Run(INFINITY).x)); } TEST_CASE("EX2", "[video_core][shader][shader_jit]") { @@ -277,8 +277,9 @@ TEST_CASE("EX2", "[video_core][shader][shader_jit]") { REQUIRE(shader.Run(0.f).x == Catch::Approx(1.f)); REQUIRE(shader.Run(2.f).x == Catch::Approx(4.f)); REQUIRE(shader.Run(6.f).x == Catch::Approx(64.f)); - REQUIRE(shader.Run(79.7262742773f).x == Catch::Approx(1.e24f)); REQUIRE(std::isinf(shader.Run(800.f).x)); + // If we respect f24 precision, 2^79 = inf, as 79 > 63 + // REQUIRE(std::isinf(shader.Run(79.7262742773f).x)); } TEST_CASE("MUL", "[video_core][shader][shader_jit]") { @@ -469,7 +470,7 @@ TEST_CASE("Uniform Read", "[video_core][shader][shader_jit]") { const float color = (i * 2.0f) / 255.0f; const auto color_f24 = Pica::f24::FromFloat32(color); shader.shader_setup->uniforms.f[i] = {color_f24, color_f24, color_f24, Pica::f24::One()}; - f_uniforms[i] = {color, color, color, 1.0f}; + f_uniforms[i] = {color_f24.ToFloat32(), color_f24.ToFloat32(), color_f24.ToFloat32(), 1.0f}; } for (u32 i = 0; i < 96; ++i) { @@ -506,7 +507,8 @@ TEST_CASE("Address Register Offset", "[video_core][shader][shader_jit]") { const auto color_f24 = Pica::f24::FromFloat32(color); shader.shader_setup->uniforms.f[i] = {color_f24, color_f24, color_f24, Pica::f24::One()}; - f_uniforms[i] = {color, color, color, 1.f}; + f_uniforms[i] = {color_f24.ToFloat32(), color_f24.ToFloat32(), color_f24.ToFloat32(), + 1.f}; } else if (i >= 0x60 && i < 0x64) { const u8 color = static_cast((i - 0x60) * 0x10); shader.shader_setup->uniforms.i[i - 0x60] = {color, color, color, 255}; From 265ed7ec14eff0b08696db2a9d259f27712a29da Mon Sep 17 00:00:00 2001 From: Vitor Kiguchi Date: Mon, 5 Feb 2024 17:13:56 -0300 Subject: [PATCH 4/4] sw_rasterizer: don't include the bias in the barycentric coords --- .../renderer_software/sw_rasterizer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_software/sw_rasterizer.cpp b/src/video_core/renderer_software/sw_rasterizer.cpp index 6011400e7..134acf5b5 100644 --- a/src/video_core/renderer_software/sw_rasterizer.cpp +++ b/src/video_core/renderer_software/sw_rasterizer.cpp @@ -30,7 +30,7 @@ using Pica::Texture::TextureInfo; // negative/positive z values when computing with f32 precision, // causing some vertices to get erroneously clipped. To workaround this problem, // we can use a very small epsilon value for clip plane comparison. -constexpr f32 EPSILON_Z = 0.00000001f; +constexpr f32 EPSILON_Z = 0.f; struct Vertex : Pica::OutputVertex { Vertex(const OutputVertex& v) : OutputVertex(v) {} @@ -286,11 +286,11 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); const int bias0 = - IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; + IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? 1 : 0; const int bias1 = - IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; + IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? 1 : 0; const int bias2 = - IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; + IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? 1 : 0; const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); @@ -313,13 +313,13 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con } // Calculate the barycentric coordinates w0, w1 and w2 - const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); - const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); - const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); + const s32 w0 = SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); + const s32 w1 = SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); + const s32 w2 = SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); const s32 wsum = w0 + w1 + w2; // If current pixel is not covered by the current primitive - if (w0 < 0 || w1 < 0 || w2 < 0) { + if (w0 < bias0 || w1 < bias1 || w2 < bias2) { continue; }