Mirror of https://github.com/PabloMK7/citra.git (synced 2025-10-31 13:50:03 +00:00)
			
		
		
		
	Merge pull request #1065 from yuriks/shader-fp
Shader FP compliance fixes
This commit is contained in:
		
						commit
						c5a4025b65
					
				
					 4 changed files with 102 additions and 59 deletions
				
			
		|  | @ -1021,12 +1021,20 @@ struct float24 { | ||||||
|         return ret; |         return ret; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     static float24 Zero() { | ||||||
|  |         return FromFloat32(0.f); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     // Not recommended for anything but logging
 |     // Not recommended for anything but logging
 | ||||||
|     float ToFloat32() const { |     float ToFloat32() const { | ||||||
|         return value; |         return value; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     float24 operator * (const float24& flt) const { |     float24 operator * (const float24& flt) const { | ||||||
|  |         if ((this->value == 0.f && !std::isnan(flt.value)) || | ||||||
|  |             (flt.value == 0.f && !std::isnan(this->value))) | ||||||
|  |             // PICA gives 0 instead of NaN when multiplying zero by inf
 | ||||||
|  |             return Zero(); | ||||||
|         return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); |         return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -1043,7 +1051,11 @@ struct float24 { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     float24& operator *= (const float24& flt) { |     float24& operator *= (const float24& flt) { | ||||||
|         value *= flt.ToFloat32(); |         if ((this->value == 0.f && !std::isnan(flt.value)) || | ||||||
|  |             (flt.value == 0.f && !std::isnan(this->value))) | ||||||
|  |             // PICA gives 0 instead of NaN when multiplying zero by inf
 | ||||||
|  |             *this = Zero(); | ||||||
|  |         else value *= flt.ToFloat32(); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -177,7 +177,10 @@ void RunInterpreter(UnitState<Debug>& state) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = std::max(src1[i], src2[i]); |                     // NOTE: Exact form required to match NaN semantics to hardware:
 | ||||||
|  |                     //   max(0, NaN) -> NaN
 | ||||||
|  |                     //   max(NaN, 0) -> 0
 | ||||||
|  |                     dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i]; | ||||||
|                 } |                 } | ||||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|  | @ -190,7 +193,10 @@ void RunInterpreter(UnitState<Debug>& state) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = std::min(src1[i], src2[i]); |                     // NOTE: Exact form required to match NaN semantics to hardware:
 | ||||||
|  |                     //   min(0, NaN) -> NaN
 | ||||||
|  |                     //   min(NaN, 0) -> 0
 | ||||||
|  |                     dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i]; | ||||||
|                 } |                 } | ||||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|  |  | ||||||
|  | @ -115,6 +115,8 @@ static const X64Reg SRC1 = XMM1; | ||||||
| static const X64Reg SRC2 = XMM2; | static const X64Reg SRC2 = XMM2; | ||||||
| /// Loaded with the third swizzled source register, otherwise can be used as a scratch register
 | /// Loaded with the third swizzled source register, otherwise can be used as a scratch register
 | ||||||
| static const X64Reg SRC3 = XMM3; | static const X64Reg SRC3 = XMM3; | ||||||
|  | /// Additional scratch register
 | ||||||
|  | static const X64Reg SCRATCH2 = XMM4; | ||||||
| /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
 | /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
 | ||||||
| static const X64Reg ONE = XMM14; | static const X64Reg ONE = XMM14; | ||||||
| /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
 | ||||||
|  | @ -227,8 +229,8 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||||
|             BLENDPS(SCRATCH, R(src), mask); |             BLENDPS(SCRATCH, R(src), mask); | ||||||
|         } else { |         } else { | ||||||
|             MOVAPS(XMM4, R(src)); |             MOVAPS(SCRATCH2, R(src)); | ||||||
|             UNPCKHPS(XMM4, R(SCRATCH)); // Unpack X/Y components of source and destination
 |             UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
 | ||||||
|             UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
 |             UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
 | ||||||
| 
 | 
 | ||||||
|             // Compute selector to selectively copy source components to destination for SHUFPS instruction
 |             // Compute selector to selectively copy source components to destination for SHUFPS instruction
 | ||||||
|  | @ -236,7 +238,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|                      ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | |                      ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | | ||||||
|                      ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | |                      ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | | ||||||
|                      ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6); |                      ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6); | ||||||
|             SHUFPS(SCRATCH, R(XMM4), sel); |             SHUFPS(SCRATCH, R(SCRATCH2), sel); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Store dest back to memory
 |         // Store dest back to memory
 | ||||||
|  | @ -244,6 +246,19 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { | ||||||
|  |     MOVAPS(scratch, R(src1)); | ||||||
|  |     CMPPS(scratch, R(src2), CMP_ORD); | ||||||
|  | 
 | ||||||
|  |     MULPS(src1, R(src2)); | ||||||
|  | 
 | ||||||
|  |     MOVAPS(src2, R(src1)); | ||||||
|  |     CMPPS(src2, R(src2), CMP_UNORD); | ||||||
|  | 
 | ||||||
|  |     XORPS(scratch, R(src2)); | ||||||
|  |     ANDPS(src1, R(scratch)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void JitCompiler::Compile_EvaluateCondition(Instruction instr) { | void JitCompiler::Compile_EvaluateCondition(Instruction instr) { | ||||||
|     // Note: NXOR is used below to check for equality
 |     // Note: NXOR is used below to check for equality
 | ||||||
|     switch (instr.flow_control.op) { |     switch (instr.flow_control.op) { | ||||||
|  | @ -307,21 +322,17 @@ void JitCompiler::Compile_DP3(Instruction instr) { | ||||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||||
|     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||||
| 
 | 
 | ||||||
|     if (Common::GetCPUCaps().sse4_1) { |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
|         DPPS(SRC1, R(SRC2), 0x7f); |  | ||||||
|     } else { |  | ||||||
|         MULPS(SRC1, R(SRC2)); |  | ||||||
| 
 | 
 | ||||||
|         MOVAPS(SRC2, R(SRC1)); |     MOVAPS(SRC2, R(SRC1)); | ||||||
|         SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1)); |     SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1)); | ||||||
| 
 | 
 | ||||||
|         MOVAPS(SRC3, R(SRC1)); |     MOVAPS(SRC3, R(SRC1)); | ||||||
|         SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2)); |     SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2)); | ||||||
| 
 | 
 | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); |     SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); | ||||||
|         ADDPS(SRC1, R(SRC2)); |     ADDPS(SRC1, R(SRC2)); | ||||||
|         ADDPS(SRC1, R(SRC3)); |     ADDPS(SRC1, R(SRC3)); | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  | @ -330,19 +341,15 @@ void JitCompiler::Compile_DP4(Instruction instr) { | ||||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||||
|     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||||
| 
 | 
 | ||||||
|     if (Common::GetCPUCaps().sse4_1) { |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
|         DPPS(SRC1, R(SRC2), 0xff); |  | ||||||
|     } else { |  | ||||||
|         MULPS(SRC1, R(SRC2)); |  | ||||||
| 
 | 
 | ||||||
|         MOVAPS(SRC2, R(SRC1)); |     MOVAPS(SRC2, R(SRC1)); | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 |     SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 | ||||||
|         ADDPS(SRC1, R(SRC2)); |     ADDPS(SRC1, R(SRC2)); | ||||||
| 
 | 
 | ||||||
|         MOVAPS(SRC2, R(SRC1)); |     MOVAPS(SRC2, R(SRC1)); | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 |     SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 | ||||||
|         ADDPS(SRC1, R(SRC2)); |     ADDPS(SRC1, R(SRC2)); | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  | @ -359,24 +366,23 @@ void JitCompiler::Compile_DPH(Instruction instr) { | ||||||
|     if (Common::GetCPUCaps().sse4_1) { |     if (Common::GetCPUCaps().sse4_1) { | ||||||
|         // Set 4th component to 1.0
 |         // Set 4th component to 1.0
 | ||||||
|         BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
 |         BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
 | ||||||
|         DPPS(SRC1, R(SRC2), 0xff); |  | ||||||
|     } else { |     } else { | ||||||
|         // Reverse to set the 4th component to 1.0
 |         // Set 4th component to 1.0
 | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); |         MOVAPS(SCRATCH, R(SRC1)); | ||||||
|         MOVSS(SRC1, R(ONE)); |         UNPCKHPS(SCRATCH, R(ONE));  // XYZW, 1111 -> Z1__
 | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); |         UNPCKLPD(SRC1, R(SCRATCH)); // XYZW, Z1__ -> XYZ1
 | ||||||
| 
 |  | ||||||
|         MULPS(SRC1, R(SRC2)); |  | ||||||
| 
 |  | ||||||
|         MOVAPS(SRC2, R(SRC1)); |  | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 |  | ||||||
|         ADDPS(SRC1, R(SRC2)); |  | ||||||
| 
 |  | ||||||
|         MOVAPS(SRC2, R(SRC1)); |  | ||||||
|         SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 |  | ||||||
|         ADDPS(SRC1, R(SRC2)); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
|  | 
 | ||||||
|  |     MOVAPS(SRC2, R(SRC1)); | ||||||
|  |     SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 | ||||||
|  |     ADDPS(SRC1, R(SRC2)); | ||||||
|  | 
 | ||||||
|  |     MOVAPS(SRC2, R(SRC1)); | ||||||
|  |     SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 | ||||||
|  |     ADDPS(SRC1, R(SRC2)); | ||||||
|  | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -415,7 +421,7 @@ void JitCompiler::Compile_LG2(Instruction instr) { | ||||||
| void JitCompiler::Compile_MUL(Instruction instr) { | void JitCompiler::Compile_MUL(Instruction instr) { | ||||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||||
|     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||||
|     MULPS(SRC1, R(SRC2)); |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -465,6 +471,7 @@ void JitCompiler::Compile_FLR(Instruction instr) { | ||||||
| void JitCompiler::Compile_MAX(Instruction instr) { | void JitCompiler::Compile_MAX(Instruction instr) { | ||||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||||
|     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||||
|  |     // SSE semantics match PICA200 ones: if either operand is NaN, SRC2 is returned.
 | ||||||
|     MAXPS(SRC1, R(SRC2)); |     MAXPS(SRC1, R(SRC2)); | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  | @ -472,6 +479,7 @@ void JitCompiler::Compile_MAX(Instruction instr) { | ||||||
| void JitCompiler::Compile_MIN(Instruction instr) { | void JitCompiler::Compile_MIN(Instruction instr) { | ||||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||||
|     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||||
|  |     // SSE semantics match PICA200 ones: if either operand is NaN, SRC2 is returned.
 | ||||||
|     MINPS(SRC1, R(SRC2)); |     MINPS(SRC1, R(SRC2)); | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  | @ -578,27 +586,42 @@ void JitCompiler::Compile_CALLU(Instruction instr) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void JitCompiler::Compile_CMP(Instruction instr) { | void JitCompiler::Compile_CMP(Instruction instr) { | ||||||
|  |     using Op = Instruction::Common::CompareOpType::Op; | ||||||
|  |     Op op_x = instr.common.compare_op.x; | ||||||
|  |     Op op_y = instr.common.compare_op.y; | ||||||
|  | 
 | ||||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||||
|     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||||
| 
 | 
 | ||||||
|     static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_NLE, CMP_NLT }; |     // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
 | ||||||
|  |     // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
 | ||||||
|  |     // because they don't match when used with NaNs.
 | ||||||
|  |     static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; | ||||||
| 
 | 
 | ||||||
|     if (instr.common.compare_op.x == instr.common.compare_op.y) { |     bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); | ||||||
|  |     Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; | ||||||
|  |     Gen::X64Reg rhs_x = invert_op_x ? SRC1 : SRC2; | ||||||
|  | 
 | ||||||
|  |     if (op_x == op_y) { | ||||||
|         // Compare X-component and Y-component together
 |         // Compare X-component and Y-component together
 | ||||||
|         CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.x]); |         CMPPS(lhs_x, R(rhs_x), cmp[op_x]); | ||||||
|  |         MOVQ_xmm(R(COND0), lhs_x); | ||||||
| 
 | 
 | ||||||
|         MOVQ_xmm(R(COND0), SRC1); |  | ||||||
|         MOV(64, R(COND1), R(COND0)); |         MOV(64, R(COND1), R(COND0)); | ||||||
|     } else { |     } else { | ||||||
|  |         bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual); | ||||||
|  |         Gen::X64Reg lhs_y = invert_op_y ? SRC2 : SRC1; | ||||||
|  |         Gen::X64Reg rhs_y = invert_op_y ? SRC1 : SRC2; | ||||||
|  | 
 | ||||||
|         // Compare X-component
 |         // Compare X-component
 | ||||||
|         MOVAPS(SCRATCH, R(SRC1)); |         MOVAPS(SCRATCH, R(lhs_x)); | ||||||
|         CMPSS(SCRATCH, R(SRC2), cmp[instr.common.compare_op.x]); |         CMPSS(SCRATCH, R(rhs_x), cmp[op_x]); | ||||||
| 
 | 
 | ||||||
|         // Compare Y-component
 |         // Compare Y-component
 | ||||||
|         CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.y]); |         CMPPS(lhs_y, R(rhs_y), cmp[op_y]); | ||||||
| 
 | 
 | ||||||
|         MOVQ_xmm(R(COND0), SCRATCH); |         MOVQ_xmm(R(COND0), SCRATCH); | ||||||
|         MOVQ_xmm(R(COND1), SRC1); |         MOVQ_xmm(R(COND1), lhs_y); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     SHR(32, R(COND0), Imm8(31)); |     SHR(32, R(COND0), Imm8(31)); | ||||||
|  | @ -616,12 +639,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { | ||||||
|         Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3); |         Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (Common::GetCPUCaps().fma) { |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
|         VFMADD213PS(SRC1, SRC2, R(SRC3)); |     ADDPS(SRC1, R(SRC3)); | ||||||
|     } else { |  | ||||||
|         MULPS(SRC1, R(SRC2)); |  | ||||||
|         ADDPS(SRC1, R(SRC3)); |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -68,6 +68,12 @@ private: | ||||||
|     void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); |     void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); | ||||||
|     void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); |     void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); | ||||||
| 
 | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying | ||||||
|  |      * zero by inf. Clobbers `src2` and `scratch`. | ||||||
|  |      */ | ||||||
|  |     void Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch); | ||||||
|  | 
 | ||||||
|     void Compile_EvaluateCondition(Instruction instr); |     void Compile_EvaluateCondition(Instruction instr); | ||||||
|     void Compile_UniformCondition(Instruction instr); |     void Compile_UniformCondition(Instruction instr); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue