mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	shader/jit: Use Xbyak::util::Cpu for host capabilities (#6643)
				
					
				
			Xbyak has a complete utility class for determining the host processor's
ISA features, such as SSE4.1, AVX, AVX2, and AVX512{F,VL,DQ,VBMI,...},
which opens the door to further potential optimizations.
			
			
This commit is contained in:
		
							parent
							
								
									3d0a3c2c45
								
							
						
					
					
						commit
						71aea7e571
					
				
					 1 changed files with 6 additions and 3 deletions
				
			
		|  | @ -10,6 +10,7 @@ | |||
| #include <cstdint> | ||||
| #include <nihstro/shader_bytecode.h> | ||||
| #include <smmintrin.h> | ||||
| #include <xbyak/xbyak_util.h> | ||||
| #include <xmmintrin.h> | ||||
| #include "common/assert.h" | ||||
| #include "common/logging/log.h" | ||||
|  | @ -32,6 +33,8 @@ using Xbyak::Xmm; | |||
| using nihstro::DestRegister; | ||||
| using nihstro::RegisterType; | ||||
| 
 | ||||
| static const Xbyak::util::Cpu host_caps; | ||||
| 
 | ||||
| namespace Pica::Shader { | ||||
| 
 | ||||
| typedef void (JitShader::*JitFunction)(Instruction instr); | ||||
|  | @ -306,7 +309,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { | |||
|         // register...
 | ||||
|         movaps(SCRATCH, xword[STATE + dest_offset_disp]); | ||||
| 
 | ||||
|         if (Common::GetCPUCaps().sse4_1) { | ||||
|         if (host_caps.has(Cpu::tSSE41)) { | ||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | | ||||
|                       ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||
|             blendps(SCRATCH, src, mask); | ||||
|  | @ -437,7 +440,7 @@ void JitShader::Compile_DPH(Instruction instr) { | |||
|         Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||||
|     } | ||||
| 
 | ||||
|     if (Common::GetCPUCaps().sse4_1) { | ||||
|     if (host_caps.has(Cpu::tSSE41)) { | ||||
|         // Set 4th component to 1.0
 | ||||
|         blendps(SRC1, ONE, 0b1000); | ||||
|     } else { | ||||
|  | @ -507,7 +510,7 @@ void JitShader::Compile_SLT(Instruction instr) { | |||
| void JitShader::Compile_FLR(Instruction instr) { | ||||
|     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||||
| 
 | ||||
|     if (Common::GetCPUCaps().sse4_1) { | ||||
|     if (host_caps.has(Cpu::tSSE41)) { | ||||
|         roundps(SRC1, SRC1, _MM_FROUND_FLOOR); | ||||
|     } else { | ||||
|         cvttps2dq(SRC1, SRC1); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue