mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	shader_jit_x64_compiler: Use haddps for horizontal summation
This commit is contained in:
		
							parent
							
								
									af45f2b2de
								
							
						
					
					
						commit
						efec8fe513
					
				
					 1 changed file with 22 additions and 12 deletions
				
			
		|  | @ -387,6 +387,10 @@ void JitShader::Compile_DP4(Instruction instr) { | ||||||
| 
 | 
 | ||||||
|     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
| 
 | 
 | ||||||
|  |     if (Common::GetCPUCaps().sse3) { | ||||||
|  |         haddps(SRC1, SRC1); | ||||||
|  |         haddps(SRC1, SRC1); | ||||||
|  |     } else { | ||||||
|         movaps(SRC2, SRC1); |         movaps(SRC2, SRC1); | ||||||
|         shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 |         shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 | ||||||
|         addps(SRC1, SRC2); |         addps(SRC1, SRC2); | ||||||
|  | @ -394,6 +398,7 @@ void JitShader::Compile_DP4(Instruction instr) { | ||||||
|         movaps(SRC2, SRC1); |         movaps(SRC2, SRC1); | ||||||
|         shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 |         shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 | ||||||
|         addps(SRC1, SRC2); |         addps(SRC1, SRC2); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  | @ -419,6 +424,10 @@ void JitShader::Compile_DPH(Instruction instr) { | ||||||
| 
 | 
 | ||||||
|     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
| 
 | 
 | ||||||
|  |     if (Common::GetCPUCaps().sse3) { | ||||||
|  |         haddps(SRC1, SRC1); | ||||||
|  |         haddps(SRC1, SRC1); | ||||||
|  |     } else { | ||||||
|         movaps(SRC2, SRC1); |         movaps(SRC2, SRC1); | ||||||
|         shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 |         shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 | ||||||
|         addps(SRC1, SRC2); |         addps(SRC1, SRC2); | ||||||
|  | @ -426,6 +435,7 @@ void JitShader::Compile_DPH(Instruction instr) { | ||||||
|         movaps(SRC2, SRC1); |         movaps(SRC2, SRC1); | ||||||
|         shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 |         shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 | ||||||
|         addps(SRC1, SRC2); |         addps(SRC1, SRC2); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue