mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	shader_jit_x64_compiler: Use haddps for horizontal summation
This commit is contained in:
		
							parent
							
								
									af45f2b2de
								
							
						
					
					
						commit
						efec8fe513
					
				
					 1 changed file with 22 additions and 12 deletions
				
			
		|  | @ -387,13 +387,18 @@ void JitShader::Compile_DP4(Instruction instr) { | ||||||
| 
 | 
 | ||||||
|     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
| 
 | 
 | ||||||
|     movaps(SRC2, SRC1); |     if (Common::GetCPUCaps().sse3) { | ||||||
|     shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 |         haddps(SRC1, SRC1); | ||||||
|     addps(SRC1, SRC2); |         haddps(SRC1, SRC1); | ||||||
|  |     } else { | ||||||
|  |         movaps(SRC2, SRC1); | ||||||
|  |         shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 | ||||||
|  |         addps(SRC1, SRC2); | ||||||
| 
 | 
 | ||||||
|     movaps(SRC2, SRC1); |         movaps(SRC2, SRC1); | ||||||
|     shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 |         shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 | ||||||
|     addps(SRC1, SRC2); |         addps(SRC1, SRC2); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  | @ -419,13 +424,18 @@ void JitShader::Compile_DPH(Instruction instr) { | ||||||
| 
 | 
 | ||||||
|     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); |     Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | ||||||
| 
 | 
 | ||||||
|     movaps(SRC2, SRC1); |     if (Common::GetCPUCaps().sse3) { | ||||||
|     shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 |         haddps(SRC1, SRC1); | ||||||
|     addps(SRC1, SRC2); |         haddps(SRC1, SRC1); | ||||||
|  |     } else { | ||||||
|  |         movaps(SRC2, SRC1); | ||||||
|  |         shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
 | ||||||
|  |         addps(SRC1, SRC2); | ||||||
| 
 | 
 | ||||||
|     movaps(SRC2, SRC1); |         movaps(SRC2, SRC1); | ||||||
|     shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 |         shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
 | ||||||
|     addps(SRC1, SRC2); |         addps(SRC1, SRC2); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     Compile_DestEnable(instr, SRC1); |     Compile_DestEnable(instr, SRC1); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue