mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	shader_jit_x64: Use Reg32 for LOOP* registers, eliminating casts
This commit is contained in:
		
							parent
							
								
									f4e98ecf3f
								
							
						
					
					
						commit
						5ff3206207
					
				
					 1 changed files with 16 additions and 16 deletions
				
			
		|  | @ -109,11 +109,11 @@ static const Reg64 SETUP = r9; | ||||||
| static const Reg64 ADDROFFS_REG_0 = r10; | static const Reg64 ADDROFFS_REG_0 = r10; | ||||||
| static const Reg64 ADDROFFS_REG_1 = r11; | static const Reg64 ADDROFFS_REG_1 = r11; | ||||||
| /// VS loop count register (Multiplied by 16)
 | /// VS loop count register (Multiplied by 16)
 | ||||||
| static const Reg64 LOOPCOUNT_REG = r12; | static const Reg32 LOOPCOUNT_REG = r12d; | ||||||
| /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
 | ||||||
| static const Reg64 LOOPCOUNT = rsi; | static const Reg32 LOOPCOUNT = esi; | ||||||
| /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
 | ||||||
| static const Reg64 LOOPINC = rdi; | static const Reg32 LOOPINC = edi; | ||||||
| /// Result of the previous CMP instruction for the X-component comparison
 | /// Result of the previous CMP instruction for the X-component comparison
 | ||||||
| static const Reg64 COND0 = r13; | static const Reg64 COND0 = r13; | ||||||
| /// Result of the previous CMP instruction for the Y-component comparison
 | /// Result of the previous CMP instruction for the Y-component comparison
 | ||||||
|  | @ -734,24 +734,24 @@ void JitShader::Compile_LOOP(Instruction instr) { | ||||||
|     // 4 bits) to be used as an offset into the 16-byte vector registers later
 |     // 4 bits) to be used as an offset into the 16-byte vector registers later
 | ||||||
|     size_t offset = |     size_t offset = | ||||||
|         ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); |         ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||||||
|     mov(LOOPCOUNT.cvt32(), dword[SETUP + offset]); |     mov(LOOPCOUNT, dword[SETUP + offset]); | ||||||
|     mov(LOOPCOUNT_REG.cvt32(), LOOPCOUNT.cvt32()); |     mov(LOOPCOUNT_REG, LOOPCOUNT); | ||||||
|     shr(LOOPCOUNT_REG.cvt32(), 4); |     shr(LOOPCOUNT_REG, 4); | ||||||
|     and(LOOPCOUNT_REG.cvt32(), 0xFF0); // Y-component is the start
 |     and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
 | ||||||
|     mov(LOOPINC.cvt32(), LOOPCOUNT.cvt32()); |     mov(LOOPINC, LOOPCOUNT); | ||||||
|     shr(LOOPINC.cvt32(), 12); |     shr(LOOPINC, 12); | ||||||
|     and(LOOPINC.cvt32(), 0xFF0);                // Z-component is the incrementer
 |     and(LOOPINC, 0xFF0);                // Z-component is the incrementer
 | ||||||
|     movzx(LOOPCOUNT.cvt32(), LOOPCOUNT.cvt8()); // X-component is iteration count
 |     movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
 | ||||||
|     add(LOOPCOUNT.cvt32(), 1);                  // Iteration count is X-component + 1
 |     add(LOOPCOUNT, 1);                  // Iteration count is X-component + 1
 | ||||||
| 
 | 
 | ||||||
|     Label l_loop_start; |     Label l_loop_start; | ||||||
|     L(l_loop_start); |     L(l_loop_start); | ||||||
| 
 | 
 | ||||||
|     Compile_Block(instr.flow_control.dest_offset + 1); |     Compile_Block(instr.flow_control.dest_offset + 1); | ||||||
| 
 | 
 | ||||||
|     add(LOOPCOUNT_REG.cvt32(), LOOPINC.cvt32()); // Increment LOOPCOUNT_REG by Z-component
 |     add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
 | ||||||
|     sub(LOOPCOUNT.cvt32(), 1);                   // Decrement loop count by 1
 |     sub(LOOPCOUNT, 1);           // Decrement loop count by 1
 | ||||||
|     jnz(l_loop_start);                           // Loop if not equal
 |     jnz(l_loop_start);           // Loop if not equal
 | ||||||
| 
 | 
 | ||||||
|     looping = false; |     looping = false; | ||||||
| } | } | ||||||
|  | @ -856,7 +856,7 @@ void JitShader::Compile() { | ||||||
|     // Zero address/loop registers
 |     // Zero address/loop registers
 | ||||||
|     xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); |     xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); | ||||||
|     xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); |     xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); | ||||||
|     xor(LOOPCOUNT_REG.cvt32(), LOOPCOUNT_REG.cvt32()); |     xor(LOOPCOUNT_REG, LOOPCOUNT_REG); | ||||||
| 
 | 
 | ||||||
|     // Used to set a register to one
 |     // Used to set a register to one
 | ||||||
|     static const __m128 one = {1.f, 1.f, 1.f, 1.f}; |     static const __m128 one = {1.f, 1.f, 1.f, 1.f}; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue