mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	shader_jit_x64: Use Reg32 for LOOP* registers, eliminating casts
This commit is contained in:
		
							parent
							
								
									f4e98ecf3f
								
							
						
					
					
						commit
						5ff3206207
					
				
					 1 changed files with 16 additions and 16 deletions
				
			
		|  | @ -109,11 +109,11 @@ static const Reg64 SETUP = r9; | |||
| static const Reg64 ADDROFFS_REG_0 = r10; | ||||
| static const Reg64 ADDROFFS_REG_1 = r11; | ||||
| /// VS loop count register (Multiplied by 16)
 | ||||
| static const Reg64 LOOPCOUNT_REG = r12; | ||||
| static const Reg32 LOOPCOUNT_REG = r12d; | ||||
| /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
 | ||||
| static const Reg64 LOOPCOUNT = rsi; | ||||
| static const Reg32 LOOPCOUNT = esi; | ||||
| /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
 | ||||
| static const Reg64 LOOPINC = rdi; | ||||
| static const Reg32 LOOPINC = edi; | ||||
| /// Result of the previous CMP instruction for the X-component comparison
 | ||||
| static const Reg64 COND0 = r13; | ||||
| /// Result of the previous CMP instruction for the Y-component comparison
 | ||||
|  | @ -734,24 +734,24 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
|     // 4 bits) to be used as an offset into the 16-byte vector registers later
 | ||||
|     size_t offset = | ||||
|         ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||||
|     mov(LOOPCOUNT.cvt32(), dword[SETUP + offset]); | ||||
|     mov(LOOPCOUNT_REG.cvt32(), LOOPCOUNT.cvt32()); | ||||
|     shr(LOOPCOUNT_REG.cvt32(), 4); | ||||
|     and(LOOPCOUNT_REG.cvt32(), 0xFF0); // Y-component is the start
 | ||||
|     mov(LOOPINC.cvt32(), LOOPCOUNT.cvt32()); | ||||
|     shr(LOOPINC.cvt32(), 12); | ||||
|     and(LOOPINC.cvt32(), 0xFF0);                // Z-component is the incrementer
 | ||||
|     movzx(LOOPCOUNT.cvt32(), LOOPCOUNT.cvt8()); // X-component is iteration count
 | ||||
|     add(LOOPCOUNT.cvt32(), 1);                  // Iteration count is X-component + 1
 | ||||
|     mov(LOOPCOUNT, dword[SETUP + offset]); | ||||
|     mov(LOOPCOUNT_REG, LOOPCOUNT); | ||||
|     shr(LOOPCOUNT_REG, 4); | ||||
|     and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
 | ||||
|     mov(LOOPINC, LOOPCOUNT); | ||||
|     shr(LOOPINC, 12); | ||||
|     and(LOOPINC, 0xFF0);                // Z-component is the incrementer
 | ||||
|     movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
 | ||||
|     add(LOOPCOUNT, 1);                  // Iteration count is X-component + 1
 | ||||
| 
 | ||||
|     Label l_loop_start; | ||||
|     L(l_loop_start); | ||||
| 
 | ||||
|     Compile_Block(instr.flow_control.dest_offset + 1); | ||||
| 
 | ||||
|     add(LOOPCOUNT_REG.cvt32(), LOOPINC.cvt32()); // Increment LOOPCOUNT_REG by Z-component
 | ||||
|     sub(LOOPCOUNT.cvt32(), 1);                   // Decrement loop count by 1
 | ||||
|     jnz(l_loop_start);                           // Loop if not equal
 | ||||
|     add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
 | ||||
|     sub(LOOPCOUNT, 1);           // Decrement loop count by 1
 | ||||
|     jnz(l_loop_start);           // Loop if not equal
 | ||||
| 
 | ||||
|     looping = false; | ||||
| } | ||||
|  | @ -856,7 +856,7 @@ void JitShader::Compile() { | |||
|     // Zero address/loop registers
 | ||||
|     xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); | ||||
|     xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); | ||||
|     xor(LOOPCOUNT_REG.cvt32(), LOOPCOUNT_REG.cvt32()); | ||||
|     xor(LOOPCOUNT_REG, LOOPCOUNT_REG); | ||||
| 
 | ||||
|     // Used to set a register to one
 | ||||
|     static const __m128 one = {1.f, 1.f, 1.f, 1.f}; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue