mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	core/arm: Improve timing accuracy before service calls in JIT
We also correct the CPU JIT's implementation of Step.
This commit is contained in:
		
							parent
							
								
									dc030c78c3
								
							
						
					
					
						commit
						7cd8b437aa
					
				
					 9 changed files with 59 additions and 53 deletions
				
			
		|  | @ -24,19 +24,11 @@ public: | |||
|         u32 fpexc; | ||||
|     }; | ||||
| 
 | ||||
|     /**
 | ||||
|      * Runs the CPU for the given number of instructions | ||||
|      * @param num_instructions Number of instructions to run | ||||
|      */ | ||||
|     void Run(int num_instructions) { | ||||
|         ExecuteInstructions(num_instructions); | ||||
|         this->num_instructions += num_instructions; | ||||
|     } | ||||
|     /// Runs the CPU until an event happens
 | ||||
|     virtual void Run() = 0; | ||||
| 
 | ||||
|     /// Step CPU by one instruction
 | ||||
|     void Step() { | ||||
|         Run(1); | ||||
|     } | ||||
|     virtual void Step() = 0; | ||||
| 
 | ||||
|     /// Clear all instruction cache
 | ||||
|     virtual void ClearInstructionCache() = 0; | ||||
|  | @ -138,19 +130,4 @@ public: | |||
| 
 | ||||
|     /// Prepare core for thread reschedule (if needed to correctly handle state)
 | ||||
|     virtual void PrepareReschedule() = 0; | ||||
| 
 | ||||
|     /// Getter for num_instructions
 | ||||
|     u64 GetNumInstructions() const { | ||||
|         return num_instructions; | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     /**
 | ||||
|      * Executes the given number of instructions | ||||
|      * @param num_instructions Number of instructions to executes | ||||
|      */ | ||||
|     virtual void ExecuteInstructions(int num_instructions) = 0; | ||||
| 
 | ||||
| private: | ||||
|     u64 num_instructions = 0; ///< Number of instructions executed
 | ||||
| }; | ||||
|  |  | |||
|  | @ -40,11 +40,20 @@ static bool IsReadOnlyMemory(u32 vaddr) { | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| static void AddTicks(u64 ticks) { | ||||
|     CoreTiming::AddTicks(ticks); | ||||
| } | ||||
| 
 | ||||
| static u64 GetTicksRemaining() { | ||||
|     int ticks = CoreTiming::GetDowncount(); | ||||
|     return static_cast<u64>(ticks <= 0 ? 0 : ticks); | ||||
| } | ||||
| 
 | ||||
| static Dynarmic::UserCallbacks GetUserCallbacks( | ||||
|     const std::shared_ptr<ARMul_State>& interpeter_state, Memory::PageTable* current_page_table) { | ||||
|     const std::shared_ptr<ARMul_State>& interpreter_state, Memory::PageTable* current_page_table) { | ||||
|     Dynarmic::UserCallbacks user_callbacks{}; | ||||
|     user_callbacks.InterpreterFallback = &InterpreterFallback; | ||||
|     user_callbacks.user_arg = static_cast<void*>(interpeter_state.get()); | ||||
|     user_callbacks.user_arg = static_cast<void*>(interpreter_state.get()); | ||||
|     user_callbacks.CallSVC = &SVC::CallSVC; | ||||
|     user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory; | ||||
|     user_callbacks.memory.ReadCode = &Memory::Read32; | ||||
|  | @ -56,8 +65,10 @@ static Dynarmic::UserCallbacks GetUserCallbacks( | |||
|     user_callbacks.memory.Write16 = &Memory::Write16; | ||||
|     user_callbacks.memory.Write32 = &Memory::Write32; | ||||
|     user_callbacks.memory.Write64 = &Memory::Write64; | ||||
|     user_callbacks.AddTicks = &AddTicks; | ||||
|     user_callbacks.GetTicksRemaining = &GetTicksRemaining; | ||||
|     user_callbacks.page_table = ¤t_page_table->pointers; | ||||
|     user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state); | ||||
|     user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpreter_state); | ||||
|     return user_callbacks; | ||||
| } | ||||
| 
 | ||||
|  | @ -66,6 +77,19 @@ ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) { | |||
|     PageTableChanged(); | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64)); | ||||
| 
 | ||||
| void ARM_Dynarmic::Run() { | ||||
|     ASSERT(Memory::GetCurrentPageTable() == current_page_table); | ||||
|     MICROPROFILE_SCOPE(ARM_Jit); | ||||
| 
 | ||||
|     jit->Run(GetTicksRemaining()); | ||||
| } | ||||
| 
 | ||||
| void ARM_Dynarmic::Step() { | ||||
|     InterpreterFallback(jit->Regs()[15], jit, static_cast<void*>(interpreter_state.get())); | ||||
| } | ||||
| 
 | ||||
| void ARM_Dynarmic::SetPC(u32 pc) { | ||||
|     jit->Regs()[15] = pc; | ||||
| } | ||||
|  | @ -124,17 +148,6 @@ void ARM_Dynarmic::SetCP15Register(CP15Register reg, u32 value) { | |||
|     interpreter_state->CP15[reg] = value; | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64)); | ||||
| 
 | ||||
| void ARM_Dynarmic::ExecuteInstructions(int num_instructions) { | ||||
|     ASSERT(Memory::GetCurrentPageTable() == current_page_table); | ||||
|     MICROPROFILE_SCOPE(ARM_Jit); | ||||
| 
 | ||||
|     std::size_t ticks_executed = jit->Run(static_cast<unsigned>(num_instructions)); | ||||
| 
 | ||||
|     CoreTiming::AddTicks(ticks_executed); | ||||
| } | ||||
| 
 | ||||
| void ARM_Dynarmic::SaveContext(ARM_Interface::ThreadContext& ctx) { | ||||
|     memcpy(ctx.cpu_registers, jit->Regs().data(), sizeof(ctx.cpu_registers)); | ||||
|     memcpy(ctx.fpu_registers, jit->ExtRegs().data(), sizeof(ctx.fpu_registers)); | ||||
|  | @ -168,6 +181,7 @@ void ARM_Dynarmic::PrepareReschedule() { | |||
| } | ||||
| 
 | ||||
| void ARM_Dynarmic::ClearInstructionCache() { | ||||
|     // TODO: Clear interpreter cache when appropriate.
 | ||||
|     for (const auto& j : jits) { | ||||
|         j.second->ClearCache(); | ||||
|     } | ||||
|  |  | |||
|  | @ -19,6 +19,9 @@ class ARM_Dynarmic final : public ARM_Interface { | |||
| public: | ||||
|     ARM_Dynarmic(PrivilegeMode initial_mode); | ||||
| 
 | ||||
|     void Run() override; | ||||
|     void Step() override; | ||||
| 
 | ||||
|     void SetPC(u32 pc) override; | ||||
|     u32 GetPC() const override; | ||||
|     u32 GetReg(int index) const override; | ||||
|  | @ -36,7 +39,6 @@ public: | |||
|     void LoadContext(const ThreadContext& ctx) override; | ||||
| 
 | ||||
|     void PrepareReschedule() override; | ||||
|     void ExecuteInstructions(int num_instructions) override; | ||||
| 
 | ||||
|     void ClearInstructionCache() override; | ||||
|     void PageTableChanged() override; | ||||
|  |  | |||
|  | @ -2,6 +2,7 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
| #include "core/arm/dyncom/arm_dyncom.h" | ||||
|  | @ -20,6 +21,14 @@ ARM_DynCom::ARM_DynCom(PrivilegeMode initial_mode) { | |||
| 
 | ||||
| ARM_DynCom::~ARM_DynCom() {} | ||||
| 
 | ||||
| void ARM_DynCom::Run() { | ||||
|     ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); | ||||
| } | ||||
| 
 | ||||
| void ARM_DynCom::Step() { | ||||
|     ExecuteInstructions(1); | ||||
| } | ||||
| 
 | ||||
| void ARM_DynCom::ClearInstructionCache() { | ||||
|     state->instruction_cache.clear(); | ||||
|     trans_cache_buf_top = 0; | ||||
|  | @ -79,10 +88,6 @@ void ARM_DynCom::SetCP15Register(CP15Register reg, u32 value) { | |||
| 
 | ||||
| void ARM_DynCom::ExecuteInstructions(int num_instructions) { | ||||
|     state->NumInstrsToExecute = num_instructions; | ||||
| 
 | ||||
|     // Dyncom only breaks on instruction dispatch. This only happens on every instruction when
 | ||||
|     // executing one instruction at a time. Otherwise, if a block is being executed, more
 | ||||
|     // instructions may actually be executed than specified.
 | ||||
|     unsigned ticks_executed = InterpreterMainLoop(state.get()); | ||||
|     CoreTiming::AddTicks(ticks_executed); | ||||
| } | ||||
|  |  | |||
|  | @ -15,6 +15,9 @@ public: | |||
|     ARM_DynCom(PrivilegeMode initial_mode); | ||||
|     ~ARM_DynCom(); | ||||
| 
 | ||||
|     void Run() override; | ||||
|     void Step() override; | ||||
| 
 | ||||
|     void ClearInstructionCache() override; | ||||
|     void PageTableChanged() override; | ||||
| 
 | ||||
|  | @ -35,8 +38,9 @@ public: | |||
|     void LoadContext(const ThreadContext& ctx) override; | ||||
| 
 | ||||
|     void PrepareReschedule() override; | ||||
|     void ExecuteInstructions(int num_instructions) override; | ||||
| 
 | ||||
| private: | ||||
|     void ExecuteInstructions(int num_instructions); | ||||
| 
 | ||||
|     std::unique_ptr<ARMul_State> state; | ||||
| }; | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ namespace Core { | |||
| 
 | ||||
| /*static*/ System System::s_instance; | ||||
| 
 | ||||
| System::ResultStatus System::RunLoop(int tight_loop) { | ||||
| System::ResultStatus System::RunLoop(bool tight_loop) { | ||||
|     status = ResultStatus::Success; | ||||
|     if (!cpu_core) { | ||||
|         return ResultStatus::ErrorNotInitialized; | ||||
|  | @ -57,7 +57,11 @@ System::ResultStatus System::RunLoop(int tight_loop) { | |||
|         PrepareReschedule(); | ||||
|     } else { | ||||
|         CoreTiming::Advance(); | ||||
|         cpu_core->Run(tight_loop); | ||||
|         if (tight_loop) { | ||||
|             cpu_core->Run(); | ||||
|         } else { | ||||
|             cpu_core->Step(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     HW::Update(); | ||||
|  | @ -67,7 +71,7 @@ System::ResultStatus System::RunLoop(int tight_loop) { | |||
| } | ||||
| 
 | ||||
| System::ResultStatus System::SingleStep() { | ||||
|     return RunLoop(1); | ||||
|     return RunLoop(false); | ||||
| } | ||||
| 
 | ||||
| System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) { | ||||
|  |  | |||
|  | @ -50,10 +50,10 @@ public: | |||
|      * is not required to do a full dispatch with each instruction. NOTE: the number of instructions | ||||
|      * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware | ||||
|      * update is requested (e.g. on a thread switch). | ||||
|      * @param tight_loop Number of instructions to execute. | ||||
|      * @param tight_loop If false, the CPU single-steps. | ||||
|      * @return Result status, indicating whethor or not the operation succeeded. | ||||
|      */ | ||||
|     ResultStatus RunLoop(int tight_loop = 1000); | ||||
|     ResultStatus RunLoop(bool tight_loop = true); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Step the CPU one instruction | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ TEST_CASE("ARM_DynCom (vfp): vadd", "[arm_dyncom]") { | |||
|         dyncom.SetVFPSystemReg(VFP_FPSCR, test_case.initial_fpscr); | ||||
|         dyncom.SetVFPReg(4, test_case.a); | ||||
|         dyncom.SetVFPReg(6, test_case.b); | ||||
|         dyncom.ExecuteInstructions(1); | ||||
|         dyncom.Step(); | ||||
|         if (dyncom.GetVFPReg(2) != test_case.result || | ||||
|             dyncom.GetVFPSystemReg(VFP_FPSCR) != test_case.final_fpscr) { | ||||
|             printf("f: %x\n", test_case.initial_fpscr); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue