Message ID | 1465208582.4274.47.camel@kernel.crashing.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/06/2016 12:23 PM, Benjamin Herrenschmidt wrote: > This ports the existing 64-bit mechanism to 32-bit, thus series > of 64 tlbie's followed by a sync like some versions of Darwin > (ab)use will result in a single flush. > > We apply a pending flush on any sync instruction though, as Darwin > doesn't use tlbsync on non-SMP systems. Yes, this is the case at the right beginning of boot but it does use tlbsync after, in hw_rem_map() where pvr is only tested against 603. > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> > --- > > Note: I haven't done any performance impact measurements with this > one ... feel free to let me know what it does for you :-) It adds a couple of seconds improvement on a ~47s boot time on my thinkpad. So a 2-5% I would say but I haven't done much more perf. Thanks, C. > target-ppc/cpu.h | 2 +- > target-ppc/helper_regs.h | 2 +- > target-ppc/mmu_helper.c | 44 ++++++++------------------------------------ > target-ppc/translate.c | 27 +++++++++++++++++++++------ > 4 files changed, 31 insertions(+), 44 deletions(-) > > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index d8f8f7e..c2962d7 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -959,7 +959,6 @@ struct CPUPPCState { > ppc_slb_t slb[MAX_SLB_ENTRIES]; > int32_t slb_nr; > /* tcg TLB needs flush (deferred slb inval instruction typically) */ > - uint32_t tlb_need_flush; > #endif > /* segment registers */ > hwaddr htab_base; > @@ -985,6 +984,7 @@ struct CPUPPCState { > target_ulong pb[4]; > bool tlb_dirty; /* Set to non-zero when modifying TLB */ > bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */ > + uint32_t tlb_need_flush; /* Delayed flush needed */ > #endif > > /* Other registers */ > diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h > index 104b690..8fc0934 100644 > --- a/target-ppc/helper_regs.h > +++ b/target-ppc/helper_regs.h > @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, > return 
excp; > } > > -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > +#if !defined(CONFIG_USER_ONLY) > static inline void check_tlb_flush(CPUPPCState *env) > { > CPUState *cs = CPU(ppc_env_get_cpu(env)); > diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c > index a5e3878..485d5b8 100644 > --- a/target-ppc/mmu_helper.c > +++ b/target-ppc/mmu_helper.c > @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) > case POWERPC_MMU_2_06a: > case POWERPC_MMU_2_07: > case POWERPC_MMU_2_07a: > - env->tlb_need_flush = 0; > #endif /* defined(TARGET_PPC64) */ > + env->tlb_need_flush = 0; > tlb_flush(CPU(cpu), 1); > break; > default: > @@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) > void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) > { > #if !defined(FLUSH_ALL_TLBS) > - PowerPCCPU *cpu = ppc_env_get_cpu(env); > - CPUState *cs; > - > addr &= TARGET_PAGE_MASK; > switch (env->mmu_model) { > case POWERPC_MMU_SOFT_6xx: > @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) > break; > case POWERPC_MMU_32B: > case POWERPC_MMU_601: > - /* tlbie invalidate TLBs for all segments */ > - addr &= ~((target_ulong)-1ULL << 28); > - cs = CPU(cpu); > - /* XXX: this case should be optimized, > - * giving a mask to tlb_flush_page > - */ > - /* This is broken, some CPUs invalidate a whole congruence > - * class on an even smaller subset of bits and some OSes take > - * advantage of this. Just blow the whole thing away. > + /* Actual CPUs invalidate entire congruence classes based on the > + * geometry of their TLBs and some OSes take that into account, > + * we just mark the TLB to be flushed later (context synchronizing > + * event or sync instruction on 32-bit). 
> */ > -#if 0 > - tlb_flush_page(cs, addr | (0x0 << 28)); > - tlb_flush_page(cs, addr | (0x1 << 28)); > - tlb_flush_page(cs, addr | (0x2 << 28)); > - tlb_flush_page(cs, addr | (0x3 << 28)); > - tlb_flush_page(cs, addr | (0x4 << 28)); > - tlb_flush_page(cs, addr | (0x5 << 28)); > - tlb_flush_page(cs, addr | (0x6 << 28)); > - tlb_flush_page(cs, addr | (0x7 << 28)); > - tlb_flush_page(cs, addr | (0x8 << 28)); > - tlb_flush_page(cs, addr | (0x9 << 28)); > - tlb_flush_page(cs, addr | (0xA << 28)); > - tlb_flush_page(cs, addr | (0xB << 28)); > - tlb_flush_page(cs, addr | (0xC << 28)); > - tlb_flush_page(cs, addr | (0xD << 28)); > - tlb_flush_page(cs, addr | (0xE << 28)); > - tlb_flush_page(cs, addr | (0xF << 28)); > -#else > - tlb_flush(cs, 1); > -#endif > + env->tlb_need_flush = 1; > break; > #if defined(TARGET_PPC64) > case POWERPC_MMU_64B: > @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num) > > void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) > { > - PowerPCCPU *cpu = ppc_env_get_cpu(env); > - > qemu_log_mask(CPU_LOG_MMU, > "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__, > (int)srnum, value, env->sr[srnum]); > #if defined(TARGET_PPC64) > if (env->mmu_model & POWERPC_MMU_64) { > + PowerPCCPU *cpu = ppc_env_get_cpu(env); > uint64_t esid, vsid; > > /* ESID = srnum */ > @@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) > } > } > #else > - tlb_flush(CPU(cpu), 1); > + env->tlb_need_flush = 1; > #endif > } > } > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 7763431..ab5862f 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -193,6 +193,7 @@ struct DisasContext { > uint32_t exception; > /* Routine used to access memory */ > bool pr, hv; > + bool lazy_tlb_flush; > int mem_idx; > int access_type; > /* Translation flags */ > @@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx) > { > } > > 
-#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > +#if !defined(CONFIG_USER_ONLY) > static inline void gen_check_tlb_flush(DisasContext *ctx) > { > - TCGv_i32 t = tcg_temp_new_i32(); > - TCGLabel *l = gen_new_label(); > + TCGv_i32 t; > + TCGLabel *l; > > + if (!ctx->lazy_tlb_flush) { > + return; > + } > + l = gen_new_label(); > + t = tcg_temp_new_i32(); > tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); > tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); > gen_helper_check_tlb_flush(cpu_env); > @@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx) > uint32_t l = (ctx->opcode >> 21) & 3; > > /* > - * For l == 2, it's a ptesync, We need to check for a pending TLB flush. > - * This can only happen in kernel mode however so check MSR_PR as well. > + * We may need to check for a pending TLB flush. > + * > + * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32. > + * > + * Additionally, this can only happen in kernel mode however so > + * check MSR_PR as well. > */ > - if (l == 2 && !ctx->pr) { > + if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { > gen_check_tlb_flush(ctx); > } > } > @@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) > ctx.sf_mode = msr_is_64bit(env, env->msr); > ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR); > #endif > + if (env->mmu_model == POWERPC_MMU_32B || > + env->mmu_model == POWERPC_MMU_601 || > + (env->mmu_model & POWERPC_MMU_64B)) > + ctx.lazy_tlb_flush = true; > + > ctx.fpu_enabled = msr_fp; > if ((env->flags & POWERPC_FLAG_SPE) && msr_spe) > ctx.spe_enabled = msr_spe; > >
On 06/06/16 11:23, Benjamin Herrenschmidt wrote: > This ports the existing 64-bit mechanism to 32-bit, thus series > of 64 tlbie's followed by a sync like some versions of Darwin > (ab)use will result in a single flush. > > We apply a pending flush on any sync instruction though, as Darwin > doesn't use tlbsync on non-SMP systems. > > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> > --- > > Note: I haven't done any performance impact measurements with this > one ... feel free to let me know what it does for you :-) > > target-ppc/cpu.h | 2 +- > target-ppc/helper_regs.h | 2 +- > target-ppc/mmu_helper.c | 44 ++++++++------------------------------------ > target-ppc/translate.c | 27 +++++++++++++++++++++------ > 4 files changed, 31 insertions(+), 44 deletions(-) > > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index d8f8f7e..c2962d7 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -959,7 +959,6 @@ struct CPUPPCState { > ppc_slb_t slb[MAX_SLB_ENTRIES]; > int32_t slb_nr; > /* tcg TLB needs flush (deferred slb inval instruction typically) */ > - uint32_t tlb_need_flush; > #endif > /* segment registers */ > hwaddr htab_base; > @@ -985,6 +984,7 @@ struct CPUPPCState { > target_ulong pb[4]; > bool tlb_dirty; /* Set to non-zero when modifying TLB */ > bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */ > + uint32_t tlb_need_flush; /* Delayed flush needed */ > #endif > > /* Other registers */ > diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h > index 104b690..8fc0934 100644 > --- a/target-ppc/helper_regs.h > +++ b/target-ppc/helper_regs.h > @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, > return excp; > } > > -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > +#if !defined(CONFIG_USER_ONLY) > static inline void check_tlb_flush(CPUPPCState *env) > { > CPUState *cs = CPU(ppc_env_get_cpu(env)); > diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c > index 
a5e3878..485d5b8 100644 > --- a/target-ppc/mmu_helper.c > +++ b/target-ppc/mmu_helper.c > @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) > case POWERPC_MMU_2_06a: > case POWERPC_MMU_2_07: > case POWERPC_MMU_2_07a: > - env->tlb_need_flush = 0; > #endif /* defined(TARGET_PPC64) */ > + env->tlb_need_flush = 0; > tlb_flush(CPU(cpu), 1); > break; > default: > @@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) > void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) > { > #if !defined(FLUSH_ALL_TLBS) > - PowerPCCPU *cpu = ppc_env_get_cpu(env); > - CPUState *cs; > - > addr &= TARGET_PAGE_MASK; > switch (env->mmu_model) { > case POWERPC_MMU_SOFT_6xx: > @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) > break; > case POWERPC_MMU_32B: > case POWERPC_MMU_601: > - /* tlbie invalidate TLBs for all segments */ > - addr &= ~((target_ulong)-1ULL << 28); > - cs = CPU(cpu); > - /* XXX: this case should be optimized, > - * giving a mask to tlb_flush_page > - */ > - /* This is broken, some CPUs invalidate a whole congruence > - * class on an even smaller subset of bits and some OSes take > - * advantage of this. Just blow the whole thing away. > + /* Actual CPUs invalidate entire congruence classes based on the > + * geometry of their TLBs and some OSes take that into account, > + * we just mark the TLB to be flushed later (context synchronizing > + * event or sync instruction on 32-bit). 
> */ > -#if 0 > - tlb_flush_page(cs, addr | (0x0 << 28)); > - tlb_flush_page(cs, addr | (0x1 << 28)); > - tlb_flush_page(cs, addr | (0x2 << 28)); > - tlb_flush_page(cs, addr | (0x3 << 28)); > - tlb_flush_page(cs, addr | (0x4 << 28)); > - tlb_flush_page(cs, addr | (0x5 << 28)); > - tlb_flush_page(cs, addr | (0x6 << 28)); > - tlb_flush_page(cs, addr | (0x7 << 28)); > - tlb_flush_page(cs, addr | (0x8 << 28)); > - tlb_flush_page(cs, addr | (0x9 << 28)); > - tlb_flush_page(cs, addr | (0xA << 28)); > - tlb_flush_page(cs, addr | (0xB << 28)); > - tlb_flush_page(cs, addr | (0xC << 28)); > - tlb_flush_page(cs, addr | (0xD << 28)); > - tlb_flush_page(cs, addr | (0xE << 28)); > - tlb_flush_page(cs, addr | (0xF << 28)); > -#else > - tlb_flush(cs, 1); > -#endif > + env->tlb_need_flush = 1; > break; > #if defined(TARGET_PPC64) > case POWERPC_MMU_64B: > @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num) > > void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) > { > - PowerPCCPU *cpu = ppc_env_get_cpu(env); > - > qemu_log_mask(CPU_LOG_MMU, > "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__, > (int)srnum, value, env->sr[srnum]); > #if defined(TARGET_PPC64) > if (env->mmu_model & POWERPC_MMU_64) { > + PowerPCCPU *cpu = ppc_env_get_cpu(env); > uint64_t esid, vsid; > > /* ESID = srnum */ > @@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) > } > } > #else > - tlb_flush(CPU(cpu), 1); > + env->tlb_need_flush = 1; > #endif > } > } > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 7763431..ab5862f 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -193,6 +193,7 @@ struct DisasContext { > uint32_t exception; > /* Routine used to access memory */ > bool pr, hv; > + bool lazy_tlb_flush; > int mem_idx; > int access_type; > /* Translation flags */ > @@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx) > { > } > > 
-#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > +#if !defined(CONFIG_USER_ONLY) > static inline void gen_check_tlb_flush(DisasContext *ctx) > { > - TCGv_i32 t = tcg_temp_new_i32(); > - TCGLabel *l = gen_new_label(); > + TCGv_i32 t; > + TCGLabel *l; > > + if (!ctx->lazy_tlb_flush) { > + return; > + } > + l = gen_new_label(); > + t = tcg_temp_new_i32(); > tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); > tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); > gen_helper_check_tlb_flush(cpu_env); > @@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx) > uint32_t l = (ctx->opcode >> 21) & 3; > > /* > - * For l == 2, it's a ptesync, We need to check for a pending TLB flush. > - * This can only happen in kernel mode however so check MSR_PR as well. > + * We may need to check for a pending TLB flush. > + * > + * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32. > + * > + * Additionally, this can only happen in kernel mode however so > + * check MSR_PR as well. > */ > - if (l == 2 && !ctx->pr) { > + if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { > gen_check_tlb_flush(ctx); > } > } > @@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) > ctx.sf_mode = msr_is_64bit(env, env->msr); > ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR); > #endif > + if (env->mmu_model == POWERPC_MMU_32B || > + env->mmu_model == POWERPC_MMU_601 || > + (env->mmu_model & POWERPC_MMU_64B)) > + ctx.lazy_tlb_flush = true; > + > ctx.fpu_enabled = msr_fp; > if ((env->flags & POWERPC_FLAG_SPE) && msr_spe) > ctx.spe_enabled = msr_spe; > > After another run of the OpenBIOS tests with this patch applied on top of the previous 2 patches, I see no regressions introduced. Like Cédric I don't get the feeling that the Mac machines necessarily run faster, however the overall experience does feel smoother and more responsive. Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> ATB, Mark.
On Mon, 2016-06-06 at 23:36 +0100, Mark Cave-Ayland wrote: > > After another run of the OpenBIOS tests with this patch applied on top > of the previous 2 patches, I see no regressions introduced. Like Cédric > I don't get the feeling that the Mac machines necessarily run faster, > however the overall experience does feel smoother and more responsive. > > Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> Thanks! Cheers, Ben.
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index d8f8f7e..c2962d7 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -959,7 +959,6 @@ struct CPUPPCState { ppc_slb_t slb[MAX_SLB_ENTRIES]; int32_t slb_nr; /* tcg TLB needs flush (deferred slb inval instruction typically) */ - uint32_t tlb_need_flush; #endif /* segment registers */ hwaddr htab_base; @@ -985,6 +984,7 @@ struct CPUPPCState { target_ulong pb[4]; bool tlb_dirty; /* Set to non-zero when modifying TLB */ bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */ + uint32_t tlb_need_flush; /* Delayed flush needed */ #endif /* Other registers */ diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 104b690..8fc0934 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, return excp; } -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) +#if !defined(CONFIG_USER_ONLY) static inline void check_tlb_flush(CPUPPCState *env) { CPUState *cs = CPU(ppc_env_get_cpu(env)); diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index a5e3878..485d5b8 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: case POWERPC_MMU_2_07a: - env->tlb_need_flush = 0; #endif /* defined(TARGET_PPC64) */ + env->tlb_need_flush = 0; tlb_flush(CPU(cpu), 1); break; default: @@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) { #if !defined(FLUSH_ALL_TLBS) - PowerPCCPU *cpu = ppc_env_get_cpu(env); - CPUState *cs; - addr &= TARGET_PAGE_MASK; switch (env->mmu_model) { case POWERPC_MMU_SOFT_6xx: @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) break; case POWERPC_MMU_32B: case POWERPC_MMU_601: - /* tlbie invalidate TLBs for all segments */ - addr &= 
~((target_ulong)-1ULL << 28); - cs = CPU(cpu); - /* XXX: this case should be optimized, - * giving a mask to tlb_flush_page - */ - /* This is broken, some CPUs invalidate a whole congruence - * class on an even smaller subset of bits and some OSes take - * advantage of this. Just blow the whole thing away. + /* Actual CPUs invalidate entire congruence classes based on the + * geometry of their TLBs and some OSes take that into account, + * we just mark the TLB to be flushed later (context synchronizing + * event or sync instruction on 32-bit). */ -#if 0 - tlb_flush_page(cs, addr | (0x0 << 28)); - tlb_flush_page(cs, addr | (0x1 << 28)); - tlb_flush_page(cs, addr | (0x2 << 28)); - tlb_flush_page(cs, addr | (0x3 << 28)); - tlb_flush_page(cs, addr | (0x4 << 28)); - tlb_flush_page(cs, addr | (0x5 << 28)); - tlb_flush_page(cs, addr | (0x6 << 28)); - tlb_flush_page(cs, addr | (0x7 << 28)); - tlb_flush_page(cs, addr | (0x8 << 28)); - tlb_flush_page(cs, addr | (0x9 << 28)); - tlb_flush_page(cs, addr | (0xA << 28)); - tlb_flush_page(cs, addr | (0xB << 28)); - tlb_flush_page(cs, addr | (0xC << 28)); - tlb_flush_page(cs, addr | (0xD << 28)); - tlb_flush_page(cs, addr | (0xE << 28)); - tlb_flush_page(cs, addr | (0xF << 28)); -#else - tlb_flush(cs, 1); -#endif + env->tlb_need_flush = 1; break; #if defined(TARGET_PPC64) case POWERPC_MMU_64B: @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num) void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); - qemu_log_mask(CPU_LOG_MMU, "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__, (int)srnum, value, env->sr[srnum]); #if defined(TARGET_PPC64) if (env->mmu_model & POWERPC_MMU_64) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); uint64_t esid, vsid; /* ESID = srnum */ @@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) } } #else - tlb_flush(CPU(cpu), 1); + env->tlb_need_flush = 
1; #endif } } diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 7763431..ab5862f 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -193,6 +193,7 @@ struct DisasContext { uint32_t exception; /* Routine used to access memory */ bool pr, hv; + bool lazy_tlb_flush; int mem_idx; int access_type; /* Translation flags */ @@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx) { } -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) +#if !defined(CONFIG_USER_ONLY) static inline void gen_check_tlb_flush(DisasContext *ctx) { - TCGv_i32 t = tcg_temp_new_i32(); - TCGLabel *l = gen_new_label(); + TCGv_i32 t; + TCGLabel *l; + if (!ctx->lazy_tlb_flush) { + return; + } + l = gen_new_label(); + t = tcg_temp_new_i32(); tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); gen_helper_check_tlb_flush(cpu_env); @@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx) uint32_t l = (ctx->opcode >> 21) & 3; /* - * For l == 2, it's a ptesync, We need to check for a pending TLB flush. - * This can only happen in kernel mode however so check MSR_PR as well. + * We may need to check for a pending TLB flush. + * + * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32. + * + * Additionally, this can only happen in kernel mode however so + * check MSR_PR as well. */ - if (l == 2 && !ctx->pr) { + if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { gen_check_tlb_flush(ctx); } } @@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) ctx.sf_mode = msr_is_64bit(env, env->msr); ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR); #endif + if (env->mmu_model == POWERPC_MMU_32B || + env->mmu_model == POWERPC_MMU_601 || + (env->mmu_model & POWERPC_MMU_64B)) + ctx.lazy_tlb_flush = true; + ctx.fpu_enabled = msr_fp; if ((env->flags & POWERPC_FLAG_SPE) && msr_spe) ctx.spe_enabled = msr_spe;
This ports the existing 64-bit mechanism to 32-bit, so that a series of 64 tlbie's followed by a sync, as some versions of Darwin (ab)use, will result in a single flush. We apply a pending flush on any sync instruction though, as Darwin doesn't use tlbsync on non-SMP systems. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> --- Note: I haven't done any performance impact measurements with this one ... feel free to let me know what it does for you :-) target-ppc/cpu.h | 2 +- target-ppc/helper_regs.h | 2 +- target-ppc/mmu_helper.c | 44 ++++++++------------------------------------ target-ppc/translate.c | 27 +++++++++++++++++++++------ 4 files changed, 31 insertions(+), 44 deletions(-)