From patchwork Mon Jun 6 10:23:02 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Benjamin Herrenschmidt X-Patchwork-Id: 9157731 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id BC36160572 for ; Mon, 6 Jun 2016 10:23:59 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A0B952656B for ; Mon, 6 Jun 2016 10:23:59 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 93D8726E82; Mon, 6 Jun 2016 10:23:59 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-6.9 required=2.0 tests=BAYES_00,RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 9244F2656B for ; Mon, 6 Jun 2016 10:23:58 +0000 (UTC) Received: from localhost ([::1]:41174 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b9rhN-0007N9-2T for patchwork-qemu-devel@patchwork.kernel.org; Mon, 06 Jun 2016 06:23:57 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:51608) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b9rh0-0007Mr-IE for qemu-devel@nongnu.org; Mon, 06 Jun 2016 06:23:36 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1b9rgw-00078o-Eo for qemu-devel@nongnu.org; Mon, 06 Jun 2016 06:23:34 -0400 Received: from gate.crashing.org ([63.228.1.57]:46470) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b9rgv-00078d-RP; Mon, 06 Jun 2016 06:23:30 -0400 Received: from localhost.localdomain (localhost.localdomain 
[127.0.0.1]) by gate.crashing.org (8.14.1/8.13.8) with ESMTP id u56AN2EC003917; Mon, 6 Jun 2016 05:23:04 -0500 Message-ID: <1465208582.4274.47.camel@kernel.crashing.org> From: Benjamin Herrenschmidt To: qemu-ppc@nongnu.org Date: Mon, 06 Jun 2016 20:23:02 +1000 X-Mailer: Evolution 3.18.5.2 (3.18.5.2-1.fc23) Mime-Version: 1.0 X-MIME-Autoconverted: from 8bit to base64 by gate.crashing.org id u56AN2EC003917 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6.x X-Received-From: 63.228.1.57 Subject: [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: =?ISO-8859-1?Q?C=E9dric?= Le Goater , Mark Cave-Ayland , qemu-devel@nongnu.org, David Gibson Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP This ports the existing 64-bit mechanism to 32-bit, so that a series of 64 tlbie's followed by a sync, as some versions of Darwin (ab)use, will result in a single flush. We apply a pending flush on any sync instruction, though, as Darwin doesn't use tlbsync on non-SMP systems. Signed-off-by: Benjamin Herrenschmidt --- Note: I haven't done any performance impact measurements with this one ... 
feel free to let me know what it does for you :-)  target-ppc/cpu.h         |  2 +-  target-ppc/helper_regs.h |  2 +-  target-ppc/mmu_helper.c  | 44 ++++++++------------------------------------  target-ppc/translate.c   | 27 +++++++++++++++++++++------  4 files changed, 31 insertions(+), 44 deletions(-) Tested-by: Mark Cave-Ayland diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index d8f8f7e..c2962d7 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -959,7 +959,6 @@ struct CPUPPCState {      ppc_slb_t slb[MAX_SLB_ENTRIES];      int32_t slb_nr;      /* tcg TLB needs flush (deferred slb inval instruction typically) */ -    uint32_t tlb_need_flush;  #endif      /* segment registers */      hwaddr htab_base; @@ -985,6 +984,7 @@ struct CPUPPCState {      target_ulong pb[4];      bool tlb_dirty;   /* Set to non-zero when modifying TLB                  */      bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active                */ +    uint32_t tlb_need_flush; /* Delayed flush needed */  #endif        /* Other registers */ diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 104b690..8fc0934 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,      return excp;  }   -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) +#if !defined(CONFIG_USER_ONLY)  static inline void check_tlb_flush(CPUPPCState *env)  {      CPUState *cs = CPU(ppc_env_get_cpu(env)); diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index a5e3878..485d5b8 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)      case POWERPC_MMU_2_06a:      case POWERPC_MMU_2_07:      case POWERPC_MMU_2_07a: -        env->tlb_need_flush = 0;  #endif /* defined(TARGET_PPC64) */ +        env->tlb_need_flush = 0;          tlb_flush(CPU(cpu), 1);          break;      default: @@ -1949,9 +1949,6 
@@ void ppc_tlb_invalidate_all(CPUPPCState *env)  void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)  {  #if !defined(FLUSH_ALL_TLBS) -    PowerPCCPU *cpu = ppc_env_get_cpu(env); -    CPUState *cs; -      addr &= TARGET_PAGE_MASK;      switch (env->mmu_model) {      case POWERPC_MMU_SOFT_6xx: @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)          break;      case POWERPC_MMU_32B:      case POWERPC_MMU_601: -        /* tlbie invalidate TLBs for all segments */ -        addr &= ~((target_ulong)-1ULL << 28); -        cs = CPU(cpu); -        /* XXX: this case should be optimized, -         * giving a mask to tlb_flush_page -         */ -        /* This is broken, some CPUs invalidate a whole congruence -         * class on an even smaller subset of bits and some OSes take -         * advantage of this. Just blow the whole thing away. +        /* Actual CPUs invalidate entire congruence classes based on the +         * geometry of their TLBs and some OSes take that into account, +         * we just mark the TLB to be flushed later (context synchronizing +         * event or sync instruction on 32-bit).           
*/ -#if 0 -        tlb_flush_page(cs, addr | (0x0 << 28)); -        tlb_flush_page(cs, addr | (0x1 << 28)); -        tlb_flush_page(cs, addr | (0x2 << 28)); -        tlb_flush_page(cs, addr | (0x3 << 28)); -        tlb_flush_page(cs, addr | (0x4 << 28)); -        tlb_flush_page(cs, addr | (0x5 << 28)); -        tlb_flush_page(cs, addr | (0x6 << 28)); -        tlb_flush_page(cs, addr | (0x7 << 28)); -        tlb_flush_page(cs, addr | (0x8 << 28)); -        tlb_flush_page(cs, addr | (0x9 << 28)); -        tlb_flush_page(cs, addr | (0xA << 28)); -        tlb_flush_page(cs, addr | (0xB << 28)); -        tlb_flush_page(cs, addr | (0xC << 28)); -        tlb_flush_page(cs, addr | (0xD << 28)); -        tlb_flush_page(cs, addr | (0xE << 28)); -        tlb_flush_page(cs, addr | (0xF << 28)); -#else -        tlb_flush(cs, 1); -#endif +        env->tlb_need_flush = 1;          break;  #if defined(TARGET_PPC64)      case POWERPC_MMU_64B: @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num)    void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)  { -    PowerPCCPU *cpu = ppc_env_get_cpu(env); -      qemu_log_mask(CPU_LOG_MMU,              "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,              (int)srnum, value, env->sr[srnum]);  #if defined(TARGET_PPC64)      if (env->mmu_model & POWERPC_MMU_64) { +        PowerPCCPU *cpu = ppc_env_get_cpu(env);          uint64_t esid, vsid;            /* ESID = srnum */ @@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)              }          }  #else -        tlb_flush(CPU(cpu), 1); +        env->tlb_need_flush = 1;  #endif      }  } diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 7763431..ab5862f 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -193,6 +193,7 @@ struct DisasContext {      uint32_t exception;      /* Routine used to access memory */      bool pr, hv; +    
bool lazy_tlb_flush;      int mem_idx;      int access_type;      /* Translation flags */ @@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx)  {  }   -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) +#if !defined(CONFIG_USER_ONLY)  static inline void gen_check_tlb_flush(DisasContext *ctx)  { -    TCGv_i32 t = tcg_temp_new_i32(); -    TCGLabel *l = gen_new_label(); +    TCGv_i32 t; +    TCGLabel *l;   +    if (!ctx->lazy_tlb_flush) { +        return; +    } +    l = gen_new_label(); +    t = tcg_temp_new_i32();      tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));      tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);      gen_helper_check_tlb_flush(cpu_env); @@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx)      uint32_t l = (ctx->opcode >> 21) & 3;        /* -     * For l == 2, it's a ptesync, We need to check for a pending TLB flush. -     * This can only happen in kernel mode however so check MSR_PR as well. +     * We may need to check for a pending TLB flush. +     * +     * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32. +     * +     * Additionally, this can only happen in kernel mode however so +     * check MSR_PR as well.       */ -    if (l == 2 && !ctx->pr) { +    if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {          gen_check_tlb_flush(ctx);      }  } @@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)      ctx.sf_mode = msr_is_64bit(env, env->msr);      ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);  #endif +    if (env->mmu_model == POWERPC_MMU_32B || +        env->mmu_model == POWERPC_MMU_601 || +        (env->mmu_model & POWERPC_MMU_64B)) +            ctx.lazy_tlb_flush = true; +      ctx.fpu_enabled = msr_fp;      if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)          ctx.spe_enabled = msr_spe;