Message ID | 20210614191630.101304-1-bruno.larsen@eldorado.org.br (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,1/2] target/ppc: fix address translation bug for radix mmus | expand |
On Mon, 14 Jun 2021 16:16:29 -0300 "Bruno Larsen (billionai)" <bruno.larsen@eldorado.org.br> wrote: > Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> > > This commit attempts to fix the first bug mentioned by Richard Henderson in > https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html > > To sumarize the bug here, when radix-style mmus are translating an > address, they might need to call a second level of translation, with > hypvervisor priviledges. However, the way it was being done up until > this point meant that the second level translation had the same > priviledges as the first level. This would only happen when a TCG guest > was emulating KVM, which is why it hasn't been discovered yet. > Cédric is definitely the person to Cc: for stuff like this :) > This patch attempts to correct that by making radix64_*_xlate functions > receive the mmu_idx, and passing one with the correct permission for the > second level translation. > > The mmuidx macros added by this patch are only correct for non-bookE > mmus, because BookE style set the IS and DS bits inverted and there > might be other subtle differences. However, there doesn't seem to be > BookE cpus that have radix-style mmus, so we left a comment there to > document the issue, in case a machine does have that and was missed. > > As part of this cleanup, we now need to send the correct mmmu_idx > when calling get_phys_page_debug, otherwise we might not be able to see the > memory that the CPU could > > Suggested-by: Richard Henderson <richard.henderson@linaro.org> > Signed-off-by: Bruno Larsen (billionai) <bruno.larsen@eldorado.org.br> > --- > target/ppc/internal.h | 12 ++++++++++++ > target/ppc/mmu-radix64.c | 38 ++++++++++++++++++++++---------------- > target/ppc/mmu-radix64.h | 2 +- > target/ppc/mmu_helper.c | 8 +++++--- > 4 files changed, 40 insertions(+), 20 deletions(-) > > diff --git a/target/ppc/internal.h b/target/ppc/internal.h > index f1fd3c8d04..003df7e8a9 100644 > --- a/target/ppc/internal.h > +++ b/target/ppc/internal.h > @@ -245,4 +245,16 @@ static inline int prot_for_access_type(MMUAccessType access_type) > g_assert_not_reached(); > } > > +/* > + * These correspond to the mmu_idx values computed in > + * hreg_compute_hflags_value. See the tables therein > + */ > +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } > +/* > + * If we want to use these macros for hash-style MMUs, we need to > + * add an if or another macro here. > + */ > +static inline bool mmuidx_real(int idx) { return idx & 2; } > +static inline bool mmuidx_hv(int idx) { return idx & 4; } > + > #endif /* PPC_INTERNAL_H */ > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index cbd404bfa4..0ae8f6b572 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, > > static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, > uint64_t pte, int *fault_cause, int *prot, > - bool partition_scoped) > + int mmu_idx, bool partition_scoped) > { > CPUPPCState *env = &cpu->env; > int need_prot; > @@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, > /* Determine permissions allowed by Encoded Access Authority */ > if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) { > *prot = 0; > - } else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) { > + } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) || > + partition_scoped) { > *prot = ppc_radix64_get_prot_eaa(pte); > } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */ > *prot = ppc_radix64_get_prot_eaa(pte); > @@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > ppc_v3_pate_t pate, > hwaddr *h_raddr, int *h_prot, > int *h_page_size, bool pde_addr, > - bool guest_visible) > + int mmu_idx, bool guest_visible) > { > int fault_cause = 0; > hwaddr pte_addr; > @@ -310,7 +311,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB, > pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size, > &pte, &fault_cause, &pte_addr) || > - ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) { > + ppc_radix64_check_prot(cpu, access_type, pte, > + &fault_cause, h_prot, mmu_idx, true) > + ) { > if (pde_addr) { /* address being translated was that of a guest pde */ > fault_cause |= DSISR_PRTABLE_FAULT; > } > @@ -332,7 +335,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > vaddr eaddr, uint64_t pid, > ppc_v3_pate_t pate, hwaddr *g_raddr, > int *g_prot, int *g_page_size, > - bool guest_visible) > + int mmu_idx, bool guest_visible) > { > CPUState *cs = CPU(cpu); > CPUPPCState *env = &cpu->env; > @@ -367,7 +370,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, > pate, &h_raddr, &h_prot, > &h_page_size, true, > - guest_visible); > + /* mmu_idx is 5 because we're translating from hypervisor scope */ > + 5, guest_visible); > if (ret) { > return ret; > } > @@ -407,7 +411,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, > pate, &h_raddr, &h_prot, > &h_page_size, true, > - guest_visible); > + /* mmu_idx is 5 because we're translating from hypervisor scope */ > + 5, guest_visible); > if (ret) { > return ret; > } > @@ -431,7 +436,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > *g_raddr = (rpn & ~mask) | (eaddr & mask); > } > > - if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) { > + if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, > + g_prot, mmu_idx, false)) { > /* Access denied due to protection */ > if (guest_visible) { > ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause); > @@ -464,7 +470,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > * +-------------+----------------+---------------+ > */ > bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > - hwaddr *raddr, int *psizep, int *protp, > + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, > bool guest_visible) > { > CPUPPCState *env = &cpu->env; > @@ -474,17 +480,17 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > hwaddr g_raddr; > bool relocation; > > - assert(!(msr_hv && cpu->vhyp)); > + assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp)); > > - relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr); > + relocation = !mmuidx_real(mmu_idx); > > /* HV or virtual hypervisor Real Mode Access */ > - if (!relocation && (msr_hv || cpu->vhyp)) { > + if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) { > /* In real mode top 4 effective addr bits (mostly) ignored */ > *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; > > /* In HV mode, add HRMOR if top EA bit is clear */ > - if (msr_hv || !env->has_hv_mode) { > + if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) { > if (!(eaddr >> 63)) { > *raddr |= env->spr[SPR_HRMOR]; > } > @@ -546,7 +552,7 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > if (relocation) { > int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, > pate, &g_raddr, &prot, > - &psize, guest_visible); > + &psize, mmu_idx, guest_visible); > if (ret) { > return false; > } > @@ -564,13 +570,13 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > * quadrants 1 or 2. Translates a guest real address to a host > * real address. > */ > - if (lpid || !msr_hv) { > + if (lpid || !mmuidx_hv(mmu_idx)) { > int ret; > > ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, > g_raddr, pate, raddr, > &prot, &psize, false, > - guest_visible); > + mmu_idx, guest_visible); > if (ret) { > return false; > } > diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h > index 6b13b89b64..b70357cf34 100644 > --- a/target/ppc/mmu-radix64.h > +++ b/target/ppc/mmu-radix64.h > @@ -45,7 +45,7 @@ > #ifdef TARGET_PPC64 > > bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > - hwaddr *raddr, int *psizep, int *protp, > + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, > bool guest_visible); > > static inline int ppc_radix64_get_prot_eaa(uint64_t pte) > diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c > index ba1952c77d..9dcdf88597 100644 > --- a/target/ppc/mmu_helper.c > +++ b/target/ppc/mmu_helper.c > @@ -2908,7 +2908,7 @@ static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > case POWERPC_MMU_3_00: > if (ppc64_v3_radix(cpu)) { > return ppc_radix64_xlate(cpu, eaddr, access_type, > - raddrp, psizep, protp, guest_visible); > + raddrp, psizep, protp, mmu_idx, guest_visible); > } > /* fall through */ > case POWERPC_MMU_64B: > @@ -2941,8 +2941,10 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) > * try an MMU_DATA_LOAD, we may not be able to read instructions > * mapped by code TLBs, so we also try a MMU_INST_FETCH. > */ > - if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, 0, false) || > - ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, 0, false)) { > + if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, > + cpu_mmu_index(&cpu->env, false), false) || > + ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, > + cpu_mmu_index(&cpu->env, true), false)) { > return raddr & TARGET_PAGE_MASK; > } > return -1;
"Bruno Larsen (billionai)" <bruno.larsen@eldorado.org.br> writes: > +/* > + * These correspond to the mmu_idx values computed in > + * hreg_compute_hflags_value. See the tables therein > + */ > +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } > +/* > + * If we want to use these macros for hash-style MMUs, we need to > + * add an if or another macro here. > + */ Don't these work just fine for hash as well? Except for Booke. > +static inline bool mmuidx_real(int idx) { return idx & 2; } > +static inline bool mmuidx_hv(int idx) { return idx & 4; } > +
On Mon, Jun 14, 2021 at 06:04:13PM -0300, Fabiano Rosas wrote: > "Bruno Larsen (billionai)" <bruno.larsen@eldorado.org.br> writes: > > > +/* > > + * These correspond to the mmu_idx values computed in > > + * hreg_compute_hflags_value. See the tables therein > > + */ > > +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } > > +/* > > + * If we want to use these macros for hash-style MMUs, we need to > > + * add an if or another macro here. > > + */ > > Don't these work just fine for hash as well? Except for Booke. Right, I think we want to restrict these to BookS mmus, but I don't think we need to restrict to radix. > > > +static inline bool mmuidx_real(int idx) { return idx & 2; } > > +static inline bool mmuidx_hv(int idx) { return idx & 4; } > > + >
On Mon, Jun 14, 2021 at 04:16:29PM -0300, Bruno Larsen (billionai) wrote: > Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> > > This commit attempts to fix the first bug mentioned by Richard Henderson in > https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html > > To sumarize the bug here, when radix-style mmus are translating an > address, they might need to call a second level of translation, with > hypvervisor priviledges. However, the way it was being done up until s/hypvervisor/hypervisor/ And... it's a super-common error, even amongst native speakers, but s/priviledge/privilege/g > this point meant that the second level translation had the same > priviledges as the first level. This would only happen when a TCG guest > was emulating KVM, which is why it hasn't been discovered yet. > > This patch attempts to correct that by making radix64_*_xlate functions > receive the mmu_idx, and passing one with the correct permission for the > second level translation. > > The mmuidx macros added by this patch are only correct for non-bookE > mmus, because BookE style set the IS and DS bits inverted and there > might be other subtle differences. However, there doesn't seem to be > BookE cpus that have radix-style mmus, so we left a comment there to > document the issue, in case a machine does have that and was missed. Right, the mmuidx values are basically local to a cpu family. Radix is only present on BookS 64-bit cpus. > As part of this cleanup, we now need to send the correct mmmu_idx > when calling get_phys_page_debug, otherwise we might not be able to see the > memory that the CPU could > > Suggested-by: Richard Henderson <richard.henderson@linaro.org> > Signed-off-by: Bruno Larsen (billionai) <bruno.larsen@eldorado.org.br> > --- > target/ppc/internal.h | 12 ++++++++++++ > target/ppc/mmu-radix64.c | 38 ++++++++++++++++++++++---------------- > target/ppc/mmu-radix64.h | 2 +- > target/ppc/mmu_helper.c | 8 +++++--- > 4 files changed, 40 insertions(+), 20 deletions(-) > > diff --git a/target/ppc/internal.h b/target/ppc/internal.h > index f1fd3c8d04..003df7e8a9 100644 > --- a/target/ppc/internal.h > +++ b/target/ppc/internal.h > @@ -245,4 +245,16 @@ static inline int prot_for_access_type(MMUAccessType access_type) > g_assert_not_reached(); > } > > +/* > + * These correspond to the mmu_idx values computed in > + * hreg_compute_hflags_value. See the tables therein > + */ > +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } > +/* > + * If we want to use these macros for hash-style MMUs, we need to > + * add an if or another macro here I think move these to mmu-book3s-v3.h, since they're correct for both the radix and hash sides of the modern book3s mmu. . > + */ > +static inline bool mmuidx_real(int idx) { return idx & 2; } > +static inline bool mmuidx_hv(int idx) { return idx & 4; } > + > #endif /* PPC_INTERNAL_H */ > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index cbd404bfa4..0ae8f6b572 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, > > static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, > uint64_t pte, int *fault_cause, int *prot, > - bool partition_scoped) > + int mmu_idx, bool partition_scoped) > { > CPUPPCState *env = &cpu->env; > int need_prot; > @@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, > /* Determine permissions allowed by Encoded Access Authority */ > if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) { > *prot = 0; > - } else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) { > + } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) || > + partition_scoped) { > *prot = ppc_radix64_get_prot_eaa(pte); > } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */ > *prot = ppc_radix64_get_prot_eaa(pte); > @@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > ppc_v3_pate_t pate, > hwaddr *h_raddr, int *h_prot, > int *h_page_size, bool pde_addr, > - bool guest_visible) > + int mmu_idx, bool guest_visible) > { > int fault_cause = 0; > hwaddr pte_addr; > @@ -310,7 +311,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB, > pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size, > &pte, &fault_cause, &pte_addr) || > - ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) { > + ppc_radix64_check_prot(cpu, access_type, pte, > + &fault_cause, h_prot, mmu_idx, true) > + ) { > if (pde_addr) { /* address being translated was that of a guest pde */ > fault_cause |= DSISR_PRTABLE_FAULT; > } > @@ -332,7 +335,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > vaddr eaddr, uint64_t pid, > ppc_v3_pate_t pate, hwaddr *g_raddr, > int *g_prot, int *g_page_size, > - bool guest_visible) > + int mmu_idx, bool guest_visible) > { > CPUState *cs = CPU(cpu); > CPUPPCState *env = &cpu->env; > @@ -367,7 +370,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, > pate, &h_raddr, &h_prot, > &h_page_size, true, > - guest_visible); > + /* mmu_idx is 5 because we're translating from hypervisor scope */ > + 5, guest_visible); > if (ret) { > return ret; > } > @@ -407,7 +411,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, > pate, &h_raddr, &h_prot, > &h_page_size, true, > - guest_visible); > + /* mmu_idx is 5 because we're translating from hypervisor scope */ > + 5, guest_visible); > if (ret) { > return ret; > } > @@ -431,7 +436,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > *g_raddr = (rpn & ~mask) | (eaddr & mask); > } > > - if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) { > + if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, > + g_prot, mmu_idx, false)) { > /* Access denied due to protection */ > if (guest_visible) { > ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause); > @@ -464,7 +470,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > * +-------------+----------------+---------------+ > */ > bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > - hwaddr *raddr, int *psizep, int *protp, > + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, > bool guest_visible) > { > CPUPPCState *env = &cpu->env; > @@ -474,17 +480,17 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > hwaddr g_raddr; > bool relocation; > > - assert(!(msr_hv && cpu->vhyp)); > + assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp)); > > - relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr); > + relocation = !mmuidx_real(mmu_idx); > > /* HV or virtual hypervisor Real Mode Access */ > - if (!relocation && (msr_hv || cpu->vhyp)) { > + if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) { > /* In real mode top 4 effective addr bits (mostly) ignored */ > *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; > > /* In HV mode, add HRMOR if top EA bit is clear */ > - if (msr_hv || !env->has_hv_mode) { > + if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) { > if (!(eaddr >> 63)) { > *raddr |= env->spr[SPR_HRMOR]; > } > @@ -546,7 +552,7 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > if (relocation) { > int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, > pate, &g_raddr, &prot, > - &psize, guest_visible); > + &psize, mmu_idx, guest_visible); > if (ret) { > return false; > } > @@ -564,13 +570,13 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > * quadrants 1 or 2. Translates a guest real address to a host > * real address. > */ > - if (lpid || !msr_hv) { > + if (lpid || !mmuidx_hv(mmu_idx)) { > int ret; > > ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, > g_raddr, pate, raddr, > &prot, &psize, false, > - guest_visible); > + mmu_idx, guest_visible); > if (ret) { > return false; > } > diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h > index 6b13b89b64..b70357cf34 100644 > --- a/target/ppc/mmu-radix64.h > +++ b/target/ppc/mmu-radix64.h > @@ -45,7 +45,7 @@ > #ifdef TARGET_PPC64 > > bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > - hwaddr *raddr, int *psizep, int *protp, > + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, > bool guest_visible); > > static inline int ppc_radix64_get_prot_eaa(uint64_t pte) > diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c > index ba1952c77d..9dcdf88597 100644 > --- a/target/ppc/mmu_helper.c > +++ b/target/ppc/mmu_helper.c > @@ -2908,7 +2908,7 @@ static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > case POWERPC_MMU_3_00: > if (ppc64_v3_radix(cpu)) { > return ppc_radix64_xlate(cpu, eaddr, access_type, > - raddrp, psizep, protp, guest_visible); > + raddrp, psizep, protp, mmu_idx, guest_visible); > } > /* fall through */ > case POWERPC_MMU_64B: > @@ -2941,8 +2941,10 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) > * try an MMU_DATA_LOAD, we may not be able to read instructions > * mapped by code TLBs, so we also try a MMU_INST_FETCH. > */ > - if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, 0, false) || > - ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, 0, false)) { > + if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, > + cpu_mmu_index(&cpu->env, false), false) || > + ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, > + cpu_mmu_index(&cpu->env, true), false)) { > return raddr & TARGET_PAGE_MASK; > } > return -1;
On 6/14/21 6:41 PM, David Gibson wrote: > I think move these to mmu-book3s-v3.h, since they're correct for both > the radix and hash sides of the modern book3s mmu. They're also correct for all non-booke mmus, e.g. hash32 and 6xx, which is why I recommended internal.h (or some new mmu-internal.h). While neither hash32 nor 6xx have HV, and thus there is no second tlb bug, it would still be More Correct to use mmu_idx instead of direct references to msr_pr et al. r~
On 15/06/2021 00:20, Richard Henderson wrote: > On 6/14/21 6:41 PM, David Gibson wrote: >> I think move these to mmu-book3s-v3.h, since they're correct for both >> the radix and hash sides of the modern book3s mmu. > > They're also correct for all non-booke mmus, e.g. hash32 and 6xx, > which is why I recommended internal.h (or some new mmu-internal.h). > > While neither hash32 nor 6xx have HV, and thus there is no second tlb > bug, it would still be More Correct to use mmu_idx instead of direct > references to msr_pr et al. yeah, I agree that I should change the documentation. Before I send a new version with everything corrected, I wanna make sure if I should leave them in internal and use it for hash32 and 6xx MMUs, or do I put them in mmu-book3s-v3.h, since only radix64 has that the bug? > > > r~
On 6/14/21 9:16 PM, Bruno Larsen (billionai) wrote: > Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> > > This commit attempts to fix the first bug mentioned by Richard Henderson in > https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html > > To sumarize the bug here, when radix-style mmus are translating an > address, they might need to call a second level of translation, with > hypvervisor priviledges. However, the way it was being done up until > this point meant that the second level translation had the same > priviledges as the first level. This would only happen when a TCG guest > was emulating KVM, which is why it hasn't been discovered yet. What do you mean ? The QEMU PowerNV machine emulates baremetal and can run KVM pseries guests. It has some issues under load but not related to memory translation. This patch is certainly improving the model and it is worth testing but this version does not apply on ppc-6.1. Thanks, C. > This patch attempts to correct that by making radix64_*_xlate functions > receive the mmu_idx, and passing one with the correct permission for the > second level translation. > > The mmuidx macros added by this patch are only correct for non-bookE > mmus, because BookE style set the IS and DS bits inverted and there > might be other subtle differences. However, there doesn't seem to be > BookE cpus that have radix-style mmus, so we left a comment there to > document the issue, in case a machine does have that and was missed. > > As part of this cleanup, we now need to send the correct mmmu_idx > when calling get_phys_page_debug, otherwise we might not be able to see the > memory that the CPU could > > Suggested-by: Richard Henderson <richard.henderson@linaro.org> > Signed-off-by: Bruno Larsen (billionai) <bruno.larsen@eldorado.org.br> > --- > target/ppc/internal.h | 12 ++++++++++++ > target/ppc/mmu-radix64.c | 38 ++++++++++++++++++++++---------------- > target/ppc/mmu-radix64.h | 2 +- > target/ppc/mmu_helper.c | 8 +++++--- > 4 files changed, 40 insertions(+), 20 deletions(-) > > diff --git a/target/ppc/internal.h b/target/ppc/internal.h > index f1fd3c8d04..003df7e8a9 100644 > --- a/target/ppc/internal.h > +++ b/target/ppc/internal.h > @@ -245,4 +245,16 @@ static inline int prot_for_access_type(MMUAccessType access_type) > g_assert_not_reached(); > } > > +/* > + * These correspond to the mmu_idx values computed in > + * hreg_compute_hflags_value. See the tables therein > + */ > +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } > +/* > + * If we want to use these macros for hash-style MMUs, we need to > + * add an if or another macro here. > + */ > +static inline bool mmuidx_real(int idx) { return idx & 2; } > +static inline bool mmuidx_hv(int idx) { return idx & 4; } > + > #endif /* PPC_INTERNAL_H */ > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index cbd404bfa4..0ae8f6b572 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, > > static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, > uint64_t pte, int *fault_cause, int *prot, > - bool partition_scoped) > + int mmu_idx, bool partition_scoped) > { > CPUPPCState *env = &cpu->env; > int need_prot; > @@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, > /* Determine permissions allowed by Encoded Access Authority */ > if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) { > *prot = 0; > - } else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) { > + } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) || > + partition_scoped) { > *prot = ppc_radix64_get_prot_eaa(pte); > } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */ > *prot = ppc_radix64_get_prot_eaa(pte); > @@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > ppc_v3_pate_t pate, > hwaddr *h_raddr, int *h_prot, > int *h_page_size, bool pde_addr, > - bool guest_visible) > + int mmu_idx, bool guest_visible) > { > int fault_cause = 0; > hwaddr pte_addr; > @@ -310,7 +311,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB, > pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size, > &pte, &fault_cause, &pte_addr) || > - ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) { > + ppc_radix64_check_prot(cpu, access_type, pte, > + &fault_cause, h_prot, mmu_idx, true) > + ) { > if (pde_addr) { /* address being translated was that of a guest pde */ > fault_cause |= DSISR_PRTABLE_FAULT; > } > @@ -332,7 +335,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > vaddr eaddr, uint64_t pid, > ppc_v3_pate_t pate, hwaddr *g_raddr, > int *g_prot, int *g_page_size, > - bool guest_visible) > + int mmu_idx, bool guest_visible) > { > CPUState *cs = CPU(cpu); > CPUPPCState *env = &cpu->env; > @@ -367,7 +370,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, > pate, &h_raddr, &h_prot, > &h_page_size, true, > - guest_visible); > + /* mmu_idx is 5 because we're translating from hypervisor scope */ > + 5, guest_visible); > if (ret) { > return ret; > } > @@ -407,7 +411,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, > pate, &h_raddr, &h_prot, > &h_page_size, true, > - guest_visible); > + /* mmu_idx is 5 because we're translating from hypervisor scope */ > + 5, guest_visible); > if (ret) { > return ret; > } > @@ -431,7 +436,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > *g_raddr = (rpn & ~mask) | (eaddr & mask); > } > > - if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) { > + if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, > + g_prot, mmu_idx, false)) { > /* Access denied due to protection */ > if (guest_visible) { > ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause); > @@ -464,7 +470,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, > * +-------------+----------------+---------------+ > */ > bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > - hwaddr *raddr, int *psizep, int *protp, > + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, > bool guest_visible) > { > CPUPPCState *env = &cpu->env; > @@ -474,17 +480,17 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > hwaddr g_raddr; > bool relocation; > > - assert(!(msr_hv && cpu->vhyp)); > + assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp)); > > - relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr); > + relocation = !mmuidx_real(mmu_idx); > > /* HV or virtual hypervisor Real Mode Access */ > - if (!relocation && (msr_hv || cpu->vhyp)) { > + if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) { > /* In real mode top 4 effective addr bits (mostly) ignored */ > *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; > > /* In HV mode, add HRMOR if top EA bit is clear */ > - if (msr_hv || !env->has_hv_mode) { > + if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) { > if (!(eaddr >> 63)) { > *raddr |= env->spr[SPR_HRMOR]; > } > @@ -546,7 +552,7 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > if (relocation) { > int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, > pate, &g_raddr, &prot, > - &psize, guest_visible); > + &psize, mmu_idx, guest_visible); > if (ret) { > return false; > } > @@ -564,13 +570,13 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > * quadrants 1 or 2. Translates a guest real address to a host > * real address. > */ > - if (lpid || !msr_hv) { > + if (lpid || !mmuidx_hv(mmu_idx)) { > int ret; > > ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, > g_raddr, pate, raddr, > &prot, &psize, false, > - guest_visible); > + mmu_idx, guest_visible); > if (ret) { > return false; > } > diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h > index 6b13b89b64..b70357cf34 100644 > --- a/target/ppc/mmu-radix64.h > +++ b/target/ppc/mmu-radix64.h > @@ -45,7 +45,7 @@ > #ifdef TARGET_PPC64 > > bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > - hwaddr *raddr, int *psizep, int *protp, > + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, > bool guest_visible); > > static inline int ppc_radix64_get_prot_eaa(uint64_t pte) > diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c > index ba1952c77d..9dcdf88597 100644 > --- a/target/ppc/mmu_helper.c > +++ b/target/ppc/mmu_helper.c > @@ -2908,7 +2908,7 @@ static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, > case POWERPC_MMU_3_00: > if (ppc64_v3_radix(cpu)) { > return ppc_radix64_xlate(cpu, eaddr, access_type, > - raddrp, psizep, protp, guest_visible); > + raddrp, psizep, protp, mmu_idx, guest_visible); > } > /* fall through */ > case POWERPC_MMU_64B: > @@ -2941,8 +2941,10 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) > * try an MMU_DATA_LOAD, we may not be able to read instructions > * mapped by code TLBs, so we also try a MMU_INST_FETCH. > */ > - if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, 0, false) || > - ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, 0, false)) { > + if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, > + cpu_mmu_index(&cpu->env, false), false) || > + ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, > + cpu_mmu_index(&cpu->env, true), false)) { > return raddr & TARGET_PAGE_MASK; > } > return -1; >
On 6/15/21 3:57 PM, Cédric Le Goater wrote: > On 6/14/21 9:16 PM, Bruno Larsen (billionai) wrote: >> Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> >> >> This commit attempts to fix the first bug mentioned by Richard Henderson in >> https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html >> >> To sumarize the bug here, when radix-style mmus are translating an >> address, they might need to call a second level of translation, with >> hypvervisor priviledges. However, the way it was being done up until >> this point meant that the second level translation had the same >> priviledges as the first level. This would only happen when a TCG guest >> was emulating KVM, which is why it hasn't been discovered yet. > > What do you mean ? The QEMU PowerNV machine emulates baremetal and > can run KVM pseries guests. > > It has some issues under load but not related to memory translation. > This patch is certainly improving the model and it is worth testing > but this version does not apply on ppc-6.1. Unfortunately this series misses a cover letter. The base series is mentioned in the first patch: Based-on: <20210518201146.794854-1-richard.henderson@linaro.org>
On 15/06/2021 10:57, Cédric Le Goater wrote: > On 6/14/21 9:16 PM, Bruno Larsen (billionai) wrote: >> Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> >> >> This commit attempts to fix the first bug mentioned by Richard Henderson in >> https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html >> >> To sumarize the bug here, when radix-style mmus are translating an >> address, they might need to call a second level of translation, with >> hypvervisor priviledges. However, the way it was being done up until >> this point meant that the second level translation had the same >> priviledges as the first level. This would only happen when a TCG guest >> was emulating KVM, which is why it hasn't been discovered yet. > What do you mean ? The QEMU PowerNV machine emulates baremetal and > can run KVM pseries guests. Is the PowerNV using radix? The bug only happens when guest1 (the emulated PowerNV, not it's guest), and if so, we might need to reevaluate if the bug actually happens, or if it's just a theoretical hiccup. And also change the commit message. Either way, if my explanation is a bit confusing, there are more details on the link. I sort of just implemented the solutions suggested in that e-mail chain. > > It has some issues under load but not related to memory translation. > This patch is certainly improving the model and it is worth testing > but this version does not apply on ppc-6.1. Ah, yes, it is based on a patch series made by Richard Henderson a few weeks ago. Since we need that patch to be accepted to finally support disable-tcg and I don't want to delay that by adding a change that might change a lot of his remaining patches. > > Thanks, > > C. > > >> This patch attempts to correct that by making radix64_*_xlate functions >> receive the mmu_idx, and passing one with the correct permission for the >> second level translation. >> >> The mmuidx macros added by this patch are only correct for non-bookE >> mmus, because BookE style set the IS and DS bits inverted and there >> might be other subtle differences. However, there doesn't seem to be >> BookE cpus that have radix-style mmus, so we left a comment there to >> document the issue, in case a machine does have that and was missed. >> >> As part of this cleanup, we now need to send the correct mmmu_idx >> when calling get_phys_page_debug, otherwise we might not be able to see the >> memory that the CPU could >> >> Suggested-by: Richard Henderson <richard.henderson@linaro.org> >> Signed-off-by: Bruno Larsen (billionai) <bruno.larsen@eldorado.org.br> >> --- >> target/ppc/internal.h | 12 ++++++++++++ >> target/ppc/mmu-radix64.c | 38 ++++++++++++++++++++++---------------- >> target/ppc/mmu-radix64.h | 2 +- >> target/ppc/mmu_helper.c | 8 +++++--- >> 4 files changed, 40 insertions(+), 20 deletions(-) >> >> diff --git a/target/ppc/internal.h b/target/ppc/internal.h >> index f1fd3c8d04..003df7e8a9 100644 >> --- a/target/ppc/internal.h >> +++ b/target/ppc/internal.h >> @@ -245,4 +245,16 @@ static inline int prot_for_access_type(MMUAccessType access_type) >> g_assert_not_reached(); >> } >> >> +/* >> + * These correspond to the mmu_idx values computed in >> + * hreg_compute_hflags_value. See the tables therein >> + */ >> +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } >> +/* >> + * If we want to use these macros for hash-style MMUs, we need to >> + * add an if or another macro here. >> + */ >> +static inline bool mmuidx_real(int idx) { return idx & 2; } >> +static inline bool mmuidx_hv(int idx) { return idx & 4; } >> + >> #endif /* PPC_INTERNAL_H */ >> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c >> index cbd404bfa4..0ae8f6b572 100644 >> --- a/target/ppc/mmu-radix64.c >> +++ b/target/ppc/mmu-radix64.c >> @@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, >> >> static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, >> uint64_t pte, int *fault_cause, int *prot, >> - bool partition_scoped) >> + int mmu_idx, bool partition_scoped) >> { >> CPUPPCState *env = &cpu->env; >> int need_prot; >> @@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, >> /* Determine permissions allowed by Encoded Access Authority */ >> if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) { >> *prot = 0; >> - } else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) { >> + } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) || >> + partition_scoped) { >> *prot = ppc_radix64_get_prot_eaa(pte); >> } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */ >> *prot = ppc_radix64_get_prot_eaa(pte); >> @@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, >> ppc_v3_pate_t pate, >> hwaddr *h_raddr, int *h_prot, >> int *h_page_size, bool pde_addr, >> - bool guest_visible) >> + int mmu_idx, bool guest_visible) >> { >> int fault_cause = 0; >> hwaddr pte_addr; >> @@ -310,7 +311,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, >> if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB, >> pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size, >> &pte, &fault_cause, &pte_addr) || >> - ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) { >> + ppc_radix64_check_prot(cpu, access_type, pte, >> + &fault_cause, h_prot, mmu_idx, true) >> + ) { >> if (pde_addr) { /* address being translated was that of a guest pde */ >> fault_cause |= DSISR_PRTABLE_FAULT; >> } >> @@ -332,7 +335,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, >> vaddr eaddr, uint64_t pid, >> ppc_v3_pate_t pate, hwaddr *g_raddr, >> int *g_prot, int *g_page_size, >> - bool guest_visible) >> + int mmu_idx, bool guest_visible) >> { >> CPUState *cs = CPU(cpu); >> CPUPPCState *env = &cpu->env; >> @@ -367,7 +370,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, >> ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, >> pate, &h_raddr, &h_prot, >> &h_page_size, true, >> - guest_visible); >> + /* mmu_idx is 5 because we're translating from hypervisor scope */ >> + 5, guest_visible); >> if (ret) { >> return ret; >> } >> @@ -407,7 +411,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, >> ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, >> pate, &h_raddr, &h_prot, >> &h_page_size, true, >> - guest_visible); >> + /* mmu_idx is 5 because we're translating from hypervisor scope */ >> + 5, guest_visible); >> if (ret) { >> return ret; >> } >> @@ -431,7 +436,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, >> *g_raddr = (rpn & ~mask) | (eaddr & mask); >> } >> >> - if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) { >> + if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, >> + g_prot, mmu_idx, false)) { >> /* Access denied due to protection */ >> if (guest_visible) { >> ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause); >> @@ -464,7 +470,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, >> * +-------------+----------------+---------------+ >> */ >> bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, >> - hwaddr *raddr, int *psizep, int *protp, >> + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, >> bool guest_visible) >> { >> CPUPPCState *env = &cpu->env; >> @@ -474,17 +480,17 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, >> hwaddr g_raddr; >> bool relocation; >> >> - assert(!(msr_hv && cpu->vhyp)); >> + assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp)); >> >> - relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr); >> + relocation = !mmuidx_real(mmu_idx); >> >> /* HV or virtual hypervisor Real Mode Access */ >> - if (!relocation && (msr_hv || cpu->vhyp)) { >> + if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) { >> /* In real mode top 4 effective addr bits (mostly) ignored */ >> *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; >> >> /* In HV mode, add HRMOR if top EA bit is clear */ >> - if (msr_hv || !env->has_hv_mode) { >> + if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) { >> if (!(eaddr >> 63)) { >> *raddr |= env->spr[SPR_HRMOR]; >> } >> @@ -546,7 +552,7 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, >> if (relocation) { >> int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, >> pate, &g_raddr, &prot, >> - &psize, guest_visible); >> + &psize, mmu_idx, guest_visible); >> if (ret) { >> return false; >> } >> @@ -564,13 +570,13 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, >> * quadrants 1 or 2. Translates a guest real address to a host >> * real address. >> */ >> - if (lpid || !msr_hv) { >> + if (lpid || !mmuidx_hv(mmu_idx)) { >> int ret; >> >> ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, >> g_raddr, pate, raddr, >> &prot, &psize, false, >> - guest_visible); >> + mmu_idx, guest_visible); >> if (ret) { >> return false; >> } >> diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h >> index 6b13b89b64..b70357cf34 100644 >> --- a/target/ppc/mmu-radix64.h >> +++ b/target/ppc/mmu-radix64.h >> @@ -45,7 +45,7 @@ >> #ifdef TARGET_PPC64 >> >> bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, >> - hwaddr *raddr, int *psizep, int *protp, >> + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, >> bool guest_visible); >> >> static inline int ppc_radix64_get_prot_eaa(uint64_t pte) >> diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c >> index ba1952c77d..9dcdf88597 100644 >> --- a/target/ppc/mmu_helper.c >> +++ b/target/ppc/mmu_helper.c >> @@ -2908,7 +2908,7 @@ static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, >> case POWERPC_MMU_3_00: >> if (ppc64_v3_radix(cpu)) { >> return ppc_radix64_xlate(cpu, eaddr, access_type, >> - raddrp, psizep, protp, guest_visible); >> + raddrp, psizep, protp, mmu_idx, guest_visible); >> } >> /* fall through */ >> case POWERPC_MMU_64B: >> @@ -2941,8 +2941,10 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) >> * try an MMU_DATA_LOAD, we may not be able to read instructions >> * mapped by code TLBs, so we also try a MMU_INST_FETCH. >> */ >> - if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, 0, false) || >> - ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, 0, false)) { >> + if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, >> + cpu_mmu_index(&cpu->env, false), false) || >> + ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, >> + cpu_mmu_index(&cpu->env, true), false)) { >> return raddr & TARGET_PAGE_MASK; >> } >> return -1; >>
On 6/15/21 4:57 PM, Bruno Piazera Larsen wrote: > > On 15/06/2021 10:57, Cédric Le Goater wrote: >> On 6/14/21 9:16 PM, Bruno Larsen (billionai) wrote: >>> Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> >>> >>> This commit attempts to fix the first bug mentioned by Richard Henderson in >>> https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html >>> >>> To sumarize the bug here, when radix-style mmus are translating an >>> address, they might need to call a second level of translation, with >>> hypvervisor priviledges. However, the way it was being done up until >>> this point meant that the second level translation had the same >>> priviledges as the first level. This would only happen when a TCG guest >>> was emulating KVM, which is why it hasn't been discovered yet. >> What do you mean ? The QEMU PowerNV machine emulates baremetal and >> can run KVM pseries guests. > > Is the PowerNV using radix? Yes. Radix MMU is the default for Linux under POWER9 but it can also use Hash. > The bug only happens when guest1 (the emulated PowerNV, not it's guest), For PowerNV, "machine" would be more appropriate. We sometime refer to it as L0 and the sublevel guests as L1 and L2. > and if so, we might need to reevaluate if the bug actually happens, > or if it's just a theoretical hiccup. And also change the commit message. I think this would be a fix for the model. > Either way, if my explanation is a bit confusing, there are more details > on the link. I sort of just implemented the solutions suggested in that > e-mail chain. It seems to makes sense. I just want to give it a try using an emulated QEMU PowerNV machine running a KVM pseries guest to see if it doesn't break anything. Which would mean that the fix is incomplete. >> It has some issues under load but not related to memory translation. >> This patch is certainly improving the model and it is worth testing >> but this version does not apply on ppc-6.1. > > Ah, yes, it is based on a patch series made by Richard Henderson a > few weeks ago. Since we need that patch to be accepted to finally > support disable-tcg and I don't want to delay that by adding a > change that might change a lot of his remaining patches. OK. Thanks, C.
On Tue, Jun 15, 2021 at 09:25:06AM -0300, Bruno Piazera Larsen wrote: > > On 15/06/2021 00:20, Richard Henderson wrote: > > On 6/14/21 6:41 PM, David Gibson wrote: > > > I think move these to mmu-book3s-v3.h, since they're correct for both > > > the radix and hash sides of the modern book3s mmu. > > > > They're also correct for all non-booke mmus, e.g. hash32 and 6xx, which > > is why I recommended internal.h (or some new mmu-internal.h). > > > > While neither hash32 nor 6xx have HV, and thus there is no second tlb > > bug, it would still be More Correct to use mmu_idx instead of direct > > references to msr_pr et al. > yeah, I agree that I should change the documentation. Before I send a new > version with everything corrected, I wanna make sure if I should leave them > in internal and use it for hash32 and 6xx MMUs, or do I put them in > mmu-book3s-v3.h, since only radix64 has that the bug? Put them in mmu-book3s-v3.h for the time being, we can move it later if it makes sense. I want to move to a mode model-first approach to the softmmu code, so I think of these values as logically per-MMU-style, even if they are the same in practice for many of them.
diff --git a/target/ppc/internal.h b/target/ppc/internal.h index f1fd3c8d04..003df7e8a9 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -245,4 +245,16 @@ static inline int prot_for_access_type(MMUAccessType access_type) g_assert_not_reached(); } +/* + * These correspond to the mmu_idx values computed in + * hreg_compute_hflags_value. See the tables therein + */ +static inline bool mmuidx_pr(int idx) { return !(idx & 1); } +/* + * If we want to use these macros for hash-style MMUs, we need to + * add an if or another macro here. + */ +static inline bool mmuidx_real(int idx) { return idx & 2; } +static inline bool mmuidx_hv(int idx) { return idx & 4; } + #endif /* PPC_INTERNAL_H */ diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index cbd404bfa4..0ae8f6b572 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type, static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, uint64_t pte, int *fault_cause, int *prot, - bool partition_scoped) + int mmu_idx, bool partition_scoped) { CPUPPCState *env = &cpu->env; int need_prot; @@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, /* Determine permissions allowed by Encoded Access Authority */ if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) { *prot = 0; - } else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) { + } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) || + partition_scoped) { *prot = ppc_radix64_get_prot_eaa(pte); } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */ *prot = ppc_radix64_get_prot_eaa(pte); @@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, ppc_v3_pate_t pate, hwaddr *h_raddr, int *h_prot, int *h_page_size, bool pde_addr, - bool guest_visible) + int mmu_idx, bool guest_visible) { int fault_cause = 0; hwaddr pte_addr; @@ -310,7 +311,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB, pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size, &pte, &fault_cause, &pte_addr) || - ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) { + ppc_radix64_check_prot(cpu, access_type, pte, + &fault_cause, h_prot, mmu_idx, true) + ) { if (pde_addr) { /* address being translated was that of a guest pde */ fault_cause |= DSISR_PRTABLE_FAULT; } @@ -332,7 +335,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, vaddr eaddr, uint64_t pid, ppc_v3_pate_t pate, hwaddr *g_raddr, int *g_prot, int *g_page_size, - bool guest_visible) + int mmu_idx, bool guest_visible) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -367,7 +370,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, pate, &h_raddr, &h_prot, &h_page_size, true, - guest_visible); + /* mmu_idx is 5 because we're translating from hypervisor scope */ + 5, guest_visible); if (ret) { return ret; } @@ -407,7 +411,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, pate, &h_raddr, &h_prot, &h_page_size, true, - guest_visible); + /* mmu_idx is 5 because we're translating from hypervisor scope */ + 5, guest_visible); if (ret) { return ret; } @@ -431,7 +436,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, *g_raddr = (rpn & ~mask) | (eaddr & mask); } - if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) { + if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, + g_prot, mmu_idx, false)) { /* Access denied due to protection */ if (guest_visible) { ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause); @@ -464,7 +470,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, * +-------------+----------------+---------------+ */ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, - hwaddr *raddr, int *psizep, int *protp, + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, bool guest_visible) { CPUPPCState *env = &cpu->env; @@ -474,17 +480,17 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr g_raddr; bool relocation; - assert(!(msr_hv && cpu->vhyp)); + assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp)); - relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr); + relocation = !mmuidx_real(mmu_idx); /* HV or virtual hypervisor Real Mode Access */ - if (!relocation && (msr_hv || cpu->vhyp)) { + if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) { /* In real mode top 4 effective addr bits (mostly) ignored */ *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; /* In HV mode, add HRMOR if top EA bit is clear */ - if (msr_hv || !env->has_hv_mode) { + if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) { if (!(eaddr >> 63)) { *raddr |= env->spr[SPR_HRMOR]; } @@ -546,7 +552,7 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, if (relocation) { int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid, pate, &g_raddr, &prot, - &psize, guest_visible); + &psize, mmu_idx, guest_visible); if (ret) { return false; } @@ -564,13 +570,13 @@ bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, * quadrants 1 or 2. Translates a guest real address to a host * real address. */ - if (lpid || !msr_hv) { + if (lpid || !mmuidx_hv(mmu_idx)) { int ret; ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr, g_raddr, pate, raddr, &prot, &psize, false, - guest_visible); + mmu_idx, guest_visible); if (ret) { return false; } diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h index 6b13b89b64..b70357cf34 100644 --- a/target/ppc/mmu-radix64.h +++ b/target/ppc/mmu-radix64.h @@ -45,7 +45,7 @@ #ifdef TARGET_PPC64 bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, - hwaddr *raddr, int *psizep, int *protp, + hwaddr *raddr, int *psizep, int *protp, int mmu_idx, bool guest_visible); static inline int ppc_radix64_get_prot_eaa(uint64_t pte) diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index ba1952c77d..9dcdf88597 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -2908,7 +2908,7 @@ static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, case POWERPC_MMU_3_00: if (ppc64_v3_radix(cpu)) { return ppc_radix64_xlate(cpu, eaddr, access_type, - raddrp, psizep, protp, guest_visible); + raddrp, psizep, protp, mmu_idx, guest_visible); } /* fall through */ case POWERPC_MMU_64B: @@ -2941,8 +2941,10 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) * try an MMU_DATA_LOAD, we may not be able to read instructions * mapped by code TLBs, so we also try a MMU_INST_FETCH. */ - if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, 0, false) || - ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, 0, false)) { + if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p, + cpu_mmu_index(&cpu->env, false), false) || + ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p, + cpu_mmu_index(&cpu->env, true), false)) { return raddr & TARGET_PAGE_MASK; } return -1;
Based-on: <20210518201146.794854-1-richard.henderson@linaro.org> This commit attempts to fix the first bug mentioned by Richard Henderson in https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06247.html To sumarize the bug here, when radix-style mmus are translating an address, they might need to call a second level of translation, with hypvervisor priviledges. However, the way it was being done up until this point meant that the second level translation had the same priviledges as the first level. This would only happen when a TCG guest was emulating KVM, which is why it hasn't been discovered yet. This patch attempts to correct that by making radix64_*_xlate functions receive the mmu_idx, and passing one with the correct permission for the second level translation. The mmuidx macros added by this patch are only correct for non-bookE mmus, because BookE style set the IS and DS bits inverted and there might be other subtle differences. However, there doesn't seem to be BookE cpus that have radix-style mmus, so we left a comment there to document the issue, in case a machine does have that and was missed. As part of this cleanup, we now need to send the correct mmmu_idx when calling get_phys_page_debug, otherwise we might not be able to see the memory that the CPU could Suggested-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Bruno Larsen (billionai) <bruno.larsen@eldorado.org.br> --- target/ppc/internal.h | 12 ++++++++++++ target/ppc/mmu-radix64.c | 38 ++++++++++++++++++++++---------------- target/ppc/mmu-radix64.h | 2 +- target/ppc/mmu_helper.c | 8 +++++--- 4 files changed, 40 insertions(+), 20 deletions(-)