Message ID | 1494501810-11822-2-git-send-email-pbonzini@redhat.com (mailing list archive)
---|---
State | New, archived |
On 05/11/2017 07:23 PM, Paolo Bonzini wrote:
> This fixes the new ept_access_test_read_only and ept_access_test_read_write
> testcases from vmx.flat.
>
> The problem is that gpte_access moves bits around to switch from EPT
> bit order (XWR) to ACC_*_MASK bit order (RWX). This results in an
> incorrect exit qualification. To fix this, make pt_access and
> pte_access operate on raw PTE values (only with NX flipped to mean
> "can execute") and call gpte_access at the end of the walk. This
> lets us use pte_access to compute the exit qualification with XWR
> bit order.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
>  1 file changed, 21 insertions(+), 14 deletions(-)
>
[...]
>
> @@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>  	have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
>
>  #if PTTYPE == 64
> +	walk_nx_mask = 1ULL << PT64_NX_SHIFT;

We can always set walk_nx_mask = 1ULL << PT64_NX_SHIFT, as:
- for EPT, this bit is useless;
- for 32-bit paging, bit 63 is always zero, so the final result would be
  zero too.

[...]

> @@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>  	 */
>  	if (!(errcode & PFERR_RSVD_MASK)) {
>  		vcpu->arch.exit_qualification &= 0x187;
> -		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;

^ here, the original code is buggy as pt_access and pte have different bit
order; fortunately, this patch fixes it too. :)

Otherwise it looks good to me, thanks for your fix.

Reviewed-by: Xiao Guangrong <xiaoguangrong@tencent.com>
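Xiao's walk_nx_mask observation can be checked in isolation. The standalone
sketch below uses illustrative PTE values, not kernel code; the only thing it
assumes is PT64_NX_SHIFT == 63, the architectural position of the NX bit in a
64-bit PTE. It shows how XORing bit 63 turns "NX set" into "execute cleared",
so execute permission accumulates across walk levels with a plain AND, just
like the other permission bits:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the patch's walk_nx_mask for 64-bit PTEs (PT64_NX_SHIFT == 63). */
#define WALK_NX_MASK (1ULL << 63)

int main(void)
{
	/* Illustrative entries: the upper level has NX set, the leaf is
	 * executable. Low bits: present + writable. */
	uint64_t pde = WALK_NX_MASK | 0x3;	/* no-execute */
	uint64_t pte = 0x3;			/* executable */

	/* Accumulate as the patched walker does: start from all-ones. */
	uint64_t pte_access = ~0ULL;
	pte_access &= pde ^ WALK_NX_MASK;	/* bit 63 now means "may execute" */
	pte_access &= pte ^ WALK_NX_MASK;

	/* NX at any level clears the bit, matching the architecture. */
	printf("may execute: %d\n", !!(pte_access & WALK_NX_MASK));

	/* For 32-bit paging, bit 63 of a loaded PTE is always zero, so
	 * leaving walk_nx_mask at 0 makes the XOR a no-op -- Xiao's point. */
	return 0;
}

This prints "may execute: 0": the NX set at the upper level wins, with no
per-level gpte_access conversion needed.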
On 05/12/2017 11:59 AM, Xiao Guangrong wrote:
>>  error:
>> @@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>>  	 */
>>  	if (!(errcode & PFERR_RSVD_MASK)) {
>>  		vcpu->arch.exit_qualification &= 0x187;
>> -		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
>
> ^ here, the original code is buggy as pt_access and pte have different
> bit order; fortunately, this patch fixes it too. :)

It's exactly what this patch aims at; I do not know what I was thinking
when I wrote these down. :(
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56241746abbd..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+	u64 pt_access, pte_access;
+	unsigned index, accessed_dirty, pte_pkey;
 	unsigned nested_access;
 	gpa_t pte_gpa;
 	bool have_ad;
 	int offset;
+	u64 walk_nx_mask = 0;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
+	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
 	if (walker->level == PT32E_ROOT_LEVEL) {
 		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
@@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	walker->max_level = walker->level;
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
-
 	/*
 	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
 	 * by the MOV to CR instruction are treated as reads and do not cause the
@@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	 */
 	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
-	pt_access = pte_access = ACC_ALL;
+	pte_access = ~0;
 	++walker->level;
 
 	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
-		pt_access &= pte_access;
+		pt_access = pte_access;
 		--walker->level;
 
 		index = PT_INDEX(addr, walker->level);
@@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
+		/*
+		 * Inverting the NX bit lets us AND it like other
+		 * permission bits.
+		 */
+		pte_access = pt_access & (pte ^ walk_nx_mask);
+
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
@@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			goto error;
 		}
 
-		accessed_dirty &= pte;
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
-	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+	/* Convert to ACC_*_MASK flags for struct guest_walker. */
+	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
 
@@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
 	if (!write_fault)
-		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
+		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
 	else
 		/*
 		 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		goto retry_walk;
 	}
 
-	walker->pt_access = pt_access;
-	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, pte_access, pt_access);
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
 	return 1;
 
 error:
@@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	 */
 	if (!(errcode & PFERR_RSVD_MASK)) {
 		vcpu->arch.exit_qualification &= 0x187;
-		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
 	}
 #endif
 	walker->fault.address = addr;
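To see the bug the last hunk fixes, compare the two computations on a concrete
value. The numbers below are illustrative: the PTE uses the EPT encoding
(bit 0 = R, bit 1 = W, bit 2 = X), and 0x4 stands for the ACC_*_MASK value
that gpte_access would have produced for a read-only EPT entry (EPT read
mapping to ACC_USER_MASK); treat that exact mapping as an assumption rather
than a quote of the kernel headers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A read-only EPT entry: R=1, W=0, X=0 in EPT (XWR) bit order. */
	uint64_t pte = 0x1;

	/* What the pre-patch per-level conversion would have yielded:
	 * read-only maps to ACC_USER_MASK (0x4) in ACC_*_MASK bit order. */
	unsigned pt_access = 0x4;

	/* Old code: ANDs an ACC-order value with an XWR-order PTE. */
	unsigned old_qual = ((pt_access & pte) & 0x7) << 3;

	/* New code: pte_access stays in raw XWR order through the walk. */
	uint64_t pte_access = pte;
	unsigned new_qual = (pte_access & 0x7) << 3;

	printf("old=%#x new=%#x\n", old_qual, new_qual);
	return 0;
}

The old computation reports no permissions at all (old=0), while the raw XWR
value correctly sets bit 3, "readable" (new=0x8), which is the behavior the
vmx.flat testcases check for.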
This fixes the new ept_access_test_read_only and ept_access_test_read_write
testcases from vmx.flat.

The problem is that gpte_access moves bits around to switch from EPT
bit order (XWR) to ACC_*_MASK bit order (RWX). This results in an
incorrect exit qualification. To fix this, make pt_access and
pte_access operate on raw PTE values (only with NX flipped to mean
"can execute") and call gpte_access at the end of the walk. This
lets us use pte_access to compute the exit qualification with XWR
bit order.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)
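For readers without mmu.h at hand, this is roughly the reshuffle the commit
message calls the switch from XWR to RWX order, sketched after the EPT flavor
of FNAME(gpte_access). The constants are redefined locally so the snippet
compiles on its own; the read-to-ACC_USER_MASK mapping is stated as an
assumption about the kernel's EPT walker, not quoted from it:

#include <stdint.h>
#include <stdio.h>

/* EPT (XWR) bit order, as in guest EPT page-table entries. */
#define EPT_READ	(1ULL << 0)
#define EPT_WRITE	(1ULL << 1)
#define EPT_EXEC	(1ULL << 2)

/* ACC_*_MASK bit order used by the KVM MMU walker. */
#define ACC_EXEC_MASK	1u
#define ACC_WRITE_MASK	2u
#define ACC_USER_MASK	4u

/* Each permission lands in a different bit position after conversion,
 * which is why mixing converted and raw values, as the old
 * exit-qualification code did, produces garbage. */
static unsigned ept_gpte_access(uint64_t gpte)
{
	return ((gpte & EPT_WRITE) ? ACC_WRITE_MASK : 0) |
	       ((gpte & EPT_EXEC)  ? ACC_EXEC_MASK  : 0) |
	       ((gpte & EPT_READ)  ? ACC_USER_MASK  : 0);
}

int main(void)
{
	uint64_t pte = EPT_READ;	/* read-only: 0x1 in XWR order */
	printf("raw=%#llx acc=%#x\n",
	       (unsigned long long)pte, ept_gpte_access(pte));
	return 0;	/* prints raw=0x1 acc=0x4 */
}

The patch keeps values in the left-hand (raw XWR) form for the whole walk and
performs this conversion exactly once, at the end.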