Message ID: 20190926231824.149014-3-bgardon@google.com (mailing list archive)
State: New, archived
Series: kvm: mmu: Rework the x86 TDP direct mapped case
On Thu, Sep 26, 2019 at 04:17:58PM -0700, Ben Gardon wrote:
> Separate the functions for generating leaf page table entries from the
> function that inserts them into the paging structure. This refactoring
> will allow changes to the MMU sychronization model to use atomic
> compare / exchanges (which are not guaranteed to succeed) instead of a
> monolithic MMU lock.
>
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/kvm/mmu.c | 93 ++++++++++++++++++++++++++++------------------
>  1 file changed, 57 insertions(+), 36 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 781c2ca7455e3..7e5ab9c6e2b09 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2964,21 +2964,15 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
>  #define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
>  #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
>
> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> -        unsigned pte_access, int level,
> -        gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> -        bool can_unsync, bool host_writable)
> +static int generate_pte(struct kvm_vcpu *vcpu, unsigned pte_access, int level,

Similar comment on "generate".  Note, I don't necessarily like the names
get_mmio_spte_value() or get_spte_value() either as they could be
misinterpreted as reading the value from memory.  Maybe
calc_{mmio_}spte_value()?

> +        gfn_t gfn, kvm_pfn_t pfn, u64 old_pte, bool speculative,
> +        bool can_unsync, bool host_writable, bool ad_disabled,
> +        u64 *ptep)
>  {
> -    u64 spte = 0;
> +    u64 pte;

Renames and unrelated refactoring (leaving the variable uninitialized and
setting it directly to shadow_present_mask) belong in separate patches.
The renames especially make this patch much more difficult to review.

And, I disagree with the rename; I think it's important to keep the "spte"
nomenclature, even though it's a bit of a misnomer for TDP entries, so that
it is easy to differentiate data that is coming from the host PTEs versus
data that is for KVM's MMU.

>      int ret = 0;
> -    struct kvm_mmu_page *sp;
> -
> -    if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> -        return 0;
>
> -    sp = page_header(__pa(sptep));
> -    if (sp_ad_disabled(sp))
> -        spte |= shadow_acc_track_value;
> +    *ptep = 0;
>
>      /*
>       * For the EPT case, shadow_present_mask is 0 if hardware
> @@ -2986,36 +2980,39 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>       * ACC_USER_MASK and shadow_user_mask are used to represent
>       * read access. See FNAME(gpte_access) in paging_tmpl.h.
>       */
> -    spte |= shadow_present_mask;
> +    pte = shadow_present_mask;
> +
> +    if (ad_disabled)
> +        pte |= shadow_acc_track_value;
> +
>      if (!speculative)
> -        spte |= spte_shadow_accessed_mask(spte);
> +        pte |= spte_shadow_accessed_mask(pte);
>
>      if (pte_access & ACC_EXEC_MASK)
> -        spte |= shadow_x_mask;
> +        pte |= shadow_x_mask;
>      else
> -        spte |= shadow_nx_mask;
> +        pte |= shadow_nx_mask;
>
>      if (pte_access & ACC_USER_MASK)
> -        spte |= shadow_user_mask;
> +        pte |= shadow_user_mask;
>
>      if (level > PT_PAGE_TABLE_LEVEL)
> -        spte |= PT_PAGE_SIZE_MASK;
> +        pte |= PT_PAGE_SIZE_MASK;
>      if (tdp_enabled)
> -        spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
> +        pte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
>              kvm_is_mmio_pfn(pfn));
>
>      if (host_writable)
> -        spte |= SPTE_HOST_WRITEABLE;
> +        pte |= SPTE_HOST_WRITEABLE;
>      else
>          pte_access &= ~ACC_WRITE_MASK;
>
>      if (!kvm_is_mmio_pfn(pfn))
> -        spte |= shadow_me_mask;
> +        pte |= shadow_me_mask;
>
> -    spte |= (u64)pfn << PAGE_SHIFT;
> +    pte |= (u64)pfn << PAGE_SHIFT;
>
>      if (pte_access & ACC_WRITE_MASK) {
> -
>          /*
>           * Other vcpu creates new sp in the window between
>           * mapping_level() and acquiring mmu-lock. We can
> @@ -3024,9 +3021,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>           */
>          if (level > PT_PAGE_TABLE_LEVEL &&
>              mmu_gfn_lpage_is_disallowed(vcpu, gfn, level))
> -            goto done;
> +            return 0;
>
> -        spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
> +        pte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
>
>          /*
>           * Optimization: for pte sync, if spte was writable the hash
> @@ -3034,30 +3031,54 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>           * is responsibility of mmu_get_page / kvm_sync_page.
>           * Same reasoning can be applied to dirty page accounting.
>           */
> -        if (!can_unsync && is_writable_pte(*sptep))
> -            goto set_pte;
> +        if (!can_unsync && is_writable_pte(old_pte)) {
> +            *ptep = pte;
> +            return 0;
> +        }
>
>          if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
>              pgprintk("%s: found shadow page for %llx, marking ro\n",
>                   __func__, gfn);
> -            ret |= SET_SPTE_WRITE_PROTECTED_PT;
> +            ret = SET_SPTE_WRITE_PROTECTED_PT;

More unnecessary refactoring.

>              pte_access &= ~ACC_WRITE_MASK;
> -            spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
> +            pte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
>          }
>      }
>
> -    if (pte_access & ACC_WRITE_MASK) {
> -        kvm_vcpu_mark_page_dirty(vcpu, gfn);
> -        spte |= spte_shadow_dirty_mask(spte);
> -    }
> +    if (pte_access & ACC_WRITE_MASK)
> +        pte |= spte_shadow_dirty_mask(pte);
>
>      if (speculative)
> -        spte = mark_spte_for_access_track(spte);
> +        pte = mark_spte_for_access_track(pte);
> +
> +    *ptep = pte;
> +    return ret;
> +}
> +
> +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
> +           int level, gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> +           bool can_unsync, bool host_writable)
> +{
> +    u64 spte;
> +    int ret;
> +    struct kvm_mmu_page *sp;
> +
> +    if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> +        return 0;
> +
> +    sp = page_header(__pa(sptep));
> +
> +    ret = generate_pte(vcpu, pte_access, level, gfn, pfn, *sptep,
> +              speculative, can_unsync, host_writable,
> +              sp_ad_disabled(sp), &spte);

Yowsers, that's a big prototype.

This is something that came up in an unrelated internal discussion.  I
wonder if it would make sense to define a struct to hold all of the data
needed to insert an spte and pass that on the stack instead of having a
bajillion parameters.  Just spitballing, no idea if it's feasible and/or
reasonable.

> +    if (!spte)
> +        return 0;
> +
> +    if (spte & PT_WRITABLE_MASK)
> +        kvm_vcpu_mark_page_dirty(vcpu, gfn);
>
> -set_pte:
>      if (mmu_spte_update(sptep, spte))
>          ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
> -done:
>      return ret;
>  }
>
> --
> 2.23.0.444.g18eeb5a265-goog
>
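As a purely illustrative sketch of the bundled-parameter idea raised in the
review above, the inputs to the SPTE calculation could be collected into a
single context struct. The struct, field, and function names below are
hypothetical; they are not part of this series or of KVM.

/*
 * Hypothetical sketch only: gather everything needed to compute a leaf
 * SPTE into one context so the helper takes a single argument instead of
 * roughly ten scalars.  None of these names exist in KVM.
 */
struct spte_calc_ctx {
	unsigned int pte_access;
	int level;
	gfn_t gfn;
	kvm_pfn_t pfn;
	u64 old_spte;
	bool speculative;
	bool can_unsync;
	bool host_writable;
	bool ad_disabled;
};

/* Would play the same role as generate_pte() in the patch, reading ctx. */
static int calc_spte_value(struct kvm_vcpu *vcpu,
			   const struct spte_calc_ctx *ctx, u64 *new_spte);

A caller such as set_spte() would fill the struct on its stack and pass a
pointer, which also makes it possible to add or drop an input without
rewriting every prototype in the call chain.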
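The commit message motivates this split with a future synchronization model
in which leaf entries are installed with atomic compare/exchanges that can
fail and be retried, rather than under the monolithic MMU lock. A minimal
sketch of what such an install step could look like, assuming a hypothetical
helper name (this is not code from the series):

/*
 * Illustrative only: atomically install a pre-computed leaf SPTE.  A
 * failed exchange means another thread updated the entry first, so the
 * caller must re-read the SPTE and either retry or bail out.
 */
static bool try_install_spte(u64 *sptep, u64 old_spte, u64 new_spte)
{
	/* cmpxchg64() returns the value it observed; success iff it matched. */
	return cmpxchg64(sptep, old_spte, new_spte) == old_spte;
}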