Message ID | 20200102061319.10077-4-weijiang.yang@intel.com (mailing list archive)
---|---
State | New, archived
Series | Enable Sub-Page Write Protection Support
On Thu, Jan 02, 2020 at 02:13:12PM +0800, Yang Weijiang wrote:
> SPPT is a 4-level paging structure similar to EPT, when SPP is

How does SPP interact with 5-level EPT?

> armed for target physical page, bit 61 of the corresponding
> EPT entry is flagged, then SPPT is traversed with the gfn,
> the leaf entry of SPPT contains the access bitmap of subpages
> inside the target 4KB physical page, one bit per 128-byte subpage.
>
> Co-developed-by: He Chen <he.chen@linux.intel.com>
> Signed-off-by: He Chen <he.chen@linux.intel.com>
> Co-developed-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---

...

> +static u64 format_spp_spte(u32 spp_wp_bitmap)
> +{
> +	u64 new_spte = 0;
> +	int i = 0;
> +
> +	/*
> +	 * One 4K page contains 32 sub-pages, in SPP table L4E, old bits

Is this

  One 4k page contains 32 sub-pages in SPP table L4E.  Old bits are...

or

  One 4k page contains 32 sub-pages.  In SPP table L4E, old bits are...

or
  ???

> +	 * are reserved, so we need to transfer u32 subpage write

Wrap comments at 80 columns to save lines.

> +	 * protect bitmap to u64 SPP L4E format.

What is a "page" in "one 4k page"?  What old bits?  Why not just track a
64-bit value?  I understand *what* the code below does, but I have no clue
why or whether it's correct.

> +	 */
> +	while (i < 32) {
> +		if (spp_wp_bitmap & (1ULL << i))
> +			new_spte |= 1ULL << (i * 2);
> +		i++;
> +	}

	for (i = 0; i < 32; i++)
		new_spte |= (spp_wp_bitmap & BIT_ULL(i)) << i;

At the very least, use a for loop.

> +
> +	return new_spte;
> +}
> +
> +static void spp_spte_set(u64 *sptep, u64 new_spte)
> +{
> +	__set_spte(sptep, new_spte);
> +}
> +
> +bool is_spp_spte(struct kvm_mmu_page *sp)
> +{
> +	return sp->role.spp;
> +}
> +
> +#define SPPT_ENTRY_PHA_MASK (0xFFFFFFFFFF << 12)
> +
> +int kvm_spp_setup_structure(struct kvm_vcpu *vcpu,
> +			    u32 access_map, gfn_t gfn)
> +{
> +	struct kvm_shadow_walk_iterator iter;
> +	struct kvm_mmu_page *sp;
> +	gfn_t pseudo_gfn;
> +	u64 old_spte, spp_spte;
> +	int ret = -EFAULT;
> +
> +	/* direct_map spp start */
> +	if (!VALID_PAGE(vcpu->kvm->arch.sppt_root))
> +		return -EFAULT;
> +
> +	for_each_shadow_spp_entry(vcpu, (u64)gfn << PAGE_SHIFT, iter) {
> +		if (iter.level == PT_PAGE_TABLE_LEVEL) {
> +			spp_spte = format_spp_spte(access_map);
> +			old_spte = mmu_spte_get_lockless(iter.sptep);
> +			if (old_spte != spp_spte)
> +				spp_spte_set(iter.sptep, spp_spte);
> +			ret = 0;
> +			break;
> +		}
> +
> +		if (!is_shadow_present_pte(*iter.sptep)) {
> +			u64 base_addr = iter.addr;
> +
> +			base_addr &= PT64_LVL_ADDR_MASK(iter.level);
> +			pseudo_gfn = base_addr >> PAGE_SHIFT;
> +			spp_spte = *iter.sptep;
> +			sp = kvm_spp_get_page(vcpu, pseudo_gfn,
> +					      iter.level - 1);
> +			link_spp_shadow_page(vcpu, iter.sptep, sp);
> +		} else if (iter.level == PT_DIRECTORY_LEVEL &&
> +			   !(spp_spte & PT_PRESENT_MASK) &&
> +			   (spp_spte & SPPT_ENTRY_PHA_MASK)) {
> +			spp_spte = *iter.sptep;
> +			spp_spte |= PT_PRESENT_MASK;
> +			spp_spte_set(iter.sptep, spp_spte);
> +		}
> +	}
> +
> +	kvm_flush_remote_tlbs(vcpu->kvm);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(kvm_spp_setup_structure);
> +
> +inline u64 construct_spptp(unsigned long root_hpa)
> +{
> +	return root_hpa & PAGE_MASK;
> +}
> +EXPORT_SYMBOL_GPL(construct_spptp);
> +
> diff --git a/arch/x86/kvm/mmu/spp.h b/arch/x86/kvm/mmu/spp.h
> new file mode 100644
> index 000000000000..8ef94b7a2057
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/spp.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __KVM_X86_VMX_SPP_H
> +#define __KVM_X86_VMX_SPP_H
> +
> +bool is_spp_spte(struct kvm_mmu_page *sp);
> +u64 construct_spptp(unsigned long root_hpa);
> +int kvm_spp_setup_structure(struct kvm_vcpu *vcpu,
> +			    u32 access_map, gfn_t gfn);
> +
> +#endif /* __KVM_X86_VMX_SPP_H */
> --
> 2.17.2
>
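A quick stand-alone check, assuming plain C99 user space (BIT_ULL is open-coded
here since linux/bits.h isn't available), that the suggested for loop produces
the same SPP L4E encoding as the patch's while loop: bit i of the u32 bitmap is
already at position i, so shifting it left by i again lands it on even bit 2*i.

	#include <assert.h>
	#include <stdint.h>

	#define BIT_ULL(n)	(1ULL << (n))

	/* The patch's while-loop encoding. */
	static uint64_t spte_while(uint32_t bm)
	{
		uint64_t spte = 0;
		int i = 0;

		while (i < 32) {
			if (bm & (1ULL << i))
				spte |= 1ULL << (i * 2);
			i++;
		}
		return spte;
	}

	/* The suggested for-loop encoding. */
	static uint64_t spte_for(uint32_t bm)
	{
		uint64_t spte = 0;
		int i;

		for (i = 0; i < 32; i++)
			spte |= (bm & BIT_ULL(i)) << i;
		return spte;
	}

	int main(void)
	{
		uint32_t samples[] = { 0, 1, 0x55555555u, 0x80000001u, 0xffffffffu };
		unsigned int j;

		/* Both encodings must agree for every sample bitmap. */
		for (j = 0; j < sizeof(samples) / sizeof(samples[0]); j++)
			assert(spte_while(samples[j]) == spte_for(samples[j]));
		return 0;
	}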
On Thu, Jan 02, 2020 at 02:13:12PM +0800, Yang Weijiang wrote:
> SPPT is a 4-level paging structure similar to EPT, when SPP is
> armed for target physical page, bit 61 of the corresponding
> EPT entry is flagged, then SPPT is traversed with the gfn,
> the leaf entry of SPPT contains the access bitmap of subpages
> inside the target 4KB physical page, one bit per 128-byte subpage.
>
> Co-developed-by: He Chen <he.chen@linux.intel.com>
> Signed-off-by: He Chen <he.chen@linux.intel.com>
> Co-developed-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |   5 +-
>  arch/x86/kvm/mmu/spp.c          | 228 ++++++++++++++++++++++++++++++++
>  arch/x86/kvm/mmu/spp.h          |  10 ++

This patch is completely untestable.  It adds spp.c but doesn't compile
it.  Actually, everything up until patch 06 is untestable.
On Fri, Jan 10, 2020 at 09:26:11AM -0800, Sean Christopherson wrote:
> On Thu, Jan 02, 2020 at 02:13:12PM +0800, Yang Weijiang wrote:
> > SPPT is a 4-level paging structure similar to EPT, when SPP is
>
> How does SPP interact with 5-level EPT?
>
It should work with 5-level EPT as well; I will add a test for it later.

> > armed for target physical page, bit 61 of the corresponding
> > EPT entry is flagged, then SPPT is traversed with the gfn,
> > the leaf entry of SPPT contains the access bitmap of subpages
> > inside the target 4KB physical page, one bit per 128-byte subpage.
> >
> > Co-developed-by: He Chen <he.chen@linux.intel.com>
> > Signed-off-by: He Chen <he.chen@linux.intel.com>
> > Co-developed-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > ---
>
> ...
>
> > +static u64 format_spp_spte(u32 spp_wp_bitmap)
> > +{
> > +	u64 new_spte = 0;
> > +	int i = 0;
> > +
> > +	/*
> > +	 * One 4K page contains 32 sub-pages, in SPP table L4E, old bits
>
> Is this
>
>   One 4k page contains 32 sub-pages in SPP table L4E.  Old bits are...
>
> or
>
>   One 4k page contains 32 sub-pages.  In SPP table L4E, old bits are...
>
> or
>   ???
>
The second one, and there's a typo: "old" should be "odd". Will modify it,
thank you!

> > +	 * are reserved, so we need to transfer u32 subpage write
>
> Wrap comments at 80 columns to save lines.
>
> > +	 * protect bitmap to u64 SPP L4E format.
>
> What is a "page" in "one 4k page"?  What old bits?  Why not just track a
> 64-bit value?  I understand *what* the code below does, but I have no clue
> why or whether it's correct.

"old" should be "odd": according to the SDM, write-permission is tracked
via the even bits (2i), so 32 sub-pages * 128 bytes = 4KB. The odd bits
are reserved for now.

> > +	 */
> > +	while (i < 32) {
> > +		if (spp_wp_bitmap & (1ULL << i))
> > +			new_spte |= 1ULL << (i * 2);
> > +		i++;
> > +	}
>
> 	for (i = 0; i < 32; i++)
> 		new_spte |= (spp_wp_bitmap & BIT_ULL(i)) << i;
>
> At the very least, use a for loop.

Sure, will change it, thank you!

> > +
> > +	return new_spte;
> > +}
> > +
> > +static void spp_spte_set(u64 *sptep, u64 new_spte)
> > +{
> > +	__set_spte(sptep, new_spte);
> > +}
> > +
> > +bool is_spp_spte(struct kvm_mmu_page *sp)
> > +{
> > +	return sp->role.spp;
> > +}
> > +
> > +#define SPPT_ENTRY_PHA_MASK (0xFFFFFFFFFF << 12)
> > +
> > +int kvm_spp_setup_structure(struct kvm_vcpu *vcpu,
> > +			    u32 access_map, gfn_t gfn)
> > +{
> > +	struct kvm_shadow_walk_iterator iter;
> > +	struct kvm_mmu_page *sp;
> > +	gfn_t pseudo_gfn;
> > +	u64 old_spte, spp_spte;
> > +	int ret = -EFAULT;
> > +
> > +	/* direct_map spp start */
> > +	if (!VALID_PAGE(vcpu->kvm->arch.sppt_root))
> > +		return -EFAULT;
> > +
> > +	for_each_shadow_spp_entry(vcpu, (u64)gfn << PAGE_SHIFT, iter) {
> > +		if (iter.level == PT_PAGE_TABLE_LEVEL) {
> > +			spp_spte = format_spp_spte(access_map);
> > +			old_spte = mmu_spte_get_lockless(iter.sptep);
> > +			if (old_spte != spp_spte)
> > +				spp_spte_set(iter.sptep, spp_spte);
> > +			ret = 0;
> > +			break;
> > +		}
> > +
> > +		if (!is_shadow_present_pte(*iter.sptep)) {
> > +			u64 base_addr = iter.addr;
> > +
> > +			base_addr &= PT64_LVL_ADDR_MASK(iter.level);
> > +			pseudo_gfn = base_addr >> PAGE_SHIFT;
> > +			spp_spte = *iter.sptep;
> > +			sp = kvm_spp_get_page(vcpu, pseudo_gfn,
> > +					      iter.level - 1);
> > +			link_spp_shadow_page(vcpu, iter.sptep, sp);
> > +		} else if (iter.level == PT_DIRECTORY_LEVEL &&
> > +			   !(spp_spte & PT_PRESENT_MASK) &&
> > +			   (spp_spte & SPPT_ENTRY_PHA_MASK)) {
> > +			spp_spte = *iter.sptep;
> > +			spp_spte |= PT_PRESENT_MASK;
> > +			spp_spte_set(iter.sptep, spp_spte);
> > +		}
> > +	}
> > +
> > +	kvm_flush_remote_tlbs(vcpu->kvm);
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_spp_setup_structure);
> > +
> > +inline u64 construct_spptp(unsigned long root_hpa)
> > +{
> > +	return root_hpa & PAGE_MASK;
> > +}
> > +EXPORT_SYMBOL_GPL(construct_spptp);
> > +
> > diff --git a/arch/x86/kvm/mmu/spp.h b/arch/x86/kvm/mmu/spp.h
> > new file mode 100644
> > index 000000000000..8ef94b7a2057
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/spp.h
> > @@ -0,0 +1,10 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __KVM_X86_VMX_SPP_H
> > +#define __KVM_X86_VMX_SPP_H
> > +
> > +bool is_spp_spte(struct kvm_mmu_page *sp);
> > +u64 construct_spptp(unsigned long root_hpa);
> > +int kvm_spp_setup_structure(struct kvm_vcpu *vcpu,
> > +			    u32 access_map, gfn_t gfn);
> > +
> > +#endif /* __KVM_X86_VMX_SPP_H */
> > --
> > 2.17.2
> >
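The even-bit layout described in the reply can be made concrete with a
hypothetical inverse helper, a sketch that is not part of the patch: it
recovers the u32 sub-page write-permission bitmap from an SPP L4E by reading
only the even bits and ignoring the reserved odd bits.

	#include <stdint.h>

	/*
	 * Hypothetical inverse of format_spp_spte(): write-permission for
	 * sub-page i lives in even bit 2*i of the SPP L4E, one bit per
	 * 128-byte sub-page (32 * 128 bytes = 4KB). Odd bits are reserved
	 * and skipped.
	 */
	static uint32_t spp_spte_to_bitmap(uint64_t spte)
	{
		uint32_t bitmap = 0;
		int i;

		for (i = 0; i < 32; i++)
			if (spte & (1ULL << (i * 2)))
				bitmap |= 1U << i;
		return bitmap;
	}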
On Fri, Jan 10, 2020 at 09:40:27AM -0800, Sean Christopherson wrote:
> On Thu, Jan 02, 2020 at 02:13:12PM +0800, Yang Weijiang wrote:
> > SPPT is a 4-level paging structure similar to EPT, when SPP is
> > armed for target physical page, bit 61 of the corresponding
> > EPT entry is flagged, then SPPT is traversed with the gfn,
> > the leaf entry of SPPT contains the access bitmap of subpages
> > inside the target 4KB physical page, one bit per 128-byte subpage.
> >
> > Co-developed-by: He Chen <he.chen@linux.intel.com>
> > Signed-off-by: He Chen <he.chen@linux.intel.com>
> > Co-developed-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |   5 +-
> >  arch/x86/kvm/mmu/spp.c          | 228 ++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/mmu/spp.h          |  10 ++
>
> This patch is completely untestable.  It adds spp.c but doesn't compile
> it.  Actually, everything up until patch 06 is untestable.

OK, will re-order the patches in a reasonable sequence. Thank you!
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b79cd6aa4075..9506c9d40895 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,7 +261,8 @@ union kvm_mmu_page_role {
 		unsigned smap_andnot_wp:1;
 		unsigned ad_disabled:1;
 		unsigned guest_mode:1;
-		unsigned :6;
+		unsigned spp:1;
+		unsigned reserved:5;
 
 		/*
 		 * This is left at the top of the word so that
@@ -956,6 +957,8 @@ struct kvm_arch {
 	struct kvm_pmu_event_filter *pmu_event_filter;
 
 	struct task_struct *nx_lpage_recovery_thread;
+
+	hpa_t sppt_root;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/mmu/spp.c b/arch/x86/kvm/mmu/spp.c
new file mode 100644
index 000000000000..5fca08af705c
--- /dev/null
+++ b/arch/x86/kvm/mmu/spp.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "spp.h"
+
+#define for_each_shadow_spp_entry(_vcpu, _addr, _walker)		\
+	for (shadow_spp_walk_init(&(_walker), _vcpu, _addr);		\
+	     shadow_walk_okay(&(_walker));				\
+	     shadow_walk_next(&(_walker)))
+
+static void shadow_spp_walk_init(struct kvm_shadow_walk_iterator *iterator,
+				 struct kvm_vcpu *vcpu, u64 addr)
+{
+	iterator->addr = addr;
+	iterator->shadow_addr = vcpu->kvm->arch.sppt_root;
+
+	/* SPP Table is a 4-level paging structure */
+	iterator->level = PT64_ROOT_4LEVEL;
+}
+
+static bool __rmap_open_subpage_bit(struct kvm *kvm,
+				    struct kvm_rmap_head *rmap_head)
+{
+	struct rmap_iterator iter;
+	bool flush = false;
+	u64 *sptep;
+	u64 spte;
+
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
+		/*
+		 * SPP works only when the page is write-protected
+		 * and SPP bit is set in EPT leaf entry.
+		 */
+		flush |= spte_write_protect(sptep, false);
+		spte = *sptep | PT_SPP_MASK;
+		flush |= mmu_spte_update(sptep, spte);
+	}
+
+	return flush;
+}
+
+static int kvm_spp_open_write_protect(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
+				      gfn_t gfn)
+{
+	struct kvm_rmap_head *rmap_head;
+	bool flush = false;
+
+	/*
+	 * SPP is only supported with 4KB level1 memory page, check
+	 * if the page is mapped in EPT leaf entry.
+	 */
+	rmap_head = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
+
+	if (!rmap_head->val)
+		return -EFAULT;
+
+	flush |= __rmap_open_subpage_bit(kvm, rmap_head);
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+static bool __rmap_clear_subpage_bit(struct kvm *kvm,
+				     struct kvm_rmap_head *rmap_head)
+{
+	struct rmap_iterator iter;
+	bool flush = false;
+	u64 *sptep;
+	u64 spte;
+
+	for_each_rmap_spte(rmap_head, &iter, sptep) {
+		spte = (*sptep & ~PT_SPP_MASK);
+		flush |= mmu_spte_update(sptep, spte);
+	}
+
+	return flush;
+}
+
+static int kvm_spp_clear_write_protect(struct kvm *kvm,
+				       struct kvm_memory_slot *slot,
+				       gfn_t gfn)
+{
+	struct kvm_rmap_head *rmap_head;
+	bool flush = false;
+
+	rmap_head = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
+
+	if (!rmap_head->val)
+		return -EFAULT;
+
+	flush |= __rmap_clear_subpage_bit(kvm, rmap_head);
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+struct kvm_mmu_page *kvm_spp_get_page(struct kvm_vcpu *vcpu,
+				      gfn_t gfn,
+				      unsigned int level)
+{
+	struct kvm_mmu_page *sp;
+	union kvm_mmu_page_role role;
+
+	role = vcpu->arch.mmu->mmu_role.base;
+	role.level = level;
+	role.direct = true;
+	role.spp = true;
+
+	for_each_valid_sp(vcpu->kvm, sp, gfn) {
+		if (sp->gfn != gfn)
+			continue;
+		if (sp->role.word != role.word)
+			continue;
+		if (sp->role.spp && sp->role.level == level)
+			goto out;
+	}
+
+	sp = kvm_mmu_alloc_page(vcpu, true);
+	sp->gfn = gfn;
+	sp->role = role;
+	hlist_add_head(&sp->hash_link,
+		       &vcpu->kvm->arch.mmu_page_hash
+		       [kvm_page_table_hashfn(gfn)]);
+	clear_page(sp->spt);
+out:
+	return sp;
+}
+EXPORT_SYMBOL_GPL(kvm_spp_get_page);
+
+static void link_spp_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
+				 struct kvm_mmu_page *sp)
+{
+	u64 spte;
+
+	spte = __pa(sp->spt) | PT_PRESENT_MASK;
+
+	mmu_spte_set(sptep, spte);
+
+	mmu_page_add_parent_pte(vcpu, sp, sptep);
+}
+
+static u64 format_spp_spte(u32 spp_wp_bitmap)
+{
+	u64 new_spte = 0;
+	int i = 0;
+
+	/*
+	 * One 4K page contains 32 sub-pages, in SPP table L4E, old bits
+	 * are reserved, so we need to transfer u32 subpage write
+	 * protect bitmap to u64 SPP L4E format.
+	 */
+	while (i < 32) {
+		if (spp_wp_bitmap & (1ULL << i))
+			new_spte |= 1ULL << (i * 2);
+		i++;
+	}
+
+	return new_spte;
+}
+
+static void spp_spte_set(u64 *sptep, u64 new_spte)
+{
+	__set_spte(sptep, new_spte);
+}
+
+bool is_spp_spte(struct kvm_mmu_page *sp)
+{
+	return sp->role.spp;
+}
+
+#define SPPT_ENTRY_PHA_MASK (0xFFFFFFFFFF << 12)
+
+int kvm_spp_setup_structure(struct kvm_vcpu *vcpu,
+			    u32 access_map, gfn_t gfn)
+{
+	struct kvm_shadow_walk_iterator iter;
+	struct kvm_mmu_page *sp;
+	gfn_t pseudo_gfn;
+	u64 old_spte, spp_spte;
+	int ret = -EFAULT;
+
+	/* direct_map spp start */
+	if (!VALID_PAGE(vcpu->kvm->arch.sppt_root))
+		return -EFAULT;
+
+	for_each_shadow_spp_entry(vcpu, (u64)gfn << PAGE_SHIFT, iter) {
+		if (iter.level == PT_PAGE_TABLE_LEVEL) {
+			spp_spte = format_spp_spte(access_map);
+			old_spte = mmu_spte_get_lockless(iter.sptep);
+			if (old_spte != spp_spte)
+				spp_spte_set(iter.sptep, spp_spte);
+			ret = 0;
+			break;
+		}
+
+		if (!is_shadow_present_pte(*iter.sptep)) {
+			u64 base_addr = iter.addr;
+
+			base_addr &= PT64_LVL_ADDR_MASK(iter.level);
+			pseudo_gfn = base_addr >> PAGE_SHIFT;
+			spp_spte = *iter.sptep;
+			sp = kvm_spp_get_page(vcpu, pseudo_gfn,
+					      iter.level - 1);
+			link_spp_shadow_page(vcpu, iter.sptep, sp);
+		} else if (iter.level == PT_DIRECTORY_LEVEL &&
+			   !(spp_spte & PT_PRESENT_MASK) &&
+			   (spp_spte & SPPT_ENTRY_PHA_MASK)) {
+			spp_spte = *iter.sptep;
+			spp_spte |= PT_PRESENT_MASK;
+			spp_spte_set(iter.sptep, spp_spte);
+		}
+	}
+
+	kvm_flush_remote_tlbs(vcpu->kvm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_spp_setup_structure);
+
+inline u64 construct_spptp(unsigned long root_hpa)
+{
+	return root_hpa & PAGE_MASK;
+}
+EXPORT_SYMBOL_GPL(construct_spptp);
+
diff --git a/arch/x86/kvm/mmu/spp.h b/arch/x86/kvm/mmu/spp.h
new file mode 100644
index 000000000000..8ef94b7a2057
--- /dev/null
+++ b/arch/x86/kvm/mmu/spp.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_SPP_H
+#define __KVM_X86_VMX_SPP_H
+
+bool is_spp_spte(struct kvm_mmu_page *sp);
+u64 construct_spptp(unsigned long root_hpa);
+int kvm_spp_setup_structure(struct kvm_vcpu *vcpu,
+			    u32 access_map, gfn_t gfn);
+
+#endif /* __KVM_X86_VMX_SPP_H */
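For orientation, a stand-alone sketch (illustrative only, with constants
mirroring the kernel's PAGE_SHIFT and 9-bits-per-level paging; not from the
patch) of how the walk started by for_each_shadow_spp_entry() consumes
gfn << PAGE_SHIFT: as with 4-level EPT, each SPPT level is indexed by 9 bits
of the address, and the L1 leaf holds the 64-bit bitmap built by
format_spp_spte().

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PT64_LEVEL_BITS	9

	/* Index into the SPPT page at the given level for this gfn. */
	static unsigned int sppt_index(uint64_t gfn, int level)
	{
		uint64_t addr = gfn << PAGE_SHIFT;

		/* level 4 uses addr bits 47:39, ..., level 1 uses bits 20:12 */
		return (addr >> (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)) & 0x1ff;
	}

	int main(void)
	{
		uint64_t gfn = 0x123456;	/* arbitrary example gfn */
		int level;

		for (level = 4; level >= 1; level--)
			printf("SPPT L%d index = 0x%03x\n", level,
			       sppt_index(gfn, level));
		return 0;
	}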