Message ID | 20200907152344.12978-4-will@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Rewrite page-table code and fault handling | expand |
On 9/8/20 1:23 AM, Will Deacon wrote: > The generic page-table walker is pretty useless as it stands, because it > doesn't understand enough to allocate anything. Teach it about stage-1 > page-tables, and hook up an API for allocating these for the hypervisor > at EL2. > > Cc: Marc Zyngier <maz@kernel.org> > Cc: Quentin Perret <qperret@google.com> > Signed-off-by: Will Deacon <will@kernel.org> > --- Reviewed-by: Gavin Shan <gshan@redhat.com> > arch/arm64/include/asm/kvm_pgtable.h | 36 ++++++++ > arch/arm64/kvm/hyp/pgtable.c | 133 +++++++++++++++++++++++++++ > 2 files changed, 169 insertions(+) > > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h > index 1c5d981e15c3..91e364804547 100644 > --- a/arch/arm64/include/asm/kvm_pgtable.h > +++ b/arch/arm64/include/asm/kvm_pgtable.h > @@ -77,6 +77,42 @@ struct kvm_pgtable_walker { > const enum kvm_pgtable_walk_flags flags; > }; > > +/** > + * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. > + * @pgt: Uninitialised page-table structure to initialise. > + * @va_bits: Maximum virtual address bits. > + * > + * Return: 0 on success, negative error code on failure. > + */ > +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits); > + > +/** > + * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table. > + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). > + * > + * The page-table is assumed to be unreachable by any hardware walkers prior > + * to freeing and therefore no TLB invalidation is performed. > + */ > +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); > + > +/** > + * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table. > + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). > + * @addr: Virtual address at which to place the mapping. > + * @size: Size of the mapping. > + * @phys: Physical address of the memory to map. 
> + * @prot: Permissions and attributes for the mapping. > + * > + * If device attributes are not explicitly requested in @prot, then the > + * mapping will be normal, cacheable. Attempts to install a mapping for > + * a virtual address that is already mapped will be rejected with an error > + * and a WARN(). > + * > + * Return: 0 on success, negative error code on failure. > + */ > +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, > + enum kvm_pgtable_prot prot); > + > /** > * kvm_pgtable_walk() - Walk a page-table. > * @pgt: Page-table structure initialised by kvm_pgtable_*_init(). > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c > index 3fb9d1949a3f..23a1006aa4ef 100644 > --- a/arch/arm64/kvm/hyp/pgtable.c > +++ b/arch/arm64/kvm/hyp/pgtable.c > @@ -24,8 +24,18 @@ > > #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) > > +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) > +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) > +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3 > +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1 > +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) > +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 > +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) > + > #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) > > +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) > + > struct kvm_pgtable_walk_data { > struct kvm_pgtable *pgt; > struct kvm_pgtable_walker *walker; > @@ -283,3 +293,126 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, > > return _kvm_pgtable_walk(&walk_data); > } > + > +struct hyp_map_data { > + u64 phys; > + kvm_pte_t attr; > +}; > + > +static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, > + struct hyp_map_data *data) > +{ > + bool device = prot & KVM_PGTABLE_PROT_DEVICE; > + u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL; > + kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype); > + u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS; > + u32 ap = (prot & KVM_PGTABLE_PROT_W) ? 
KVM_PTE_LEAF_ATTR_LO_S1_AP_RW : > + KVM_PTE_LEAF_ATTR_LO_S1_AP_RO; > + > + if (!(prot & KVM_PGTABLE_PROT_R)) > + return -EINVAL; > + > + if (prot & KVM_PGTABLE_PROT_X) { > + if (prot & KVM_PGTABLE_PROT_W) > + return -EINVAL; > + > + if (device) > + return -EINVAL; > + } else { > + attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; > + } > + > + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); > + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); > + attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; > + data->attr = attr; > + return 0; > +} > + > +static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, > + kvm_pte_t *ptep, struct hyp_map_data *data) > +{ > + u64 granule = kvm_granule_size(level), phys = data->phys; > + > + if (!kvm_block_mapping_supported(addr, end, phys, level)) > + return false; > + > + WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)); > + data->phys += granule; > + return true; > +} > + > +static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, > + enum kvm_pgtable_walk_flags flag, void * const arg) > +{ > + kvm_pte_t *childp; > + > + if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) > + return 0; > + > + if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) > + return -EINVAL; > + > + childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); > + if (!childp) > + return -ENOMEM; > + > + kvm_set_table_pte(ptep, childp); > + return 0; > +} > + > +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, > + enum kvm_pgtable_prot prot) > +{ > + int ret; > + struct hyp_map_data map_data = { > + .phys = ALIGN_DOWN(phys, PAGE_SIZE), > + }; > + struct kvm_pgtable_walker walker = { > + .cb = hyp_map_walker, > + .flags = KVM_PGTABLE_WALK_LEAF, > + .arg = &map_data, > + }; > + > + ret = hyp_map_set_prot_attr(prot, &map_data); > + if (ret) > + return ret; > + > + ret = kvm_pgtable_walk(pgt, addr, size, &walker); > + dsb(ishst); > + isb(); > + return ret; > +} > + > +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 
va_bits) > +{ > + u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); > + > + pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); > + if (!pgt->pgd) > + return -ENOMEM; > + > + pgt->ia_bits = va_bits; > + pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; > + pgt->mmu = NULL; > + return 0; > +} > + > +static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, > + enum kvm_pgtable_walk_flags flag, void * const arg) > +{ > + free_page((unsigned long)kvm_pte_follow(*ptep)); > + return 0; > +} > + > +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) > +{ > + struct kvm_pgtable_walker walker = { > + .cb = hyp_free_walker, > + .flags = KVM_PGTABLE_WALK_TABLE_POST, > + }; > + > + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); > + free_page((unsigned long)pgt->pgd); > + pgt->pgd = NULL; > +} >
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 1c5d981e15c3..91e364804547 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -77,6 +77,42 @@ struct kvm_pgtable_walker { const enum kvm_pgtable_walk_flags flags; }; +/** + * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. + * @pgt: Uninitialised page-table structure to initialise. + * @va_bits: Maximum virtual address bits. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits); + +/** + * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); + +/** + * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * @addr: Virtual address at which to place the mapping. + * @size: Size of the mapping. + * @phys: Physical address of the memory to map. + * @prot: Permissions and attributes for the mapping. + * + * If device attributes are not explicitly requested in @prot, then the + * mapping will be normal, cacheable. Attempts to install a mapping for + * a virtual address that is already mapped will be rejected with an error + * and a WARN(). + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot); + /** * kvm_pgtable_walk() - Walk a page-table. * @pgt: Page-table structure initialised by kvm_pgtable_*_init(). 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fb9d1949a3f..23a1006aa4ef 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -24,8 +24,18 @@ #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1 +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) + #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) + struct kvm_pgtable_walk_data { struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; @@ -283,3 +293,126 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, return _kvm_pgtable_walk(&walk_data); } + +struct hyp_map_data { + u64 phys; + kvm_pte_t attr; +}; + +static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, + struct hyp_map_data *data) +{ + bool device = prot & KVM_PGTABLE_PROT_DEVICE; + u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL; + kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype); + u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS; + u32 ap = (prot & KVM_PGTABLE_PROT_W) ? 
KVM_PTE_LEAF_ATTR_LO_S1_AP_RW : + KVM_PTE_LEAF_ATTR_LO_S1_AP_RO; + + if (!(prot & KVM_PGTABLE_PROT_R)) + return -EINVAL; + + if (prot & KVM_PGTABLE_PROT_X) { + if (prot & KVM_PGTABLE_PROT_W) + return -EINVAL; + + if (device) + return -EINVAL; + } else { + attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; + } + + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); + attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; + data->attr = attr; + return 0; +} + +static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, struct hyp_map_data *data) +{ + u64 granule = kvm_granule_size(level), phys = data->phys; + + if (!kvm_block_mapping_supported(addr, end, phys, level)) + return false; + + WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)); + data->phys += granule; + return true; +} + +static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + kvm_pte_t *childp; + + if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) + return 0; + + if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + return -EINVAL; + + childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); + if (!childp) + return -ENOMEM; + + kvm_set_table_pte(ptep, childp); + return 0; +} + +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot) +{ + int ret; + struct hyp_map_data map_data = { + .phys = ALIGN_DOWN(phys, PAGE_SIZE), + }; + struct kvm_pgtable_walker walker = { + .cb = hyp_map_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = &map_data, + }; + + ret = hyp_map_set_prot_attr(prot, &map_data); + if (ret) + return ret; + + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + dsb(ishst); + isb(); + return ret; +} + +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits) +{ + u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); + + pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); + if (!pgt->pgd) + return -ENOMEM; + + 
pgt->ia_bits = va_bits; + pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; + pgt->mmu = NULL; + return 0; +} + +static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + free_page((unsigned long)kvm_pte_follow(*ptep)); + return 0; +} + +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) +{ + struct kvm_pgtable_walker walker = { + .cb = hyp_free_walker, + .flags = KVM_PGTABLE_WALK_TABLE_POST, + }; + + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + free_page((unsigned long)pgt->pgd); + pgt->pgd = NULL; +}
The generic page-table walker is pretty useless as it stands, because it
doesn't understand enough to allocate anything. Teach it about stage-1
page-tables, and hook up an API for allocating these for the hypervisor
at EL2.

Cc: Marc Zyngier <maz@kernel.org>
Cc: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_pgtable.h |  36 ++++++++
 arch/arm64/kvm/hyp/pgtable.c         | 133 +++++++++++++++++++++++++++
 2 files changed, 169 insertions(+)