diff mbox series

[1/2] KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB Stage-2 flag

Message ID 20210317141714.383046-2-qperret@google.com (mailing list archive)
State New, archived
Headers show
Series Fixes for FWB | expand

Commit Message

Quentin Perret March 17, 2021, 2:17 p.m. UTC
In order to further configure stage-2 page-tables, pass flags to the
init function using a new enum.

The first of these flags allows disabling FWB even if the hardware
supports it, as we will need to do so for the host stage-2.

Signed-off-by: Quentin Perret <qperret@google.com>

---

One question is, do we want to use stage2_has_fwb() everywhere, including
guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?

That'd make this patch more intrusive, but would make the whole codebase
work with FWB enabled on a guest-by-guest basis. I don't see us using that
anytime soon (other than maybe for debugging of some sort?), but it'd be
good to have an agreement.
---
 arch/arm64/include/asm/kvm_pgtable.h  | 19 +++++++++--
 arch/arm64/include/asm/pgtable-prot.h |  4 +--
 arch/arm64/kvm/hyp/pgtable.c          | 49 +++++++++++++++++----------
 3 files changed, 50 insertions(+), 22 deletions(-)

Comments

Marc Zyngier March 17, 2021, 2:41 p.m. UTC | #1
Hi Quentin,

On Wed, 17 Mar 2021 14:17:13 +0000,
Quentin Perret <qperret@google.com> wrote:
> 
> In order to further configure stage-2 page-tables, pass flags to the
> init function using a new enum.
> 
> The first of these flags allows to disable FWB even if the hardware
> supports it as we will need to do so for the host stage-2.
> 
> Signed-off-by: Quentin Perret <qperret@google.com>
> 
> ---
> 
> One question is, do we want to use stage2_has_fwb() everywhere, including
> guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
> 
> That'd make this patch more intrusive, but would make the whole codebase
> work with FWB enabled on a guest by guest basis. I don't see us use that
> anytime soon (other than maybe debug of some sort?) but it'd be good to
> have an agreement.

I'm not sure how useful that would be. We fought long and hard to get
FWB, and I can't see a good reason to disable it for guests unless the
HW was buggy (but in which case that'd be for everyone). I'd rather
keep the changes small for now (this whole series is invasive
enough!).

As for this patch, I only have a few cosmetic comments:

> ---
>  arch/arm64/include/asm/kvm_pgtable.h  | 19 +++++++++--
>  arch/arm64/include/asm/pgtable-prot.h |  4 +--
>  arch/arm64/kvm/hyp/pgtable.c          | 49 +++++++++++++++++----------
>  3 files changed, 50 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index b93a2a3526ab..7382bdfb6284 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
>  	phys_addr_t	(*virt_to_phys)(void *addr);
>  };
>  
> +/**
> + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
> + * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
> + *				ARM64_HAS_STAGE2_FWB.
> + */
> +enum kvm_pgtable_stage2_flags {
> +	KVM_PGTABLE_S2_NOFWB			= BIT(0),
> +};
> +
>  /**
>   * struct kvm_pgtable - KVM page-table.
>   * @ia_bits:		Maximum input address size, in bits.
> @@ -72,6 +81,7 @@ struct kvm_pgtable {
>  
>  	/* Stage-2 only */
>  	struct kvm_s2_mmu			*mmu;
> +	enum kvm_pgtable_stage2_flags		flags;
>  };
>  
>  /**
> @@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
>   * @arch:	Arch-specific KVM structure representing the guest virtual
>   *		machine.
>   * @mm_ops:	Memory management callbacks.
> + * @flags:	Stage-2 configuration flags.
>   *
>   * Return: 0 on success, negative error code on failure.
>   */
> -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> -			    struct kvm_pgtable_mm_ops *mm_ops);
> +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> +				  struct kvm_pgtable_mm_ops *mm_ops,
> +				  enum kvm_pgtable_stage2_flags flags);
> +
> +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
> +	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
>  
>  /**
>   * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 046be789fbb4..beeb722a82d3 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
>  #define PAGE_KERNEL_EXEC	__pgprot(PROT_NORMAL & ~PTE_PXN)
>  #define PAGE_KERNEL_EXEC_CONT	__pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
>  
> -#define PAGE_S2_MEMATTR(attr)						\
> +#define PAGE_S2_MEMATTR(attr, has_fwb)					\
>  	({								\
>  		u64 __val;						\
> -		if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))		\
> +		if (has_fwb)						\
>  			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
>  		else							\
>  			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 3a971df278bd..dee8aaeaf13e 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -507,12 +507,25 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
>  	return vtcr;
>  }
>  
> -static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
> +static bool stage2_has_fwb(struct kvm_pgtable *pgt)
> +{
> +	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> +		return false;
> +
> +	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
> +}
> +
> +static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep,
> +				struct kvm_pgtable *pgt)

nit: make pgt the first parameter, as it defines the context in which
the rest applies.

>  {
>  	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
> -	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
> -			    PAGE_S2_MEMATTR(NORMAL);
>  	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
> +	kvm_pte_t attr;
> +
> +	if (device)
> +		attr = PAGE_S2_MEMATTR(DEVICE_nGnRE, stage2_has_fwb(pgt));
> +	else
> +		attr = PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));

Maybe define a new helper:

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

to avoid the constant stage2_has_fwb() repetition.

>  
>  	if (!(prot & KVM_PGTABLE_PROT_X))
>  		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
> @@ -748,7 +761,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  		.arg		= &map_data,
>  	};
>  
> -	ret = stage2_set_prot_attr(prot, &map_data.attr);
> +	ret = stage2_set_prot_attr(prot, &map_data.attr, pgt);
>  	if (ret)
>  		return ret;
>  
> @@ -786,16 +799,13 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  
>  static void stage2_flush_dcache(void *addr, u64 size)
>  {
> -	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> -		return;
> -
>  	__flush_dcache_area(addr, size);
>  }

Consider dropping the function altogether and using __flush_dcache_area()
directly (assuming the prototypes are identical).

>  
> -static bool stage2_pte_cacheable(kvm_pte_t pte)
> +static bool stage2_pte_cacheable(kvm_pte_t pte, struct kvm_pgtable *pgt)

Same comment about pgt being the first argument.

>  {
>  	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
> -	return memattr == PAGE_S2_MEMATTR(NORMAL);
> +	return memattr == PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));
>  }
>  
>  static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> @@ -821,8 +831,8 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>  
>  		if (mm_ops->page_count(childp) != 1)
>  			return 0;
> -	} else if (stage2_pte_cacheable(pte)) {
> -		need_flush = true;
> +	} else if (stage2_pte_cacheable(pte, pgt)) {
> +		need_flush = !stage2_has_fwb(pgt);
>  	}
>  
>  	/*
> @@ -979,10 +989,11 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>  			       enum kvm_pgtable_walk_flags flag,
>  			       void * const arg)
>  {
> -	struct kvm_pgtable_mm_ops *mm_ops = arg;
> +	struct kvm_pgtable *pgt = arg;
> +	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
>  	kvm_pte_t pte = *ptep;
>  
> -	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
> +	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte, pgt))
>  		return 0;
>  
>  	stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
> @@ -994,17 +1005,18 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
>  	struct kvm_pgtable_walker walker = {
>  		.cb	= stage2_flush_walker,
>  		.flags	= KVM_PGTABLE_WALK_LEAF,
> -		.arg	= pgt->mm_ops,
> +		.arg	= pgt,
>  	};
>  
> -	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
> +	if (stage2_has_fwb(pgt))
>  		return 0;
>  
>  	return kvm_pgtable_walk(pgt, addr, size, &walker);
>  }
>  
> -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> -			    struct kvm_pgtable_mm_ops *mm_ops)
> +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> +				  struct kvm_pgtable_mm_ops *mm_ops,
> +				  enum kvm_pgtable_stage2_flags flags)
>  {
>  	size_t pgd_sz;
>  	u64 vtcr = arch->vtcr;
> @@ -1017,6 +1029,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
>  	if (!pgt->pgd)
>  		return -ENOMEM;
>  
> +	pgt->flags		= flags;

Try and keep the initialisation order similar to the definition of the
structure if possible.

>  	pgt->ia_bits		= ia_bits;
>  	pgt->start_level	= start_level;
>  	pgt->mm_ops		= mm_ops;
> @@ -1101,7 +1114,7 @@ int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
>  	u32 level;
>  	int ret;
>  
> -	ret = stage2_set_prot_attr(prot, &attr);
> +	ret = stage2_set_prot_attr(prot, &attr, pgt);
>  	if (ret)
>  		return ret;
>  	attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
> -- 
> 2.31.0.rc2.261.g7f71774620-goog
> 
> 

Thanks,

	M.
Will Deacon March 17, 2021, 2:42 p.m. UTC | #2
On Wed, Mar 17, 2021 at 02:17:13PM +0000, Quentin Perret wrote:
> In order to further configure stage-2 page-tables, pass flags to the
> init function using a new enum.
> 
> The first of these flags allows to disable FWB even if the hardware
> supports it as we will need to do so for the host stage-2.
> 
> Signed-off-by: Quentin Perret <qperret@google.com>
> 
> ---
> 
> One question is, do we want to use stage2_has_fwb() everywhere, including
> guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
> 
> That'd make this patch more intrusive, but would make the whole codebase
> work with FWB enabled on a guest by guest basis. I don't see us use that
> anytime soon (other than maybe debug of some sort?) but it'd be good to
> have an agreement.

I don't see the value in spreading this everywhere for now.

>  arch/arm64/include/asm/kvm_pgtable.h  | 19 +++++++++--
>  arch/arm64/include/asm/pgtable-prot.h |  4 +--
>  arch/arm64/kvm/hyp/pgtable.c          | 49 +++++++++++++++++----------
>  3 files changed, 50 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index b93a2a3526ab..7382bdfb6284 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
>  	phys_addr_t	(*virt_to_phys)(void *addr);
>  };
>  
> +/**
> + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
> + * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
> + *				ARM64_HAS_STAGE2_FWB.
> + */
> +enum kvm_pgtable_stage2_flags {
> +	KVM_PGTABLE_S2_NOFWB			= BIT(0),
> +};
> +
>  /**
>   * struct kvm_pgtable - KVM page-table.
>   * @ia_bits:		Maximum input address size, in bits.
> @@ -72,6 +81,7 @@ struct kvm_pgtable {
>  
>  	/* Stage-2 only */
>  	struct kvm_s2_mmu			*mmu;
> +	enum kvm_pgtable_stage2_flags		flags;
>  };
>  
>  /**
> @@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
>   * @arch:	Arch-specific KVM structure representing the guest virtual
>   *		machine.
>   * @mm_ops:	Memory management callbacks.
> + * @flags:	Stage-2 configuration flags.
>   *
>   * Return: 0 on success, negative error code on failure.
>   */
> -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> -			    struct kvm_pgtable_mm_ops *mm_ops);
> +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> +				  struct kvm_pgtable_mm_ops *mm_ops,
> +				  enum kvm_pgtable_stage2_flags flags);
> +
> +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
> +	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)

nit: I think some of the kerneldoc refers to "kvm_pgtable_stage2_init()"
so that needs a trivial update to e.g. "kvm_pgtable_stage2_init*()".

> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 046be789fbb4..beeb722a82d3 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
>  #define PAGE_KERNEL_EXEC	__pgprot(PROT_NORMAL & ~PTE_PXN)
>  #define PAGE_KERNEL_EXEC_CONT	__pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
>  
> -#define PAGE_S2_MEMATTR(attr)						\
> +#define PAGE_S2_MEMATTR(attr, has_fwb)					\
>  	({								\
>  		u64 __val;						\
> -		if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))		\
> +		if (has_fwb)						\
>  			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
>  		else							\
>  			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\

Can you take the pgt structure instead of a bool here, or does it end up
being really ugly?

Will
Quentin Perret March 17, 2021, 2:47 p.m. UTC | #3
On Wednesday 17 Mar 2021 at 14:41:31 (+0000), Marc Zyngier wrote:
> Hi Quentin,
> 
> On Wed, 17 Mar 2021 14:17:13 +0000,
> Quentin Perret <qperret@google.com> wrote:
> > 
> > In order to further configure stage-2 page-tables, pass flags to the
> > init function using a new enum.
> > 
> > The first of these flags allows to disable FWB even if the hardware
> > supports it as we will need to do so for the host stage-2.
> > 
> > Signed-off-by: Quentin Perret <qperret@google.com>
> > 
> > ---
> > 
> > One question is, do we want to use stage2_has_fwb() everywhere, including
> > guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
> > 
> > That'd make this patch more intrusive, but would make the whole codebase
> > work with FWB enabled on a guest by guest basis. I don't see us use that
> > anytime soon (other than maybe debug of some sort?) but it'd be good to
> > have an agreement.
> 
> I'm not sure how useful that would be. We fought long and hard to get
> FWB, and I can't see a good reason to disable it for guests unless the
> HW was buggy (but in which case that'd be for everyone). I'd rather
> keep the changes small for now (this whole series is invasive
> enough!).

OK, that works for me.

> As for this patch, I only have a few cosmetic comments:

Happy with the suggestions, I'll fold that in v6.

Cheers,
Quentin
Quentin Perret March 17, 2021, 2:51 p.m. UTC | #4
On Wednesday 17 Mar 2021 at 14:42:46 (+0000), Will Deacon wrote:
> On Wed, Mar 17, 2021 at 02:17:13PM +0000, Quentin Perret wrote:
> > In order to further configure stage-2 page-tables, pass flags to the
> > init function using a new enum.
> > 
> > The first of these flags allows to disable FWB even if the hardware
> > supports it as we will need to do so for the host stage-2.
> > 
> > Signed-off-by: Quentin Perret <qperret@google.com>
> > 
> > ---
> > 
> > One question is, do we want to use stage2_has_fwb() everywhere, including
> > guest-specific paths (e.g. kvm_arch_prepare_memory_region(), ...) ?
> > 
> > That'd make this patch more intrusive, but would make the whole codebase
> > work with FWB enabled on a guest by guest basis. I don't see us use that
> > anytime soon (other than maybe debug of some sort?) but it'd be good to
> > have an agreement.
> 
> I don't see the value in spreading this everywhere for now.

Good. Sounds like we're all in agreement.

> >  arch/arm64/include/asm/kvm_pgtable.h  | 19 +++++++++--
> >  arch/arm64/include/asm/pgtable-prot.h |  4 +--
> >  arch/arm64/kvm/hyp/pgtable.c          | 49 +++++++++++++++++----------
> >  3 files changed, 50 insertions(+), 22 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > index b93a2a3526ab..7382bdfb6284 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops {
> >  	phys_addr_t	(*virt_to_phys)(void *addr);
> >  };
> >  
> > +/**
> > + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
> > + * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
> > + *				ARM64_HAS_STAGE2_FWB.
> > + */
> > +enum kvm_pgtable_stage2_flags {
> > +	KVM_PGTABLE_S2_NOFWB			= BIT(0),
> > +};
> > +
> >  /**
> >   * struct kvm_pgtable - KVM page-table.
> >   * @ia_bits:		Maximum input address size, in bits.
> > @@ -72,6 +81,7 @@ struct kvm_pgtable {
> >  
> >  	/* Stage-2 only */
> >  	struct kvm_s2_mmu			*mmu;
> > +	enum kvm_pgtable_stage2_flags		flags;
> >  };
> >  
> >  /**
> > @@ -201,11 +211,16 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
> >   * @arch:	Arch-specific KVM structure representing the guest virtual
> >   *		machine.
> >   * @mm_ops:	Memory management callbacks.
> > + * @flags:	Stage-2 configuration flags.
> >   *
> >   * Return: 0 on success, negative error code on failure.
> >   */
> > -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> > -			    struct kvm_pgtable_mm_ops *mm_ops);
> > +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
> > +				  struct kvm_pgtable_mm_ops *mm_ops,
> > +				  enum kvm_pgtable_stage2_flags flags);
> > +
> > +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
> > +	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
> 
> nit: I think some of the kerneldoc refers to "kvm_pgtable_stage2_init()"
> so that needs a trivial update to e.g. "kvm_pgtable_stage2_init*()".

Will do.

> > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> > index 046be789fbb4..beeb722a82d3 100644
> > --- a/arch/arm64/include/asm/pgtable-prot.h
> > +++ b/arch/arm64/include/asm/pgtable-prot.h
> > @@ -72,10 +72,10 @@ extern bool arm64_use_ng_mappings;
> >  #define PAGE_KERNEL_EXEC	__pgprot(PROT_NORMAL & ~PTE_PXN)
> >  #define PAGE_KERNEL_EXEC_CONT	__pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
> >  
> > -#define PAGE_S2_MEMATTR(attr)						\
> > +#define PAGE_S2_MEMATTR(attr, has_fwb)					\
> >  	({								\
> >  		u64 __val;						\
> > -		if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))		\
> > +		if (has_fwb)						\
> >  			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
> >  		else							\
> >  			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\
> 
> Can you take the pgt structure instead of a bool here, or does it end up
> being really ugly?

It means I need to expose the stage2_has_fwb() helper in pgtable.h so I
can use it here. But Marc suggested that I introduce another macro along
the lines of

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

which can be defined in pgtable.c and keep everything neatly contained
in there. So I think I'll go ahead with that unless you feel strongly
about it.

Cheers,
Quentin
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index b93a2a3526ab..7382bdfb6284 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -56,6 +56,15 @@  struct kvm_pgtable_mm_ops {
 	phys_addr_t	(*virt_to_phys)(void *addr);
 };
 
+/**
+ * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
+ * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
+ *				ARM64_HAS_STAGE2_FWB.
+ */
+enum kvm_pgtable_stage2_flags {
+	KVM_PGTABLE_S2_NOFWB			= BIT(0),
+};
+
 /**
  * struct kvm_pgtable - KVM page-table.
  * @ia_bits:		Maximum input address size, in bits.
@@ -72,6 +81,7 @@  struct kvm_pgtable {
 
 	/* Stage-2 only */
 	struct kvm_s2_mmu			*mmu;
+	enum kvm_pgtable_stage2_flags		flags;
 };
 
 /**
@@ -201,11 +211,16 @@  u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
  * @arch:	Arch-specific KVM structure representing the guest virtual
  *		machine.
  * @mm_ops:	Memory management callbacks.
+ * @flags:	Stage-2 configuration flags.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
-			    struct kvm_pgtable_mm_ops *mm_ops);
+int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+				  struct kvm_pgtable_mm_ops *mm_ops,
+				  enum kvm_pgtable_stage2_flags flags);
+
+#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
+	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
 
 /**
  * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 046be789fbb4..beeb722a82d3 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -72,10 +72,10 @@  extern bool arm64_use_ng_mappings;
 #define PAGE_KERNEL_EXEC	__pgprot(PROT_NORMAL & ~PTE_PXN)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
 
-#define PAGE_S2_MEMATTR(attr)						\
+#define PAGE_S2_MEMATTR(attr, has_fwb)					\
 	({								\
 		u64 __val;						\
-		if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))		\
+		if (has_fwb)						\
 			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
 		else							\
 			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3a971df278bd..dee8aaeaf13e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -507,12 +507,25 @@  u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
 	return vtcr;
 }
 
-static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
+static bool stage2_has_fwb(struct kvm_pgtable *pgt)
+{
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		return false;
+
+	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
+}
+
+static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep,
+				struct kvm_pgtable *pgt)
 {
 	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
-			    PAGE_S2_MEMATTR(NORMAL);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+	kvm_pte_t attr;
+
+	if (device)
+		attr = PAGE_S2_MEMATTR(DEVICE_nGnRE, stage2_has_fwb(pgt));
+	else
+		attr = PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));
 
 	if (!(prot & KVM_PGTABLE_PROT_X))
 		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
@@ -748,7 +761,7 @@  int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.arg		= &map_data,
 	};
 
-	ret = stage2_set_prot_attr(prot, &map_data.attr);
+	ret = stage2_set_prot_attr(prot, &map_data.attr, pgt);
 	if (ret)
 		return ret;
 
@@ -786,16 +799,13 @@  int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 
 static void stage2_flush_dcache(void *addr, u64 size)
 {
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
-		return;
-
 	__flush_dcache_area(addr, size);
 }
 
-static bool stage2_pte_cacheable(kvm_pte_t pte)
+static bool stage2_pte_cacheable(kvm_pte_t pte, struct kvm_pgtable *pgt)
 {
 	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
-	return memattr == PAGE_S2_MEMATTR(NORMAL);
+	return memattr == PAGE_S2_MEMATTR(NORMAL, stage2_has_fwb(pgt));
 }
 
 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -821,8 +831,8 @@  static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 
 		if (mm_ops->page_count(childp) != 1)
 			return 0;
-	} else if (stage2_pte_cacheable(pte)) {
-		need_flush = true;
+	} else if (stage2_pte_cacheable(pte, pgt)) {
+		need_flush = !stage2_has_fwb(pgt);
 	}
 
 	/*
@@ -979,10 +989,11 @@  static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 			       enum kvm_pgtable_walk_flags flag,
 			       void * const arg)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = arg;
+	struct kvm_pgtable *pgt = arg;
+	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
 	kvm_pte_t pte = *ptep;
 
-	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
+	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte, pgt))
 		return 0;
 
 	stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
@@ -994,17 +1005,18 @@  int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
 	struct kvm_pgtable_walker walker = {
 		.cb	= stage2_flush_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF,
-		.arg	= pgt->mm_ops,
+		.arg	= pgt,
 	};
 
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+	if (stage2_has_fwb(pgt))
 		return 0;
 
 	return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
-			    struct kvm_pgtable_mm_ops *mm_ops)
+int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+				  struct kvm_pgtable_mm_ops *mm_ops,
+				  enum kvm_pgtable_stage2_flags flags)
 {
 	size_t pgd_sz;
 	u64 vtcr = arch->vtcr;
@@ -1017,6 +1029,7 @@  int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
 	if (!pgt->pgd)
 		return -ENOMEM;
 
+	pgt->flags		= flags;
 	pgt->ia_bits		= ia_bits;
 	pgt->start_level	= start_level;
 	pgt->mm_ops		= mm_ops;
@@ -1101,7 +1114,7 @@  int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
 	u32 level;
 	int ret;
 
-	ret = stage2_set_prot_attr(prot, &attr);
+	ret = stage2_set_prot_attr(prot, &attr, pgt);
 	if (ret)
 		return ret;
 	attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;