diff mbox series

[08/14] KVM: arm64: Add support for tagging shared pages in page-table

Message ID 20210719104735.3681732-9-qperret@google.com (mailing list archive)
State New, archived
Headers show
Series Track shared pages at EL2 in protected mode | expand

Commit Message

Quentin Perret July 19, 2021, 10:47 a.m. UTC
The hypervisor will soon be in charge of tracking ownership of all
memory pages in the system. The current page-tracking infrastructure at
EL2 only allows binary states: a page is either owned or not by an
entity. But a number of use-cases will require more complex states for
pages that are shared between two entities (host, hypervisor, or guests).

In preparation for supporting these use-cases, introduce some
infrastructure in the KVM page-table library that allows shared pages
to be tagged using ignored bits (a.k.a. software bits) in PTEs.

Signed-off-by: Quentin Perret <qperret@google.com>
---
 arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
 arch/arm64/kvm/hyp/pgtable.c         | 25 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

Comments

Marc Zyngier July 19, 2021, 2:43 p.m. UTC | #1
On Mon, 19 Jul 2021 11:47:29 +0100,
Quentin Perret <qperret@google.com> wrote:
> 
> The hypervisor will soon be in charge of tracking ownership of all
> memory pages in the system. The current page-tracking infrastructure at
> EL2 only allows binary states: a page is either owned or not by an
> entity. But a number of use-cases will require more complex states for
> pages that are shared between two entities (host, hypervisor, or guests).
> 
> In preparation for supporting these use-cases, introduce in the KVM
> page-table library some infrastructure allowing to tag shared pages
> using ignored bits (a.k.a. software bits) in PTEs.
> 
> Signed-off-by: Quentin Perret <qperret@google.com>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
>  arch/arm64/kvm/hyp/pgtable.c         | 25 +++++++++++++++++++++++++
>  2 files changed, 30 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index dd72653314c7..f6d3d5c8910d 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -81,6 +81,8 @@ enum kvm_pgtable_stage2_flags {
>   * @KVM_PGTABLE_PROT_W:		Write permission.
>   * @KVM_PGTABLE_PROT_R:		Read permission.
>   * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
> + * @KVM_PGTABLE_STATE_SHARED:	Page shared with another entity.
> + * @KVM_PGTABLE_STATE_BORROWED:	Page borrowed from another entity.
>   */
>  enum kvm_pgtable_prot {
>  	KVM_PGTABLE_PROT_X			= BIT(0),
> @@ -88,6 +90,9 @@ enum kvm_pgtable_prot {
>  	KVM_PGTABLE_PROT_R			= BIT(2),
>  
>  	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
> +
> +	KVM_PGTABLE_STATE_SHARED		= BIT(4),
> +	KVM_PGTABLE_STATE_BORROWED		= BIT(5),

I'd rather have some indirection here, as we have other potential
users for the SW bits outside of pKVM (see the NV series, which uses
some of these SW bits as the backend for TTL-based TLB invalidation).

Can we instead only describe the SW bit states in this enum, and let
the users map the semantic they require onto that state? See [1] for
what I carry in the NV branch.

>  };
>  
>  #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 5bdbe7a31551..51598b79dafc 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -211,6 +211,29 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
>  	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
>  }
>  
> +static kvm_pte_t pte_ignored_bit_prot(enum kvm_pgtable_prot prot)

Can we call these sw rather than ignored?

> +{
> +	kvm_pte_t ignored_bits = 0;
> +
> +	/*
> +	 * Ignored bits 0 and 1 are reserved to track the memory ownership
> +	 * state of each page:
> +	 *   00: The page is owned solely by the page-table owner.
> +	 *   01: The page is owned by the page-table owner, but is shared
> +	 *       with another entity.
> +	 *   10: The page is shared with, but not owned by the page-table owner.
> +	 *   11: Reserved for future use (lending).
> +	 */
> +	if (prot & KVM_PGTABLE_STATE_SHARED) {
> +		if (prot & KVM_PGTABLE_STATE_BORROWED)
> +			ignored_bits |= BIT(1);
> +		else
> +			ignored_bits |= BIT(0);
> +	}
> +
> +	return FIELD_PREP(KVM_PTE_LEAF_ATTR_IGNORED, ignored_bits);
> +}
> +
>  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
>  				  u32 level, kvm_pte_t *ptep,
>  				  enum kvm_pgtable_walk_flags flag)
> @@ -357,6 +380,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
>  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
>  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
>  	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
> +	attr |= pte_ignored_bit_prot(prot);
>  	*ptep = attr;
>  
>  	return 0;
> @@ -558,6 +582,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
>  
>  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
>  	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
> +	attr |= pte_ignored_bit_prot(prot);
>  	*ptep = attr;
>  
>  	return 0;

How about kvm_pgtable_stage2_relax_perms()?

Thanks,

	M.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=kvm-arm64/nv-5.13&id=5dea6d82de76cfcda59818ec2532fc34c615db39
Quentin Perret July 19, 2021, 3:49 p.m. UTC | #2
On Monday 19 Jul 2021 at 15:43:34 (+0100), Marc Zyngier wrote:
> On Mon, 19 Jul 2021 11:47:29 +0100,
> Quentin Perret <qperret@google.com> wrote:
> > 
> > The hypervisor will soon be in charge of tracking ownership of all
> > memory pages in the system. The current page-tracking infrastructure at
> > EL2 only allows binary states: a page is either owned or not by an
> > entity. But a number of use-cases will require more complex states for
> > pages that are shared between two entities (host, hypervisor, or guests).
> > 
> > In preparation for supporting these use-cases, introduce in the KVM
> > page-table library some infrastructure allowing to tag shared pages
> > using ignored bits (a.k.a. software bits) in PTEs.
> > 
> > Signed-off-by: Quentin Perret <qperret@google.com>
> > ---
> >  arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
> >  arch/arm64/kvm/hyp/pgtable.c         | 25 +++++++++++++++++++++++++
> >  2 files changed, 30 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > index dd72653314c7..f6d3d5c8910d 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -81,6 +81,8 @@ enum kvm_pgtable_stage2_flags {
> >   * @KVM_PGTABLE_PROT_W:		Write permission.
> >   * @KVM_PGTABLE_PROT_R:		Read permission.
> >   * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
> > + * @KVM_PGTABLE_STATE_SHARED:	Page shared with another entity.
> > + * @KVM_PGTABLE_STATE_BORROWED:	Page borrowed from another entity.
> >   */
> >  enum kvm_pgtable_prot {
> >  	KVM_PGTABLE_PROT_X			= BIT(0),
> > @@ -88,6 +90,9 @@ enum kvm_pgtable_prot {
> >  	KVM_PGTABLE_PROT_R			= BIT(2),
> >  
> >  	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
> > +
> > +	KVM_PGTABLE_STATE_SHARED		= BIT(4),
> > +	KVM_PGTABLE_STATE_BORROWED		= BIT(5),
> 
> I'd rather have some indirection here, as we have other potential
> users for the SW bits outside of pKVM (see the NV series, which uses
> some of these SW bits as the backend for TTL-based TLB invalidation).
> 
> Can we instead only describe the SW bit states in this enum, and let
> the users map the semantic they require onto that state? See [1] for
> what I carry in the NV branch.

Works for me -- I just wanted to make sure we don't have users in
different places that use the same bits without knowing, but no strong
opinions, so happy to change.

> >  };
> >  
> >  #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
> > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> > index 5bdbe7a31551..51598b79dafc 100644
> > --- a/arch/arm64/kvm/hyp/pgtable.c
> > +++ b/arch/arm64/kvm/hyp/pgtable.c
> > @@ -211,6 +211,29 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
> >  	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
> >  }
> >  
> > +static kvm_pte_t pte_ignored_bit_prot(enum kvm_pgtable_prot prot)
> 
> Can we call these sw rather than ignored?

Sure.

> > +{
> > +	kvm_pte_t ignored_bits = 0;
> > +
> > +	/*
> > +	 * Ignored bits 0 and 1 are reserved to track the memory ownership
> > +	 * state of each page:
> > +	 *   00: The page is owned solely by the page-table owner.
> > +	 *   01: The page is owned by the page-table owner, but is shared
> > +	 *       with another entity.
> > +	 *   10: The page is shared with, but not owned by the page-table owner.
> > +	 *   11: Reserved for future use (lending).
> > +	 */
> > +	if (prot & KVM_PGTABLE_STATE_SHARED) {
> > +		if (prot & KVM_PGTABLE_STATE_BORROWED)
> > +			ignored_bits |= BIT(1);
> > +		else
> > +			ignored_bits |= BIT(0);
> > +	}
> > +
> > +	return FIELD_PREP(KVM_PTE_LEAF_ATTR_IGNORED, ignored_bits);
> > +}
> > +
> >  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
> >  				  u32 level, kvm_pte_t *ptep,
> >  				  enum kvm_pgtable_walk_flags flag)
> > @@ -357,6 +380,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
> >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
> >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
> >  	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
> > +	attr |= pte_ignored_bit_prot(prot);
> >  	*ptep = attr;
> >  
> >  	return 0;
> > @@ -558,6 +582,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
> >  
> >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
> >  	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
> > +	attr |= pte_ignored_bit_prot(prot);
> >  	*ptep = attr;
> >  
> >  	return 0;
> 
> How about kvm_pgtable_stage2_relax_perms()?

It should leave SW bits untouched, and it really felt like a path where
we want to change permissions and nothing else. What did you have in
mind?

Cheers,
Quentin
Marc Zyngier July 20, 2021, 10:13 a.m. UTC | #3
On Mon, 19 Jul 2021 16:49:13 +0100,
Quentin Perret <qperret@google.com> wrote:
> 
> On Monday 19 Jul 2021 at 15:43:34 (+0100), Marc Zyngier wrote:
> > On Mon, 19 Jul 2021 11:47:29 +0100,
> > Quentin Perret <qperret@google.com> wrote:
> > > 
> > > The hypervisor will soon be in charge of tracking ownership of all
> > > memory pages in the system. The current page-tracking infrastructure at
> > > EL2 only allows binary states: a page is either owned or not by an
> > > entity. But a number of use-cases will require more complex states for
> > > pages that are shared between two entities (host, hypervisor, or guests).
> > > 
> > > In preparation for supporting these use-cases, introduce in the KVM
> > > page-table library some infrastructure allowing to tag shared pages
> > > using ignored bits (a.k.a. software bits) in PTEs.
> > > 
> > > Signed-off-by: Quentin Perret <qperret@google.com>
> > > ---
> > >  arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
> > >  arch/arm64/kvm/hyp/pgtable.c         | 25 +++++++++++++++++++++++++
> > >  2 files changed, 30 insertions(+)
> > > 
> > > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > > index dd72653314c7..f6d3d5c8910d 100644
> > > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > > @@ -81,6 +81,8 @@ enum kvm_pgtable_stage2_flags {
> > >   * @KVM_PGTABLE_PROT_W:		Write permission.
> > >   * @KVM_PGTABLE_PROT_R:		Read permission.
> > >   * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
> > > + * @KVM_PGTABLE_STATE_SHARED:	Page shared with another entity.
> > > + * @KVM_PGTABLE_STATE_BORROWED:	Page borrowed from another entity.
> > >   */
> > >  enum kvm_pgtable_prot {
> > >  	KVM_PGTABLE_PROT_X			= BIT(0),
> > > @@ -88,6 +90,9 @@ enum kvm_pgtable_prot {
> > >  	KVM_PGTABLE_PROT_R			= BIT(2),
> > >  
> > >  	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
> > > +
> > > +	KVM_PGTABLE_STATE_SHARED		= BIT(4),
> > > +	KVM_PGTABLE_STATE_BORROWED		= BIT(5),
> > 
> > I'd rather have some indirection here, as we have other potential
> > users for the SW bits outside of pKVM (see the NV series, which uses
> > some of these SW bits as the backend for TTL-based TLB invalidation).
> > 
> > Can we instead only describe the SW bit states in this enum, and let
> > the users map the semantic they require onto that state? See [1] for
> > what I carry in the NV branch.
> 
> Works for me -- I just wanted to make sure we don't have users in
> different places that use the same bits without knowing, but no strong
> opinions, so happy to change.
> 
> > >  };
> > >  
> > >  #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
> > > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> > > index 5bdbe7a31551..51598b79dafc 100644
> > > --- a/arch/arm64/kvm/hyp/pgtable.c
> > > +++ b/arch/arm64/kvm/hyp/pgtable.c
> > > @@ -211,6 +211,29 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
> > >  	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
> > >  }
> > >  
> > > +static kvm_pte_t pte_ignored_bit_prot(enum kvm_pgtable_prot prot)
> > 
> > Can we call these sw rather than ignored?
> 
> Sure.
> 
> > > +{
> > > +	kvm_pte_t ignored_bits = 0;
> > > +
> > > +	/*
> > > +	 * Ignored bits 0 and 1 are reserved to track the memory ownership
> > > +	 * state of each page:
> > > +	 *   00: The page is owned solely by the page-table owner.
> > > +	 *   01: The page is owned by the page-table owner, but is shared
> > > +	 *       with another entity.
> > > +	 *   10: The page is shared with, but not owned by the page-table owner.
> > > +	 *   11: Reserved for future use (lending).
> > > +	 */
> > > +	if (prot & KVM_PGTABLE_STATE_SHARED) {
> > > +		if (prot & KVM_PGTABLE_STATE_BORROWED)
> > > +			ignored_bits |= BIT(1);
> > > +		else
> > > +			ignored_bits |= BIT(0);
> > > +	}
> > > +
> > > +	return FIELD_PREP(KVM_PTE_LEAF_ATTR_IGNORED, ignored_bits);
> > > +}
> > > +
> > >  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
> > >  				  u32 level, kvm_pte_t *ptep,
> > >  				  enum kvm_pgtable_walk_flags flag)
> > > @@ -357,6 +380,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
> > >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
> > >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
> > >  	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
> > > +	attr |= pte_ignored_bit_prot(prot);
> > >  	*ptep = attr;
> > >  
> > >  	return 0;
> > > @@ -558,6 +582,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
> > >  
> > >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
> > >  	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
> > > +	attr |= pte_ignored_bit_prot(prot);
> > >  	*ptep = attr;
> > >  
> > >  	return 0;
> > 
> > How about kvm_pgtable_stage2_relax_perms()?
> 
> It should leave SW bits untouched, and it really felt like a path were
> we want to change permissions and nothing else. What did you have in
> mind?

It isn't clear to me that it would not (cannot?) be used to change
other bits, given that it takes an arbitrary 'prot' set. If there is
such an intended restriction, we definitely should document it.

	M.
Quentin Perret July 20, 2021, 11:48 a.m. UTC | #4
On Tuesday 20 Jul 2021 at 11:13:31 (+0100), Marc Zyngier wrote:
> On Mon, 19 Jul 2021 16:49:13 +0100,
> Quentin Perret <qperret@google.com> wrote:
> > 
> > On Monday 19 Jul 2021 at 15:43:34 (+0100), Marc Zyngier wrote:
> > > On Mon, 19 Jul 2021 11:47:29 +0100,
> > > Quentin Perret <qperret@google.com> wrote:
> > > > 
> > > > The hypervisor will soon be in charge of tracking ownership of all
> > > > memory pages in the system. The current page-tracking infrastructure at
> > > > EL2 only allows binary states: a page is either owned or not by an
> > > > entity. But a number of use-cases will require more complex states for
> > > > pages that are shared between two entities (host, hypervisor, or guests).
> > > > 
> > > > In preparation for supporting these use-cases, introduce in the KVM
> > > > page-table library some infrastructure allowing to tag shared pages
> > > > using ignored bits (a.k.a. software bits) in PTEs.
> > > > 
> > > > Signed-off-by: Quentin Perret <qperret@google.com>
> > > > ---
> > > >  arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
> > > >  arch/arm64/kvm/hyp/pgtable.c         | 25 +++++++++++++++++++++++++
> > > >  2 files changed, 30 insertions(+)
> > > > 
> > > > diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> > > > index dd72653314c7..f6d3d5c8910d 100644
> > > > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > > > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > > > @@ -81,6 +81,8 @@ enum kvm_pgtable_stage2_flags {
> > > >   * @KVM_PGTABLE_PROT_W:		Write permission.
> > > >   * @KVM_PGTABLE_PROT_R:		Read permission.
> > > >   * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
> > > > + * @KVM_PGTABLE_STATE_SHARED:	Page shared with another entity.
> > > > + * @KVM_PGTABLE_STATE_BORROWED:	Page borrowed from another entity.
> > > >   */
> > > >  enum kvm_pgtable_prot {
> > > >  	KVM_PGTABLE_PROT_X			= BIT(0),
> > > > @@ -88,6 +90,9 @@ enum kvm_pgtable_prot {
> > > >  	KVM_PGTABLE_PROT_R			= BIT(2),
> > > >  
> > > >  	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
> > > > +
> > > > +	KVM_PGTABLE_STATE_SHARED		= BIT(4),
> > > > +	KVM_PGTABLE_STATE_BORROWED		= BIT(5),
> > > 
> > > I'd rather have some indirection here, as we have other potential
> > > users for the SW bits outside of pKVM (see the NV series, which uses
> > > some of these SW bits as the backend for TTL-based TLB invalidation).
> > > 
> > > Can we instead only describe the SW bit states in this enum, and let
> > > the users map the semantic they require onto that state? See [1] for
> > > what I carry in the NV branch.
> > 
> > Works for me -- I just wanted to make sure we don't have users in
> > different places that use the same bits without knowing, but no strong
> > opinions, so happy to change.
> > 
> > > >  };
> > > >  
> > > >  #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
> > > > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> > > > index 5bdbe7a31551..51598b79dafc 100644
> > > > --- a/arch/arm64/kvm/hyp/pgtable.c
> > > > +++ b/arch/arm64/kvm/hyp/pgtable.c
> > > > @@ -211,6 +211,29 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
> > > >  	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
> > > >  }
> > > >  
> > > > +static kvm_pte_t pte_ignored_bit_prot(enum kvm_pgtable_prot prot)
> > > 
> > > Can we call these sw rather than ignored?
> > 
> > Sure.
> > 
> > > > +{
> > > > +	kvm_pte_t ignored_bits = 0;
> > > > +
> > > > +	/*
> > > > +	 * Ignored bits 0 and 1 are reserved to track the memory ownership
> > > > +	 * state of each page:
> > > > +	 *   00: The page is owned solely by the page-table owner.
> > > > +	 *   01: The page is owned by the page-table owner, but is shared
> > > > +	 *       with another entity.
> > > > +	 *   10: The page is shared with, but not owned by the page-table owner.
> > > > +	 *   11: Reserved for future use (lending).
> > > > +	 */
> > > > +	if (prot & KVM_PGTABLE_STATE_SHARED) {
> > > > +		if (prot & KVM_PGTABLE_STATE_BORROWED)
> > > > +			ignored_bits |= BIT(1);
> > > > +		else
> > > > +			ignored_bits |= BIT(0);
> > > > +	}
> > > > +
> > > > +	return FIELD_PREP(KVM_PTE_LEAF_ATTR_IGNORED, ignored_bits);
> > > > +}
> > > > +
> > > >  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
> > > >  				  u32 level, kvm_pte_t *ptep,
> > > >  				  enum kvm_pgtable_walk_flags flag)
> > > > @@ -357,6 +380,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
> > > >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
> > > >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
> > > >  	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
> > > > +	attr |= pte_ignored_bit_prot(prot);
> > > >  	*ptep = attr;
> > > >  
> > > >  	return 0;
> > > > @@ -558,6 +582,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
> > > >  
> > > >  	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
> > > >  	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
> > > > +	attr |= pte_ignored_bit_prot(prot);
> > > >  	*ptep = attr;
> > > >  
> > > >  	return 0;
> > > 
> > > How about kvm_pgtable_stage2_relax_perms()?
> > 
> > It should leave SW bits untouched, and it really felt like a path were
> > we want to change permissions and nothing else. What did you have in
> > mind?
> 
> It isn't clear to me that it would not (cannot?) be used to change
> other bits, given that it takes an arbitrary 'prot' set.

Sure, though it already ignores KVM_PGTABLE_PROT_DEVICE.

I guess the thing I find hard to reason about is that
kvm_pgtable_stage2_relax_perms() is 'additive'. E.g. it can make a
mapping RW if it was RO, but not the other way around. With the current
patch-set it wasn't really clear how that should translate to
KVM_PGTABLE_STATE_SHARED and such.

> If there is
> such an intended restriction, we definitely should document it.

Ack, that's definitely missing. And in fact I should probably make
kvm_pgtable_stage2_relax_perms() return -EINVAL if we're passing prot
values it can't handle.

Cheers,
Quentin
Fuad Tabba July 20, 2021, 1:48 p.m. UTC | #5
Hi Quentin,

On Mon, Jul 19, 2021 at 11:47 AM Quentin Perret <qperret@google.com> wrote:
>
> The hypervisor will soon be in charge of tracking ownership of all
> memory pages in the system. The current page-tracking infrastructure at
> EL2 only allows binary states: a page is either owned or not by an
> entity. But a number of use-cases will require more complex states for
> pages that are shared between two entities (host, hypervisor, or guests).
>
> In preparation for supporting these use-cases, introduce in the KVM
> page-table library some infrastructure allowing to tag shared pages
> using ignored bits (a.k.a. software bits) in PTEs.
>
> Signed-off-by: Quentin Perret <qperret@google.com>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
>  arch/arm64/kvm/hyp/pgtable.c         | 25 +++++++++++++++++++++++++
>  2 files changed, 30 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index dd72653314c7..f6d3d5c8910d 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -81,6 +81,8 @@ enum kvm_pgtable_stage2_flags {
>   * @KVM_PGTABLE_PROT_W:                Write permission.
>   * @KVM_PGTABLE_PROT_R:                Read permission.
>   * @KVM_PGTABLE_PROT_DEVICE:   Device attributes.
> + * @KVM_PGTABLE_STATE_SHARED:  Page shared with another entity.
> + * @KVM_PGTABLE_STATE_BORROWED:        Page borrowed from another entity.
>   */
>  enum kvm_pgtable_prot {
>         KVM_PGTABLE_PROT_X                      = BIT(0),
> @@ -88,6 +90,9 @@ enum kvm_pgtable_prot {
>         KVM_PGTABLE_PROT_R                      = BIT(2),
>
>         KVM_PGTABLE_PROT_DEVICE                 = BIT(3),
> +
> +       KVM_PGTABLE_STATE_SHARED                = BIT(4),
> +       KVM_PGTABLE_STATE_BORROWED              = BIT(5),
>  };
>
>  #define KVM_PGTABLE_PROT_RW    (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 5bdbe7a31551..51598b79dafc 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -211,6 +211,29 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
>         return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
>  }
>
> +static kvm_pte_t pte_ignored_bit_prot(enum kvm_pgtable_prot prot)
> +{
> +       kvm_pte_t ignored_bits = 0;
> +
> +       /*
> +        * Ignored bits 0 and 1 are reserved to track the memory ownership
> +        * state of each page:
> +        *   00: The page is owned solely by the page-table owner.
> +        *   01: The page is owned by the page-table owner, but is shared
> +        *       with another entity.
> +        *   10: The page is shared with, but not owned by the page-table owner.
> +        *   11: Reserved for future use (lending).
> +        */
> +       if (prot & KVM_PGTABLE_STATE_SHARED) {
> +               if (prot & KVM_PGTABLE_STATE_BORROWED)
> +                       ignored_bits |= BIT(1);
> +               else
> +                       ignored_bits |= BIT(0);
> +       }

This might tie in to Marc's comments for using enums, but
consolidating the translation between prot and ignored/software bits
in one place would be good: thinking about patch 10 as well, where you
get the prot from the ignored bits. Even though you have documented
it, I'm finding the part where a field can be borrowed and shared as
opposed to being only shared not very intuitive, and I need to reread
the comment here to remember the difference while going through the
code.

You also mention lending as potentially reserved for the future, but I
think that lending is the other side of borrowing (depends on who's
doing the giving/taking). I wonder if in this case it would be clearer
to describe it in terms of whether it's exclusively owned vs owned but
shared (for the owner), and just shared for the sharer...

Thanks,
/fuad


> +       return FIELD_PREP(KVM_PTE_LEAF_ATTR_IGNORED, ignored_bits);
> +}
> +
>  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
>                                   u32 level, kvm_pte_t *ptep,
>                                   enum kvm_pgtable_walk_flags flag)
> @@ -357,6 +380,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
>         attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
>         attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
>         attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
> +       attr |= pte_ignored_bit_prot(prot);
>         *ptep = attr;
>
>         return 0;
> @@ -558,6 +582,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
>
>         attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
>         attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
> +       attr |= pte_ignored_bit_prot(prot);
>         *ptep = attr;
>
>         return 0;
> --
> 2.32.0.402.g57bb445576-goog
>
Quentin Perret July 20, 2021, 2:06 p.m. UTC | #6
On Tuesday 20 Jul 2021 at 14:48:09 (+0100), Fuad Tabba wrote:
> This might tie in to Marc's comments for using enums, but
> consolidating the translation between prot and ignored/software bits
> in one place would be good: thinking about patch 10 as well, where you
> get the prot from the ignored bits. Even though you have documented
> it, I'm finding the part where a field can be borrowed and shared as
> opposed to being only shared not very intuitive, and I need to reread
> the comment here to remember the difference while going through the
> code.
> 
> You also mention lending as potentially reserved for the future, but I
> think that lending is the other side of borrowing (depends on who's
> doing the giving/taking). I wonder if in this case it would be clearer
> to describe it in terms of whether it's exclusively owned vs owned but
> shared (for the owner), and just shared for the sharer...

Argh so I actually found the encoding pretty neat :/
The idea is the following:

  - an entity that has a page mapped as SHARED in its PT means it
    doesn't have exclusive access to the page;

  - an entity that has a page mapped as BORROWED in its PT means it has
    access to a page it doesn't own;

From that we can build the states we need:

  - when an entity shares a page with another, the original owner gets a
    SHARED mapping, and the recipient a SHARED+BORROWED mapping.

  - and in the future when/if we implement lending (which means an
    entity gives exclusive access to a page to another entity, but
    retains ownership) we can map the page in the recipient as
    'BORROWED' only, but not 'SHARED'. And the original owner will have
    an invalid mapping with a new state 'LENT', which is encoded with
    both SW bits set.

How does that sound? Did you have something else in mind?

Thanks,
Quentin
Fuad Tabba July 21, 2021, 7:34 a.m. UTC | #7
Hi Quentin,


On Tue, Jul 20, 2021 at 3:06 PM Quentin Perret <qperret@google.com> wrote:
>
> On Tuesday 20 Jul 2021 at 14:48:09 (+0100), Fuad Tabba wrote:
> > This might tie in to Marc's comments for using enums, but
> > consolidating the translation between prot and ignored/software bits
> > in one place would be good: thinking about patch 10 as well, where you
> > get the prot from the ignored bits. Even though you have documented
> > it, I'm finding the part where a field can be borrowed and shared as
> > opposed to being only shared not very intuitive, and I need to reread
> > the comment here to remember the difference while going through the
> > code.
> >
> > You also mention lending as potentially reserved for the future, but I
> > think that lending is the other side of borrowing (depends on who's
> > doing the giving/taking). I wonder if in this case it would be clearer
> > to describe it in terms of whether it's exclusively owned vs owned but
> > shared (for the owner), and just shared for the sharer...
>
> Argh so I actually found the encoding pretty neat :/
> The idea is the following:
>
>   - an entity that has a page mapped as SHARED in its PT means it
>     doesn't have exclusive access to the page;
>
>   - an entity that has a page mapped as BORROWED in its PT means it has
>     access to a page it doesn't own;
>
> From that we can build the states we need:
>
>   - when an entity shares a page with another, the original owner gets a
>     SHARED mapping, and the recipient a SHARED+BORROWED mapping.
>
>   - and in the future when/if we implement lending (which means an
>     entity gives exclusive access to a page to another entity, but
>     retains ownership) we can map the page in the recipient as
>     'BORROWED' only, but not 'SHARED'. And the original owner will have
>     an invalid mapping with a new state 'LENT', which is encoded with
>     both SW bits set.
>
> How does that sound? Did you have something else in mind?

The encoding is very neat by the way :D

I see where you're going with the lent state now, and I understand the
states as well as the possible transitions now that you've explained
it.

It's the terminology that confused me a bit (especially when you
mention lending, which seemed to imply it is something distinct from
borrowing as opposed to just the other side of it). What would help
me is to document this, and the possible combinations/legal states.
kvm_pgtable.h describes the prots a bit, but maybe you could expand
that similar to what you've done in this email:

@KVM_PGTABLE_STATE_BORROWED: Page borrowed from another entity: has
access to the page but no ownership

Not sure if defining aliases for all legal combinations would also
help or add to the confusion. Thinking out loud: something along the
lines of cache state taxonomy (e.g., Sweazey and Smith fig 3 [1]). You
have that in the borrowed (as opposed to owned), and shared (as
opposed to exclusive). So aliases to build on these:

#define KVM_PGTABLE_STATE_BORROWED_SHARED (KVM_PGTABLE_STATE_SHARED |
KVM_PGTABLE_STATE_BORROWED)
#define KVM_PGTABLE_STATE_BORROWED_EXCLUSIVE (KVM_PGTABLE_STATE_BORROWED)
#define KVM_PGTABLE_STATE_OWNED_SHARED (KVM_PGTABLE_STATE_SHARED)
#define KVM_PGTABLE_STATE_OWNED_EXCLUSIVE (0ULL)

You have thought about this way more than I have. But I think that
having clear documentation, ideally in the code itself via
helpers/enums/aliases could help people like me who come to the code
later not shoot themselves in the foot.

Thanks!
/fuad

[1] https://www.cs.auckland.ac.nz/compsci703s1c/archive/2008/resources/Sweazey.pdf

> Thanks,
> Quentin
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index dd72653314c7..f6d3d5c8910d 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -81,6 +81,8 @@  enum kvm_pgtable_stage2_flags {
  * @KVM_PGTABLE_PROT_W:		Write permission.
  * @KVM_PGTABLE_PROT_R:		Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
+ * @KVM_PGTABLE_STATE_SHARED:	Page shared with another entity.
+ * @KVM_PGTABLE_STATE_BORROWED:	Page borrowed from another entity.
  */
 enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_X			= BIT(0),
@@ -88,6 +90,9 @@  enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_R			= BIT(2),
 
 	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
+
+	KVM_PGTABLE_STATE_SHARED		= BIT(4),
+	KVM_PGTABLE_STATE_BORROWED		= BIT(5),
 };
 
 #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 5bdbe7a31551..51598b79dafc 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -211,6 +211,29 @@  static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
 	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
 }
 
+static kvm_pte_t pte_ignored_bit_prot(enum kvm_pgtable_prot prot)
+{
+	kvm_pte_t ignored_bits = 0;
+
+	/*
+	 * Ignored bits 0 and 1 are reserved to track the memory ownership
+	 * state of each page:
+	 *   00: The page is owned solely by the page-table owner.
+	 *   01: The page is owned by the page-table owner, but is shared
+	 *       with another entity.
+	 *   10: The page is shared with, but not owned by the page-table owner.
+	 *   11: Reserved for future use (lending).
+	 */
+	if (prot & KVM_PGTABLE_STATE_SHARED) {
+		if (prot & KVM_PGTABLE_STATE_BORROWED)
+			ignored_bits |= BIT(1);
+		else
+			ignored_bits |= BIT(0);
+	}
+
+	return FIELD_PREP(KVM_PTE_LEAF_ATTR_IGNORED, ignored_bits);
+}
+
 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
 				  u32 level, kvm_pte_t *ptep,
 				  enum kvm_pgtable_walk_flags flag)
@@ -357,6 +380,7 @@  static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
 	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
+	attr |= pte_ignored_bit_prot(prot);
 	*ptep = attr;
 
 	return 0;
@@ -558,6 +582,7 @@  static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
 
 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
 	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+	attr |= pte_ignored_bit_prot(prot);
 	*ptep = attr;
 
 	return 0;