diff mbox series

[v2,02/15] iommu: Report domain nesting info

Message ID 1591877734-66527-3-git-send-email-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series vfio: expose virtual Shared Virtual Addressing to VMs | expand

Commit Message

Yi Liu June 11, 2020, 12:15 p.m. UTC
IOMMUs that support nesting translation needs report the capability info
to userspace, e.g. the format of first level/stage paging structures.

Cc: Kevin Tian <kevin.tian@intel.com>
CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
@Jean, Eric: as nesting was introduced for ARM, but looks like no actual
user of it. right? So I'm wondering if we can reuse DOMAIN_ATTR_NESTING
to retrieve nesting info? how about your opinions?

 include/linux/iommu.h      |  1 +
 include/uapi/linux/iommu.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

Comments

Alex Williamson June 11, 2020, 7:30 p.m. UTC | #1
On Thu, 11 Jun 2020 05:15:21 -0700
Liu Yi L <yi.l.liu@intel.com> wrote:

> IOMMUs that support nesting translation needs report the capability info
> to userspace, e.g. the format of first level/stage paging structures.
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Cc: Eric Auger <eric.auger@redhat.com>
> Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Lu Baolu <baolu.lu@linux.intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> ---
> @Jean, Eric: as nesting was introduced for ARM, but looks like no actual
> user of it. right? So I'm wondering if we can reuse DOMAIN_ATTR_NESTING
> to retrieve nesting info? how about your opinions?
> 
>  include/linux/iommu.h      |  1 +
>  include/uapi/linux/iommu.h | 34 ++++++++++++++++++++++++++++++++++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 78a26ae..f6e4b49 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -126,6 +126,7 @@ enum iommu_attr {
>  	DOMAIN_ATTR_FSL_PAMUV1,
>  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
>  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> +	DOMAIN_ATTR_NESTING_INFO,
>  	DOMAIN_ATTR_MAX,
>  };
>  
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 303f148..02eac73 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
>  	};
>  };
>  
> +struct iommu_nesting_info {
> +	__u32	size;
> +	__u32	format;
> +	__u32	features;
> +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
> +	__u32	flags;
> +	__u8	data[];
> +};
> +
> +/*
> + * @flags:	VT-d specific flags. Currently reserved for future
> + *		extension.
> + * @addr_width:	The output addr width of first level/stage translation
> + * @pasid_bits:	Maximum supported PASID bits, 0 represents no PASID
> + *		support.
> + * @cap_reg:	Describe basic capabilities as defined in VT-d capability
> + *		register.
> + * @cap_mask:	Mark valid capability bits in @cap_reg.
> + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> + *		extended capability register.
> + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.

Please explain this a little further, why do we need to tell userspace
about cap/ecap register bits that aren't valid through this interface?
Thanks,

Alex


> + */
> +struct iommu_nesting_info_vtd {
> +	__u32	flags;
> +	__u16	addr_width;
> +	__u16	pasid_bits;
> +	__u64	cap_reg;
> +	__u64	cap_mask;
> +	__u64	ecap_reg;
> +	__u64	ecap_mask;
> +};
> +
>  #endif /* _UAPI_IOMMU_H */
Yi Liu June 12, 2020, 9:05 a.m. UTC | #2
Hi Alex,

> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Friday, June 12, 2020 3:30 AM
> 
> On Thu, 11 Jun 2020 05:15:21 -0700
> Liu Yi L <yi.l.liu@intel.com> wrote:
> 
> > IOMMUs that support nesting translation needs report the capability
> > info to userspace, e.g. the format of first level/stage paging structures.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Alex Williamson <alex.williamson@redhat.com>
> > Cc: Eric Auger <eric.auger@redhat.com>
> > Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > Cc: Joerg Roedel <joro@8bytes.org>
> > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > ---
> > @Jean, Eric: as nesting was introduced for ARM, but looks like no
> > actual user of it. right? So I'm wondering if we can reuse
> > DOMAIN_ATTR_NESTING to retrieve nesting info? how about your opinions?
> >
> >  include/linux/iommu.h      |  1 +
> >  include/uapi/linux/iommu.h | 34 ++++++++++++++++++++++++++++++++++
> >  2 files changed, 35 insertions(+)
> >
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > 78a26ae..f6e4b49 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -126,6 +126,7 @@ enum iommu_attr {
> >  	DOMAIN_ATTR_FSL_PAMUV1,
> >  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
> >  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> > +	DOMAIN_ATTR_NESTING_INFO,
> >  	DOMAIN_ATTR_MAX,
> >  };
> >
> > diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> > index 303f148..02eac73 100644
> > --- a/include/uapi/linux/iommu.h
> > +++ b/include/uapi/linux/iommu.h
> > @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
> >  	};
> >  };
> >
> > +struct iommu_nesting_info {
> > +	__u32	size;
> > +	__u32	format;
> > +	__u32	features;
> > +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> > +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> > +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
> > +	__u32	flags;
> > +	__u8	data[];
> > +};
> > +
> > +/*
> > + * @flags:	VT-d specific flags. Currently reserved for future
> > + *		extension.
> > + * @addr_width:	The output addr width of first level/stage translation
> > + * @pasid_bits:	Maximum supported PASID bits, 0 represents no PASID
> > + *		support.
> > + * @cap_reg:	Describe basic capabilities as defined in VT-d capability
> > + *		register.
> > + * @cap_mask:	Mark valid capability bits in @cap_reg.
> > + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> > + *		extended capability register.
> > + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> 
> Please explain this a little further, why do we need to tell userspace about
> cap/ecap register bits that aren't valid through this interface?
> Thanks,

we only want to tell userspace about the bits marked in the cap/ecap_mask.
cap/ecap_mask is kind of white-list of the cap/ecap register. userspace should
only care about the bits in the white-list, for other bits, it should ignore.

Regards,
Yi Liu

> Alex
> 
> 
> > + */
> > +struct iommu_nesting_info_vtd {
> > +	__u32	flags;
> > +	__u16	addr_width;
> > +	__u16	pasid_bits;
> > +	__u64	cap_reg;
> > +	__u64	cap_mask;
> > +	__u64	ecap_reg;
> > +	__u64	ecap_mask;
> > +};
> > +
> >  #endif /* _UAPI_IOMMU_H */
Tian, Kevin June 15, 2020, 1:22 a.m. UTC | #3
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Friday, June 12, 2020 5:05 PM
> 
> Hi Alex,
> 
> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Friday, June 12, 2020 3:30 AM
> >
> > On Thu, 11 Jun 2020 05:15:21 -0700
> > Liu Yi L <yi.l.liu@intel.com> wrote:
> >
> > > IOMMUs that support nesting translation needs report the capability
> > > info to userspace, e.g. the format of first level/stage paging structures.
> > >
> > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > Cc: Alex Williamson <alex.williamson@redhat.com>
> > > Cc: Eric Auger <eric.auger@redhat.com>
> > > Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > > Cc: Joerg Roedel <joro@8bytes.org>
> > > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > ---
> > > @Jean, Eric: as nesting was introduced for ARM, but looks like no
> > > actual user of it. right? So I'm wondering if we can reuse
> > > DOMAIN_ATTR_NESTING to retrieve nesting info? how about your
> opinions?
> > >
> > >  include/linux/iommu.h      |  1 +
> > >  include/uapi/linux/iommu.h | 34
> ++++++++++++++++++++++++++++++++++
> > >  2 files changed, 35 insertions(+)
> > >
> > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > 78a26ae..f6e4b49 100644
> > > --- a/include/linux/iommu.h
> > > +++ b/include/linux/iommu.h
> > > @@ -126,6 +126,7 @@ enum iommu_attr {
> > >  	DOMAIN_ATTR_FSL_PAMUV1,
> > >  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
> > >  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> > > +	DOMAIN_ATTR_NESTING_INFO,
> > >  	DOMAIN_ATTR_MAX,
> > >  };
> > >
> > > diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> > > index 303f148..02eac73 100644
> > > --- a/include/uapi/linux/iommu.h
> > > +++ b/include/uapi/linux/iommu.h
> > > @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
> > >  	};
> > >  };
> > >
> > > +struct iommu_nesting_info {
> > > +	__u32	size;
> > > +	__u32	format;
> > > +	__u32	features;
> > > +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> > > +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> > > +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
> > > +	__u32	flags;
> > > +	__u8	data[];
> > > +};
> > > +
> > > +/*
> > > + * @flags:	VT-d specific flags. Currently reserved for future
> > > + *		extension.
> > > + * @addr_width:	The output addr width of first level/stage translation
> > > + * @pasid_bits:	Maximum supported PASID bits, 0 represents no
> PASID
> > > + *		support.
> > > + * @cap_reg:	Describe basic capabilities as defined in VT-d
> capability
> > > + *		register.
> > > + * @cap_mask:	Mark valid capability bits in @cap_reg.
> > > + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> > > + *		extended capability register.
> > > + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> >
> > Please explain this a little further, why do we need to tell userspace about
> > cap/ecap register bits that aren't valid through this interface?
> > Thanks,
> 
> we only want to tell userspace about the bits marked in the cap/ecap_mask.
> cap/ecap_mask is kind of white-list of the cap/ecap register. userspace
> should
> only care about the bits in the white-list, for other bits, it should ignore.
> 
> Regards,
> Yi Liu

For invalid bits if kernel just clears them then do we still need additional
mask bits to explicitly mark them out? I guess this might be the point that 
Alex asked...

> 
> > Alex
> >
> >
> > > + */
> > > +struct iommu_nesting_info_vtd {
> > > +	__u32	flags;
> > > +	__u16	addr_width;
> > > +	__u16	pasid_bits;
> > > +	__u64	cap_reg;
> > > +	__u64	cap_mask;
> > > +	__u64	ecap_reg;
> > > +	__u64	ecap_mask;
> > > +};
> > > +
> > >  #endif /* _UAPI_IOMMU_H */
Yi Liu June 15, 2020, 6:04 a.m. UTC | #4
Hi Kevin,

> From: Tian, Kevin <kevin.tian@intel.com>
> Sent: Monday, June 15, 2020 9:23 AM
> 
> > From: Liu, Yi L <yi.l.liu@intel.com>
> > Sent: Friday, June 12, 2020 5:05 PM
> >
> > Hi Alex,
> >
> > > From: Alex Williamson <alex.williamson@redhat.com>
> > > Sent: Friday, June 12, 2020 3:30 AM
> > >
> > > On Thu, 11 Jun 2020 05:15:21 -0700
> > > Liu Yi L <yi.l.liu@intel.com> wrote:
> > >
> > > > IOMMUs that support nesting translation needs report the
> > > > capability info to userspace, e.g. the format of first level/stage paging
> structures.
> > > >
> > > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > > CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > Cc: Alex Williamson <alex.williamson@redhat.com>
> > > > Cc: Eric Auger <eric.auger@redhat.com>
> > > > Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > > > Cc: Joerg Roedel <joro@8bytes.org>
> > > > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > ---
> > > > @Jean, Eric: as nesting was introduced for ARM, but looks like no
> > > > actual user of it. right? So I'm wondering if we can reuse
> > > > DOMAIN_ATTR_NESTING to retrieve nesting info? how about your
> > opinions?
> > > >
> > > >  include/linux/iommu.h      |  1 +
> > > >  include/uapi/linux/iommu.h | 34
> > ++++++++++++++++++++++++++++++++++
> > > >  2 files changed, 35 insertions(+)
> > > >
> > > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > > 78a26ae..f6e4b49 100644
> > > > --- a/include/linux/iommu.h
> > > > +++ b/include/linux/iommu.h
> > > > @@ -126,6 +126,7 @@ enum iommu_attr {
> > > >  	DOMAIN_ATTR_FSL_PAMUV1,
> > > >  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
> > > >  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> > > > +	DOMAIN_ATTR_NESTING_INFO,
> > > >  	DOMAIN_ATTR_MAX,
> > > >  };
> > > >
> > > > diff --git a/include/uapi/linux/iommu.h
> > > > b/include/uapi/linux/iommu.h index 303f148..02eac73 100644
> > > > --- a/include/uapi/linux/iommu.h
> > > > +++ b/include/uapi/linux/iommu.h
> > > > @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
> > > >  	};
> > > >  };
> > > >
> > > > +struct iommu_nesting_info {
> > > > +	__u32	size;
> > > > +	__u32	format;
> > > > +	__u32	features;
> > > > +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> > > > +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> > > > +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
> > > > +	__u32	flags;
> > > > +	__u8	data[];
> > > > +};
> > > > +
> > > > +/*
> > > > + * @flags:	VT-d specific flags. Currently reserved for future
> > > > + *		extension.
> > > > + * @addr_width:	The output addr width of first level/stage
> translation
> > > > + * @pasid_bits:	Maximum supported PASID bits, 0 represents no
> > PASID
> > > > + *		support.
> > > > + * @cap_reg:	Describe basic capabilities as defined in VT-d
> > capability
> > > > + *		register.
> > > > + * @cap_mask:	Mark valid capability bits in @cap_reg.
> > > > + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> > > > + *		extended capability register.
> > > > + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> > >
> > > Please explain this a little further, why do we need to tell
> > > userspace about cap/ecap register bits that aren't valid through this interface?
> > > Thanks,
> >
> > we only want to tell userspace about the bits marked in the cap/ecap_mask.
> > cap/ecap_mask is kind of white-list of the cap/ecap register.
> > userspace should only care about the bits in the white-list, for other
> > bits, it should ignore.
> >
> > Regards,
> > Yi Liu
> 
> For invalid bits if kernel just clears them then do we still need additional mask bits
> to explicitly mark them out? I guess this might be the point that Alex asked...

For invalid bits, kernel will clear them. But I think the mask bits is
still necessary. The mask bits tells user space the bits related to
nesting. Without it, user space may have no idea about it.

Maybe talk about QEMU usage of the cap/ecap bits would help. QEMU vIOMMU
decides cap/ecap bits according to QEMU cmdline. But not all of them are
compatible with hardware support. Especially, vIOMMU built on nesting.
So needs to sync the cap/ecap bits with host side. Based on the mask
bits, QEMU can compare the cap/ecap bits configured by QEMU cmdline with
the cap/ecap bits reported by this interface. This comparation is limited
to the nesting related bits in cap/ecap, the other bits are not included
and can use the configuration by QEMU cmdline.

The link below show the current Intel vIOMMU usage on the cap/ecap bits.
For each assigned device, vIOMMU will compare the nesting related bits in
cap/ecap and mask out the bits which hardware doesn't support. After the
machine is intilized, the vIOMMU cap/ecap bits are determined. If user
hot-plug devices to VM, vIOMMU will fail it if the hardware cap/ecap bits
behind hot-plug device are not compatible with determined vIOMMU cap/ecap
bits.

https://www.spinics.net/lists/kvm/msg218294.html

Regards,
Yi Liu

> >
> > > Alex
> > >
> > >
> > > > + */
> > > > +struct iommu_nesting_info_vtd {
> > > > +	__u32	flags;
> > > > +	__u16	addr_width;
> > > > +	__u16	pasid_bits;
> > > > +	__u64	cap_reg;
> > > > +	__u64	cap_mask;
> > > > +	__u64	ecap_reg;
> > > > +	__u64	ecap_mask;
> > > > +};
> > > > +
> > > >  #endif /* _UAPI_IOMMU_H */
Tian, Kevin June 16, 2020, 1:56 a.m. UTC | #5
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Monday, June 15, 2020 2:05 PM
> 
> Hi Kevin,
> 
> > From: Tian, Kevin <kevin.tian@intel.com>
> > Sent: Monday, June 15, 2020 9:23 AM
> >
> > > From: Liu, Yi L <yi.l.liu@intel.com>
> > > Sent: Friday, June 12, 2020 5:05 PM
> > >
> > > Hi Alex,
> > >
> > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > Sent: Friday, June 12, 2020 3:30 AM
> > > >
> > > > On Thu, 11 Jun 2020 05:15:21 -0700
> > > > Liu Yi L <yi.l.liu@intel.com> wrote:
> > > >
> > > > > IOMMUs that support nesting translation needs report the
> > > > > capability info to userspace, e.g. the format of first level/stage paging
> > structures.
> > > > >
> > > > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > > > CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > > Cc: Alex Williamson <alex.williamson@redhat.com>
> > > > > Cc: Eric Auger <eric.auger@redhat.com>
> > > > > Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > > > > Cc: Joerg Roedel <joro@8bytes.org>
> > > > > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > > > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > > ---
> > > > > @Jean, Eric: as nesting was introduced for ARM, but looks like no
> > > > > actual user of it. right? So I'm wondering if we can reuse
> > > > > DOMAIN_ATTR_NESTING to retrieve nesting info? how about your
> > > opinions?
> > > > >
> > > > >  include/linux/iommu.h      |  1 +
> > > > >  include/uapi/linux/iommu.h | 34
> > > ++++++++++++++++++++++++++++++++++
> > > > >  2 files changed, 35 insertions(+)
> > > > >
> > > > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > > > 78a26ae..f6e4b49 100644
> > > > > --- a/include/linux/iommu.h
> > > > > +++ b/include/linux/iommu.h
> > > > > @@ -126,6 +126,7 @@ enum iommu_attr {
> > > > >  	DOMAIN_ATTR_FSL_PAMUV1,
> > > > >  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
> > > > >  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> > > > > +	DOMAIN_ATTR_NESTING_INFO,
> > > > >  	DOMAIN_ATTR_MAX,
> > > > >  };
> > > > >
> > > > > diff --git a/include/uapi/linux/iommu.h
> > > > > b/include/uapi/linux/iommu.h index 303f148..02eac73 100644
> > > > > --- a/include/uapi/linux/iommu.h
> > > > > +++ b/include/uapi/linux/iommu.h
> > > > > @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
> > > > >  	};
> > > > >  };
> > > > >
> > > > > +struct iommu_nesting_info {
> > > > > +	__u32	size;
> > > > > +	__u32	format;
> > > > > +	__u32	features;
> > > > > +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> > > > > +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> > > > > +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 <<
> 2)
> > > > > +	__u32	flags;
> > > > > +	__u8	data[];
> > > > > +};
> > > > > +
> > > > > +/*
> > > > > + * @flags:	VT-d specific flags. Currently reserved for future
> > > > > + *		extension.
> > > > > + * @addr_width:	The output addr width of first level/stage
> > translation
> > > > > + * @pasid_bits:	Maximum supported PASID bits, 0 represents
> no
> > > PASID
> > > > > + *		support.
> > > > > + * @cap_reg:	Describe basic capabilities as defined in VT-d
> > > capability
> > > > > + *		register.
> > > > > + * @cap_mask:	Mark valid capability bits in @cap_reg.
> > > > > + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> > > > > + *		extended capability register.
> > > > > + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> > > >
> > > > Please explain this a little further, why do we need to tell
> > > > userspace about cap/ecap register bits that aren't valid through this
> interface?
> > > > Thanks,
> > >
> > > we only want to tell userspace about the bits marked in the
> cap/ecap_mask.
> > > cap/ecap_mask is kind of white-list of the cap/ecap register.
> > > userspace should only care about the bits in the white-list, for other
> > > bits, it should ignore.
> > >
> > > Regards,
> > > Yi Liu
> >
> > For invalid bits if kernel just clears them then do we still need additional
> mask bits
> > to explicitly mark them out? I guess this might be the point that Alex asked...
> 
> For invalid bits, kernel will clear them. But I think the mask bits is
> still necessary. The mask bits tells user space the bits related to
> nesting. Without it, user space may have no idea about it.

userspace should know which bit is related to nesting and then should
check that bit explicitly...

> 
> Maybe talk about QEMU usage of the cap/ecap bits would help. QEMU
> vIOMMU
> decides cap/ecap bits according to QEMU cmdline. But not all of them are
> compatible with hardware support. Especially, vIOMMU built on nesting.
> So needs to sync the cap/ecap bits with host side. Based on the mask
> bits, QEMU can compare the cap/ecap bits configured by QEMU cmdline with
> the cap/ecap bits reported by this interface. This comparation is limited
> to the nesting related bits in cap/ecap, the other bits are not included
> and can use the configuration by QEMU cmdline.

I didn't get this explanation. Based on patch [15/15], nesting capabilities
are defined as:
+/* Nesting Support Capability Alignment */
+#define VTD_CAP_FL1GP		(1ULL << 56)
+#define VTD_CAP_FL5LP		(1ULL << 60)
+#define VTD_ECAP_PRS		(1ULL << 29)
+#define VTD_ECAP_ERS		(1ULL << 30)
+#define VTD_ECAP_SRS		(1ULL << 31)
+#define VTD_ECAP_EAFS		(1ULL << 34)
+#define VTD_ECAP_PASID		(1ULL << 40)

When Qemu gets an cmdline option it knows which bit out of above
list should be checked against hardware capability. Then just do the
check bit-by-bit. Why do we need mask bit in uapi to tell which bits
are valid? Unless 0/1 doesn't represent validity of some bit. Do we
have such example?

> 
> The link below show the current Intel vIOMMU usage on the cap/ecap bits.
> For each assigned device, vIOMMU will compare the nesting related bits in
> cap/ecap and mask out the bits which hardware doesn't support. After the
> machine is intilized, the vIOMMU cap/ecap bits are determined. If user
> hot-plug devices to VM, vIOMMU will fail it if the hardware cap/ecap bits
> behind hot-plug device are not compatible with determined vIOMMU
> cap/ecap
> bits.
> 
> https://www.spinics.net/lists/kvm/msg218294.html
> 
> Regards,
> Yi Liu
> 
> > >
> > > > Alex
> > > >
> > > >
> > > > > + */
> > > > > +struct iommu_nesting_info_vtd {
> > > > > +	__u32	flags;
> > > > > +	__u16	addr_width;
> > > > > +	__u16	pasid_bits;
> > > > > +	__u64	cap_reg;
> > > > > +	__u64	cap_mask;
> > > > > +	__u64	ecap_reg;
> > > > > +	__u64	ecap_mask;
> > > > > +};
> > > > > +
> > > > >  #endif /* _UAPI_IOMMU_H */
Yi Liu June 16, 2020, 2:24 a.m. UTC | #6
> From: Tian, Kevin <kevin.tian@intel.com>
> Sent: Tuesday, June 16, 2020 9:56 AM
> 
> > From: Liu, Yi L <yi.l.liu@intel.com>
> > Sent: Monday, June 15, 2020 2:05 PM
> >
> > Hi Kevin,
> >
> > > From: Tian, Kevin <kevin.tian@intel.com>
> > > Sent: Monday, June 15, 2020 9:23 AM
> > >
> > > > From: Liu, Yi L <yi.l.liu@intel.com>
> > > > Sent: Friday, June 12, 2020 5:05 PM
> > > >
> > > > Hi Alex,
> > > >
> > > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > > Sent: Friday, June 12, 2020 3:30 AM
> > > > >
> > > > > On Thu, 11 Jun 2020 05:15:21 -0700
> > > > > Liu Yi L <yi.l.liu@intel.com> wrote:
> > > > >
> > > > > > IOMMUs that support nesting translation needs report the
> > > > > > capability info to userspace, e.g. the format of first level/stage paging
> > > structures.
> > > > > >
> > > > > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > > > > CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > > > Cc: Alex Williamson <alex.williamson@redhat.com>
> > > > > > Cc: Eric Auger <eric.auger@redhat.com>
> > > > > > Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > > > > > Cc: Joerg Roedel <joro@8bytes.org>
> > > > > > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > > > > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > > > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > > > ---
> > > > > > @Jean, Eric: as nesting was introduced for ARM, but looks like no
> > > > > > actual user of it. right? So I'm wondering if we can reuse
> > > > > > DOMAIN_ATTR_NESTING to retrieve nesting info? how about your
> > > > opinions?
> > > > > >
> > > > > >  include/linux/iommu.h      |  1 +
> > > > > >  include/uapi/linux/iommu.h | 34
> > > > ++++++++++++++++++++++++++++++++++
> > > > > >  2 files changed, 35 insertions(+)
> > > > > >
> > > > > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > > > > 78a26ae..f6e4b49 100644
> > > > > > --- a/include/linux/iommu.h
> > > > > > +++ b/include/linux/iommu.h
> > > > > > @@ -126,6 +126,7 @@ enum iommu_attr {
> > > > > >  	DOMAIN_ATTR_FSL_PAMUV1,
> > > > > >  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
> > > > > >  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> > > > > > +	DOMAIN_ATTR_NESTING_INFO,
> > > > > >  	DOMAIN_ATTR_MAX,
> > > > > >  };
> > > > > >
> > > > > > diff --git a/include/uapi/linux/iommu.h
> > > > > > b/include/uapi/linux/iommu.h index 303f148..02eac73 100644
> > > > > > --- a/include/uapi/linux/iommu.h
> > > > > > +++ b/include/uapi/linux/iommu.h
> > > > > > @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
> > > > > >  	};
> > > > > >  };
> > > > > >
> > > > > > +struct iommu_nesting_info {
> > > > > > +	__u32	size;
> > > > > > +	__u32	format;
> > > > > > +	__u32	features;
> > > > > > +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> > > > > > +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> > > > > > +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 <<
> > 2)
> > > > > > +	__u32	flags;
> > > > > > +	__u8	data[];
> > > > > > +};
> > > > > > +
> > > > > > +/*
> > > > > > + * @flags:	VT-d specific flags. Currently reserved for future
> > > > > > + *		extension.
> > > > > > + * @addr_width:	The output addr width of first level/stage
> > > translation
> > > > > > + * @pasid_bits:	Maximum supported PASID bits, 0 represents
> > no
> > > > PASID
> > > > > > + *		support.
> > > > > > + * @cap_reg:	Describe basic capabilities as defined in VT-d
> > > > capability
> > > > > > + *		register.
> > > > > > + * @cap_mask:	Mark valid capability bits in @cap_reg.
> > > > > > + * @ecap_reg:	Describe the extended capabilities as defined in
> VT-d
> > > > > > + *		extended capability register.
> > > > > > + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> > > > >
> > > > > Please explain this a little further, why do we need to tell
> > > > > userspace about cap/ecap register bits that aren't valid through this
> > interface?
> > > > > Thanks,
> > > >
> > > > we only want to tell userspace about the bits marked in the
> > cap/ecap_mask.
> > > > cap/ecap_mask is kind of white-list of the cap/ecap register.
> > > > userspace should only care about the bits in the white-list, for other
> > > > bits, it should ignore.
> > > >
> > > > Regards,
> > > > Yi Liu
> > >
> > > For invalid bits if kernel just clears them then do we still need additional
> > mask bits
> > > to explicitly mark them out? I guess this might be the point that Alex asked...
> >
> > For invalid bits, kernel will clear them. But I think the mask bits is
> > still necessary. The mask bits tells user space the bits related to
> > nesting. Without it, user space may have no idea about it.
> 
> userspace should know which bit is related to nesting and then should
> check that bit explicitly...

ok, so userspace could get such info by the understanding of spec, right?
if user space could get it, then I think it's uncessary to have cap/ecap mask
bits.

> >
> > Maybe talk about QEMU usage of the cap/ecap bits would help. QEMU
> > vIOMMU
> > decides cap/ecap bits according to QEMU cmdline. But not all of them are
> > compatible with hardware support. Especially, vIOMMU built on nesting.
> > So needs to sync the cap/ecap bits with host side. Based on the mask
> > bits, QEMU can compare the cap/ecap bits configured by QEMU cmdline with
> > the cap/ecap bits reported by this interface. This comparation is limited
> > to the nesting related bits in cap/ecap, the other bits are not included
> > and can use the configuration by QEMU cmdline.
> 
> I didn't get this explanation. Based on patch [15/15], nesting capabilities
> are defined as:
> +/* Nesting Support Capability Alignment */
> +#define VTD_CAP_FL1GP		(1ULL << 56)
> +#define VTD_CAP_FL5LP		(1ULL << 60)
> +#define VTD_ECAP_PRS		(1ULL << 29)
> +#define VTD_ECAP_ERS		(1ULL << 30)
> +#define VTD_ECAP_SRS		(1ULL << 31)
> +#define VTD_ECAP_EAFS		(1ULL << 34)
> +#define VTD_ECAP_PASID		(1ULL << 40)
> 
> When Qemu gets an cmdline option it knows which bit out of above
> list should be checked against hardware capability. Then just do the
> check bit-by-bit. Why do we need mask bit in uapi to tell which bits
> are valid?

as above reply, if userspace has the check list for the cap/ecap bits,
then it's not necessary to use mask bit.

> Unless 0/1 doesn't represent validity of some bit. Do we
> have such example?

yes, like the pasid bits. it's 20 bits. but we already got pasid_bits
in the iommu_nesting_info_vtd structure. so it's not covered in the
ecap_bits.

Regards,
Yi Liu

> >
> > The link below show the current Intel vIOMMU usage on the cap/ecap bits.
> > For each assigned device, vIOMMU will compare the nesting related bits in
> > cap/ecap and mask out the bits which hardware doesn't support. After the
> > machine is intilized, the vIOMMU cap/ecap bits are determined. If user
> > hot-plug devices to VM, vIOMMU will fail it if the hardware cap/ecap bits
> > behind hot-plug device are not compatible with determined vIOMMU
> > cap/ecap
> > bits.
> >
> > https://www.spinics.net/lists/kvm/msg218294.html
> >
> > Regards,
> > Yi Liu
> >
> > > >
> > > > > Alex
> > > > >
> > > > >
> > > > > > + */
> > > > > > +struct iommu_nesting_info_vtd {
> > > > > > +	__u32	flags;
> > > > > > +	__u16	addr_width;
> > > > > > +	__u16	pasid_bits;
> > > > > > +	__u64	cap_reg;
> > > > > > +	__u64	cap_mask;
> > > > > > +	__u64	ecap_reg;
> > > > > > +	__u64	ecap_mask;
> > > > > > +};
> > > > > > +
> > > > > >  #endif /* _UAPI_IOMMU_H */
Jean-Philippe Brucker June 17, 2020, 2:39 p.m. UTC | #7
[+ Will and Robin]

Hi Yi,

On Thu, Jun 11, 2020 at 05:15:21AM -0700, Liu Yi L wrote:
> IOMMUs that support nesting translation needs report the capability info
> to userspace, e.g. the format of first level/stage paging structures.
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Cc: Eric Auger <eric.auger@redhat.com>
> Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Lu Baolu <baolu.lu@linux.intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> ---
> @Jean, Eric: as nesting was introduced for ARM, but looks like no actual
> user of it. right? So I'm wondering if we can reuse DOMAIN_ATTR_NESTING
> to retrieve nesting info? how about your opinions?

Sure, I think we could rework the getters for DOMAIN_ATTR_NESTING since
they aren't used, but we do need to keep the setters as is.

Before attaching a domain, VFIO sets DOMAIN_ATTR_NESTING if userspace
requested a VFIO_TYPE1_NESTING_IOMMU container. This is necessary for the
SMMU driver to know how to attach later, but at that point we don't know
whether the SMMU does support nesting (since the domain isn't attached to
any endpoint). During attach, the SMMU driver adapts to the SMMU's
capabilities, and may well fallback to one stage if the SMMU doesn't
support nesting.

VFIO should check after attaching that the nesting attribute held, by
calling iommu_domain_get_attr(NESTING). At the moment it does not, and
since your 03/15 patch does that with additional info, I agree with
reusing DOMAIN_ATTR_NESTING instead of adding DOMAIN_ATTR_NESTING_INFO.

However it requires changing the get_attr(NESTING) implementations in both
SMMU drivers as a precursor of this series, to avoid breaking
VFIO_TYPE1_NESTING_IOMMU on Arm. Since we haven't yet defined the
nesting_info structs for SMMUv2 and v3, I suppose we could return an empty
struct iommu_nesting_info for now?

> 
>  include/linux/iommu.h      |  1 +
>  include/uapi/linux/iommu.h | 34 ++++++++++++++++++++++++++++++++++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 78a26ae..f6e4b49 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -126,6 +126,7 @@ enum iommu_attr {
>  	DOMAIN_ATTR_FSL_PAMUV1,
>  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
>  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> +	DOMAIN_ATTR_NESTING_INFO,
>  	DOMAIN_ATTR_MAX,
>  };
>  
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 303f148..02eac73 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
>  	};
>  };
>  
> +struct iommu_nesting_info {
> +	__u32	size;
> +	__u32	format;

What goes into format? And flags? This structure needs some documentation.

Thanks,
Jean

> +	__u32	features;
> +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
> +	__u32	flags;
> +	__u8	data[];
> +};
> +
> +/*
> + * @flags:	VT-d specific flags. Currently reserved for future
> + *		extension.
> + * @addr_width:	The output addr width of first level/stage translation
> + * @pasid_bits:	Maximum supported PASID bits, 0 represents no PASID
> + *		support.
> + * @cap_reg:	Describe basic capabilities as defined in VT-d capability
> + *		register.
> + * @cap_mask:	Mark valid capability bits in @cap_reg.
> + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> + *		extended capability register.
> + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> + */
> +struct iommu_nesting_info_vtd {
> +	__u32	flags;
> +	__u16	addr_width;
> +	__u16	pasid_bits;
> +	__u64	cap_reg;
> +	__u64	cap_mask;
> +	__u64	ecap_reg;
> +	__u64	ecap_mask;
> +};
> +
>  #endif /* _UAPI_IOMMU_H */
> -- 
> 2.7.4
>
Yi Liu June 18, 2020, 11:46 a.m. UTC | #8
Hi Jean,

> From: Jean-Philippe Brucker < jean-philippe@linaro.org>
> Sent: Wednesday, June 17, 2020 10:39 PM
> 
> [+ Will and Robin]
> 
> Hi Yi,
> 
> On Thu, Jun 11, 2020 at 05:15:21AM -0700, Liu Yi L wrote:
> > IOMMUs that support nesting translation needs report the capability
> > info to userspace, e.g. the format of first level/stage paging structures.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Alex Williamson <alex.williamson@redhat.com>
> > Cc: Eric Auger <eric.auger@redhat.com>
> > Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > Cc: Joerg Roedel <joro@8bytes.org>
> > Cc: Lu Baolu <baolu.lu@linux.intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > ---
> > @Jean, Eric: as nesting was introduced for ARM, but looks like no
> > actual user of it. right? So I'm wondering if we can reuse
> > DOMAIN_ATTR_NESTING to retrieve nesting info? how about your opinions?
> 
> Sure, I think we could rework the getters for DOMAIN_ATTR_NESTING since they
> aren't used, but we do need to keep the setters as is.
> 
> Before attaching a domain, VFIO sets DOMAIN_ATTR_NESTING if userspace
> requested a VFIO_TYPE1_NESTING_IOMMU container. This is necessary for the
> SMMU driver to know how to attach later, but at that point we don't know whether
> the SMMU does support nesting (since the domain isn't attached to any endpoint).
> During attach, the SMMU driver adapts to the SMMU's capabilities, and may well
> fallback to one stage if the SMMU doesn't support nesting.

got you. so even VFIO sets DOMAIN_ATTR_NESTING successfully, it doesn't mean
the nesting will be used. yeah, it's a little bit different with VT-d side. intel iommu
driver will fail ATT_NESTING setting if it found not all iommu units in the system
are nesting capable.

> VFIO should check after attaching that the nesting attribute held, by calling
> iommu_domain_get_attr(NESTING). At the moment it does not, and since your
> 03/15 patch does that with additional info, I agree with reusing
> DOMAIN_ATTR_NESTING instead of adding DOMAIN_ATTR_NESTING_INFO.
>
> However it requires changing the get_attr(NESTING) implementations in both SMMU
> drivers as a precursor of this series, to avoid breaking
> VFIO_TYPE1_NESTING_IOMMU on Arm. Since we haven't yet defined the
> nesting_info structs for SMMUv2 and v3, I suppose we could return an empty struct
> iommu_nesting_info for now?

got you. I think it works. So far, I didn't see any getter for ATTR_NESTING, once
SMMU drivers return empty struct iommu_nesting_info, VFIO won't fail. will
do it when switching to reuse ATTR_NESTING for getting nesting info.

> >
> >  include/linux/iommu.h      |  1 +
> >  include/uapi/linux/iommu.h | 34 ++++++++++++++++++++++++++++++++++
> >  2 files changed, 35 insertions(+)
> >
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > 78a26ae..f6e4b49 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -126,6 +126,7 @@ enum iommu_attr {
> >  	DOMAIN_ATTR_FSL_PAMUV1,
> >  	DOMAIN_ATTR_NESTING,	/* two stages of translation */
> >  	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
> > +	DOMAIN_ATTR_NESTING_INFO,
> >  	DOMAIN_ATTR_MAX,
> >  };
> >
> > diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> > index 303f148..02eac73 100644
> > --- a/include/uapi/linux/iommu.h
> > +++ b/include/uapi/linux/iommu.h
> > @@ -332,4 +332,38 @@ struct iommu_gpasid_bind_data {
> >  	};
> >  };
> >
> > +struct iommu_nesting_info {
> > +	__u32	size;
> > +	__u32	format;
> 
> What goes into format? And flags? This structure needs some documentation.

format will be the same with the definition of @format in iommu_gpasid_bind_data.
flags is reserved for future extension. will add description in next version. :-)

struct iommu_gpasid_bind_data {
        __u32 argsz;
#define IOMMU_GPASID_BIND_VERSION_1     1
        __u32 version;
#define IOMMU_PASID_FORMAT_INTEL_VTD    1
        __u32 format;
#define IOMMU_SVA_GPASID_VAL    (1 << 0) /* guest PASID valid */
        __u64 flags;
        __u64 gpgd;
        __u64 hpasid;
        __u64 gpasid;
        __u32 addr_width;
        __u8  padding[12];
        /* Vendor specific data */
        union {
                struct iommu_gpasid_bind_data_vtd vtd;
        } vendor;
};

Regards,
Yi Liu

> Thanks,
> Jean
> 
> > +	__u32	features;
> > +#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
> > +#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
> > +#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
> > +	__u32	flags;
> > +	__u8	data[];
> > +};
> > +
> > +/*
> > + * @flags:	VT-d specific flags. Currently reserved for future
> > + *		extension.
> > + * @addr_width:	The output addr width of first level/stage translation
> > + * @pasid_bits:	Maximum supported PASID bits, 0 represents no PASID
> > + *		support.
> > + * @cap_reg:	Describe basic capabilities as defined in VT-d capability
> > + *		register.
> > + * @cap_mask:	Mark valid capability bits in @cap_reg.
> > + * @ecap_reg:	Describe the extended capabilities as defined in VT-d
> > + *		extended capability register.
> > + * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
> > + */
> > +struct iommu_nesting_info_vtd {
> > +	__u32	flags;
> > +	__u16	addr_width;
> > +	__u16	pasid_bits;
> > +	__u64	cap_reg;
> > +	__u64	cap_mask;
> > +	__u64	ecap_reg;
> > +	__u64	ecap_mask;
> > +};
> > +
> >  #endif /* _UAPI_IOMMU_H */
> > --
> > 2.7.4
> >
diff mbox series

Patch

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 78a26ae..f6e4b49 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -126,6 +126,7 @@  enum iommu_attr {
 	DOMAIN_ATTR_FSL_PAMUV1,
 	DOMAIN_ATTR_NESTING,	/* two stages of translation */
 	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+	DOMAIN_ATTR_NESTING_INFO,
 	DOMAIN_ATTR_MAX,
 };
 
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 303f148..02eac73 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -332,4 +332,38 @@  struct iommu_gpasid_bind_data {
 	};
 };
 
+struct iommu_nesting_info {
+	__u32	size;
+	__u32	format;
+	__u32	features;
+#define IOMMU_NESTING_FEAT_SYSWIDE_PASID	(1 << 0)
+#define IOMMU_NESTING_FEAT_BIND_PGTBL		(1 << 1)
+#define IOMMU_NESTING_FEAT_CACHE_INVLD		(1 << 2)
+	__u32	flags;
+	__u8	data[];
+};
+
+/*
+ * @flags:	VT-d specific flags. Currently reserved for future
+ *		extension.
+ * @addr_width:	The output addr width of first level/stage translation
+ * @pasid_bits:	Maximum supported PASID bits, 0 represents no PASID
+ *		support.
+ * @cap_reg:	Describe basic capabilities as defined in VT-d capability
+ *		register.
+ * @cap_mask:	Mark valid capability bits in @cap_reg.
+ * @ecap_reg:	Describe the extended capabilities as defined in VT-d
+ *		extended capability register.
+ * @ecap_mask:	Mark the valid capability bits in @ecap_reg.
+ */
+struct iommu_nesting_info_vtd {
+	__u32	flags;
+	__u16	addr_width;
+	__u16	pasid_bits;
+	__u64	cap_reg;
+	__u64	cap_mask;
+	__u64	ecap_reg;
+	__u64	ecap_mask;
+};
+
 #endif /* _UAPI_IOMMU_H */