
[RFC,v2,1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE

Message ID 1571919983-3231-2-git-send-email-yi.l.liu@intel.com (mailing list archive)
State New, archived
Series vfio: support Shared Virtual Addressing

Commit Message

Yi Liu Oct. 24, 2019, 12:26 p.m. UTC
From: Liu Yi L <yi.l.liu@linux.intel.com>

When the guest "owns" the stage 1 translation structures, the host
IOMMU driver has no knowledge of caching structure updates unless
the guest invalidation requests are trapped and passed down to the
host.

This patch adds the VFIO_IOMMU_CACHE_INVALIDATE ioctl, which aims
at propagating guest stage 1 IOMMU cache invalidations to the host.

Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@linux.intel.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
 drivers/vfio/vfio_iommu_type1.c | 55 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h       | 13 ++++++++++
 2 files changed, 68 insertions(+)
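
For illustration only (not part of the patch): a minimal userspace sketch
of how a VMM might drive the proposed ioctl on its container fd. The
helper name is hypothetical, and the layout of struct
iommu_cache_invalidate_info is assumed from the companion IOMMU uapi
series, so the exact fields may differ in later revisions.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include <linux/iommu.h>

/*
 * Hypothetical helper: forward a trapped guest stage-1 invalidation
 * to the host through the VFIO container.
 */
static int forward_guest_cache_inv(int container_fd,
				   const struct iommu_cache_invalidate_info *info)
{
	struct vfio_iommu_type1_cache_invalidate inv;

	memset(&inv, 0, sizeof(inv));
	inv.argsz = sizeof(inv);	/* covers argsz, flags and info */
	inv.flags = 0;			/* no flags defined by this patch */
	inv.info = *info;		/* filled from the trapped guest command */

	return ioctl(container_fd, VFIO_IOMMU_CACHE_INVALIDATE, &inv);
}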

Comments

Tian, Kevin Oct. 25, 2019, 9:14 a.m. UTC | #1
> From: Liu, Yi L
> Sent: Thursday, October 24, 2019 8:26 PM
> 
> From: Liu Yi L <yi.l.liu@linux.intel.com>
> 
> When the guest "owns" the stage 1 translation structures,  the host
> IOMMU driver has no knowledge of caching structure updates unless
> the guest invalidation requests are trapped and passed down to the
> host.
> 
> This patch adds the VFIO_IOMMU_CACHE_INVALIDATE ioctl with aims
> at propagating guest stage1 IOMMU cache invalidations to the host.
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@linux.intel.com>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 55
> +++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h       | 13 ++++++++++
>  2 files changed, 68 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c
> b/drivers/vfio/vfio_iommu_type1.c
> index 96fddc1d..cd8d3a5 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -124,6 +124,34 @@ struct vfio_regions {
>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
>  					(!list_empty(&iommu->domain_list))
> 
> +struct domain_capsule {
> +	struct iommu_domain *domain;
> +	void *data;
> +};
> +
> +/* iommu->lock must be held */
> +static int
> +vfio_iommu_lookup_dev(struct vfio_iommu *iommu,
> +		      int (*fn)(struct device *dev, void *data),
> +		      void *data)

'lookup' usually means finding a device and then returning it. But
the real purpose here is to loop over all the devices within this
container and do something with each of them. Would it make more
sense to name it vfio_iommu_for_each_dev?
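
For illustration, a sketch of the suggested naming (assuming the body
stays exactly as in the patch and only the identifier changes):

static int
vfio_iommu_for_each_dev(struct vfio_iommu *iommu,
			int (*fn)(struct device *dev, void *data),
			void *data);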

> +{
> +	struct domain_capsule dc = {.data = data};
> +	struct vfio_domain *d;
> +	struct vfio_group *g;
> +	int ret = 0;
> +
> +	list_for_each_entry(d, &iommu->domain_list, next) {
> +		dc.domain = d->domain;
> +		list_for_each_entry(g, &d->group_list, next) {
> +			ret = iommu_group_for_each_dev(g-
> >iommu_group,
> +						       &dc, fn);
> +			if (ret)
> +				break;
> +		}
> +	}
> +	return ret;
> +}
> +
>  static int put_pfn(unsigned long pfn, int prot);
> 
>  /*
> @@ -2211,6 +2239,15 @@ static int vfio_iommu_iova_build_caps(struct
> vfio_iommu *iommu,
>  	return ret;
>  }
> 
> +static int vfio_cache_inv_fn(struct device *dev, void *data)
> +{
> +	struct domain_capsule *dc = (struct domain_capsule *)data;
> +	struct vfio_iommu_type1_cache_invalidate *ustruct =
> +		(struct vfio_iommu_type1_cache_invalidate *)dc->data;
> +
> +	return iommu_cache_invalidate(dc->domain, dev, &ustruct->info);
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>  				   unsigned int cmd, unsigned long arg)
>  {
> @@ -2315,6 +2352,24 @@ static long vfio_iommu_type1_ioctl(void
> *iommu_data,
> 
>  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
>  			-EFAULT : 0;
> +	} else if (cmd == VFIO_IOMMU_CACHE_INVALIDATE) {
> +		struct vfio_iommu_type1_cache_invalidate ustruct;

it's weird to name a variable after 'struct'.

> +		int ret;
> +
> +		minsz = offsetofend(struct
> vfio_iommu_type1_cache_invalidate,
> +				    info);
> +
> +		if (copy_from_user(&ustruct, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (ustruct.argsz < minsz || ustruct.flags)
> +			return -EINVAL;
> +
> +		mutex_lock(&iommu->lock);
> +		ret = vfio_iommu_lookup_dev(iommu, vfio_cache_inv_fn,
> +					    &ustruct);
> +		mutex_unlock(&iommu->lock);
> +		return ret;
>  	}
> 
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 9e843a1..ccf60a2 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -794,6 +794,19 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> 
> +/**
> + * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE +
> 24,
> + *			struct vfio_iommu_type1_cache_invalidate)
> + *
> + * Propagate guest IOMMU cache invalidation to the host.

guest or first-level/stage-1? Ideally a userspace application may also
bind its own address space as stage-1 one day...

> + */
> +struct vfio_iommu_type1_cache_invalidate {
> +	__u32   argsz;
> +	__u32   flags;
> +	struct iommu_cache_invalidate_info info;
> +};
> +#define VFIO_IOMMU_CACHE_INVALIDATE      _IO(VFIO_TYPE, VFIO_BASE
> + 24)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU --------
> */
> 
>  /*
> --
> 2.7.4
Yi Liu Oct. 25, 2019, 11:20 a.m. UTC | #2
Hi Kevin,

> From: Tian, Kevin
> Sent: Friday, October 25, 2019 5:14 PM
> To: Liu, Yi L <yi.l.liu@intel.com>; alex.williamson@redhat.com;
> Subject: RE: [RFC v2 1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE
> 
> > From: Liu, Yi L
> > Sent: Thursday, October 24, 2019 8:26 PM
> >
> > From: Liu Yi L <yi.l.liu@linux.intel.com>
> >
> > When the guest "owns" the stage 1 translation structures,  the host
> > IOMMU driver has no knowledge of caching structure updates unless the
> > guest invalidation requests are trapped and passed down to the host.
> >
> > This patch adds the VFIO_IOMMU_CACHE_INVALIDATE ioctl with aims at
> > propagating guest stage1 IOMMU cache invalidations to the host.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@linux.intel.com>
> > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 55
> > +++++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/vfio.h       | 13 ++++++++++
> >  2 files changed, 68 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > b/drivers/vfio/vfio_iommu_type1.c index 96fddc1d..cd8d3a5 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -124,6 +124,34 @@ struct vfio_regions {
> >  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
> >  					(!list_empty(&iommu->domain_list))
> >
> > +struct domain_capsule {
> > +	struct iommu_domain *domain;
> > +	void *data;
> > +};
> > +
> > +/* iommu->lock must be held */
> > +static int
> > +vfio_iommu_lookup_dev(struct vfio_iommu *iommu,
> > +		      int (*fn)(struct device *dev, void *data),
> > +		      void *data)
> 
> 'lookup' usually means find a device and then return. But the real purpose here is to
> loop all the devices within this container and then do something. Does it make more
> sense to be vfio_iommu_for_each_dev?

yep, I can replace it.

> 
> > +{
> > +	struct domain_capsule dc = {.data = data};
> > +	struct vfio_domain *d;
[...]
> 2315,6 +2352,24 @@
> > static long vfio_iommu_type1_ioctl(void *iommu_data,
> >
> >  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
> >  			-EFAULT : 0;
> > +	} else if (cmd == VFIO_IOMMU_CACHE_INVALIDATE) {
> > +		struct vfio_iommu_type1_cache_invalidate ustruct;
> 
> it's weird to call a variable as struct.

Will fix it.

> > +		int ret;
> > +
> > +		minsz = offsetofend(struct
> > vfio_iommu_type1_cache_invalidate,
> > +				    info);
> > +
> > +		if (copy_from_user(&ustruct, (void __user *)arg, minsz))
> > +			return -EFAULT;
> > +
> > +		if (ustruct.argsz < minsz || ustruct.flags)
> > +			return -EINVAL;
> > +
> > +		mutex_lock(&iommu->lock);
> > +		ret = vfio_iommu_lookup_dev(iommu, vfio_cache_inv_fn,
> > +					    &ustruct);
> > +		mutex_unlock(&iommu->lock);
> > +		return ret;
> >  	}
> >
> >  	return -ENOTTY;
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 9e843a1..ccf60a2 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -794,6 +794,19 @@ struct vfio_iommu_type1_dma_unmap {
> >  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
> >  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> >
> > +/**
> > + * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE +
> > 24,
> > + *			struct vfio_iommu_type1_cache_invalidate)
> > + *
> > + * Propagate guest IOMMU cache invalidation to the host.
> 
> guest or first-level/stage-1? Ideally userspace application may also bind its own
> address space as stage-1 one day...

Should be first-level/stage-1. Will correct it.

Thanks,
Yi Liu
Alex Williamson Nov. 5, 2019, 10:42 p.m. UTC | #3
On Fri, 25 Oct 2019 11:20:40 +0000
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> Hi Kevin,
> 
> > From: Tian, Kevin
> > Sent: Friday, October 25, 2019 5:14 PM
> > To: Liu, Yi L <yi.l.liu@intel.com>; alex.williamson@redhat.com;
> > Subject: RE: [RFC v2 1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE
> >   
> > > From: Liu, Yi L
> > > Sent: Thursday, October 24, 2019 8:26 PM
> > >
> > > From: Liu Yi L <yi.l.liu@linux.intel.com>
> > >
> > > When the guest "owns" the stage 1 translation structures,  the host
> > > IOMMU driver has no knowledge of caching structure updates unless the
> > > guest invalidation requests are trapped and passed down to the host.
> > >
> > > This patch adds the VFIO_IOMMU_CACHE_INVALIDATE ioctl with aims at
> > > propagating guest stage1 IOMMU cache invalidations to the host.
> > >
> > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > Signed-off-by: Liu Yi L <yi.l.liu@linux.intel.com>
> > > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > ---
> > >  drivers/vfio/vfio_iommu_type1.c | 55
> > > +++++++++++++++++++++++++++++++++++++++++
> > >  include/uapi/linux/vfio.h       | 13 ++++++++++
> > >  2 files changed, 68 insertions(+)
> > >
> > > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > > b/drivers/vfio/vfio_iommu_type1.c index 96fddc1d..cd8d3a5 100644
> > > --- a/drivers/vfio/vfio_iommu_type1.c
> > > +++ b/drivers/vfio/vfio_iommu_type1.c
> > > @@ -124,6 +124,34 @@ struct vfio_regions {
> > >  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
> > >  					(!list_empty(&iommu->domain_list))
> > >
> > > +struct domain_capsule {
> > > +	struct iommu_domain *domain;
> > > +	void *data;
> > > +};
> > > +
> > > +/* iommu->lock must be held */
> > > +static int
> > > +vfio_iommu_lookup_dev(struct vfio_iommu *iommu,
> > > +		      int (*fn)(struct device *dev, void *data),
> > > +		      void *data)  
> > 
> > 'lookup' usually means find a device and then return. But the real purpose here is to
> > loop all the devices within this container and then do something. Does it make more
> > sense to be vfio_iommu_for_each_dev?  

+1
 
> yep, I can replace it.
> 
> >   
> > > +{
> > > +	struct domain_capsule dc = {.data = data};
> > > +	struct vfio_domain *d;  
> [...]
> > 2315,6 +2352,24 @@  
> > > static long vfio_iommu_type1_ioctl(void *iommu_data,
> > >
> > >  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
> > >  			-EFAULT : 0;
> > > +	} else if (cmd == VFIO_IOMMU_CACHE_INVALIDATE) {
> > > +		struct vfio_iommu_type1_cache_invalidate ustruct;  
> > 
> > it's weird to call a variable as struct.  
> 
> Will fix it.
> 
> > > +		int ret;
> > > +
> > > +		minsz = offsetofend(struct
> > > vfio_iommu_type1_cache_invalidate,
> > > +				    info);
> > > +
> > > +		if (copy_from_user(&ustruct, (void __user *)arg, minsz))
> > > +			return -EFAULT;
> > > +
> > > +		if (ustruct.argsz < minsz || ustruct.flags)
> > > +			return -EINVAL;
> > > +
> > > +		mutex_lock(&iommu->lock);
> > > +		ret = vfio_iommu_lookup_dev(iommu, vfio_cache_inv_fn,
> > > +					    &ustruct);
> > > +		mutex_unlock(&iommu->lock);
> > > +		return ret;
> > >  	}
> > >
> > >  	return -ENOTTY;
> > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > index 9e843a1..ccf60a2 100644
> > > --- a/include/uapi/linux/vfio.h
> > > +++ b/include/uapi/linux/vfio.h
> > > @@ -794,6 +794,19 @@ struct vfio_iommu_type1_dma_unmap {
> > >  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
> > >  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> > >
> > > +/**
> > > + * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE +
> > > 24,

What's going on with these ioctl numbers?  AFAICT[1] we've used up
through VFIO_BASE + 21; this patch jumps to 24, the next patch skips to 27,
then the last patch fills in 28 & 29.  Thanks,

Alex

[1] git grep -h VFIO_BASE | grep "VFIO_BASE +" | grep -e ^#define | \
    awk '{print $NF}' | tr -d ')' | sort -u -n
Yi Liu Nov. 6, 2019, 1:31 a.m. UTC | #4
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Wednesday, November 6, 2019 6:42 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v2 1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE
> 
> On Fri, 25 Oct 2019 11:20:40 +0000
> "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> 
> > Hi Kevin,
> >
> > > From: Tian, Kevin
> > > Sent: Friday, October 25, 2019 5:14 PM
> > > To: Liu, Yi L <yi.l.liu@intel.com>; alex.williamson@redhat.com;
> > > Subject: RE: [RFC v2 1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE
> > >
> > > > From: Liu, Yi L
> > > > Sent: Thursday, October 24, 2019 8:26 PM
> > > >
> > > > From: Liu Yi L <yi.l.liu@linux.intel.com>
> > > >
> > > > When the guest "owns" the stage 1 translation structures,  the
> > > > host IOMMU driver has no knowledge of caching structure updates
> > > > unless the guest invalidation requests are trapped and passed down to the host.
> > > >
> > > > This patch adds the VFIO_IOMMU_CACHE_INVALIDATE ioctl with aims at
> > > > propagating guest stage1 IOMMU cache invalidations to the host.
> > > >
> > > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > > Signed-off-by: Liu Yi L <yi.l.liu@linux.intel.com>
> > > > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> > > > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > ---
> > > >  drivers/vfio/vfio_iommu_type1.c | 55
> > > > +++++++++++++++++++++++++++++++++++++++++
> > > >  include/uapi/linux/vfio.h       | 13 ++++++++++
> > > >  2 files changed, 68 insertions(+)
> > > >
> > > > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > > > b/drivers/vfio/vfio_iommu_type1.c index 96fddc1d..cd8d3a5 100644
> > > > --- a/drivers/vfio/vfio_iommu_type1.c
> > > > +++ b/drivers/vfio/vfio_iommu_type1.c
> > > > @@ -124,6 +124,34 @@ struct vfio_regions {
> > > >  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
> > > >  					(!list_empty(&iommu->domain_list))
> > > >
> > > > +struct domain_capsule {
> > > > +	struct iommu_domain *domain;
> > > > +	void *data;
> > > > +};
> > > > +
> > > > +/* iommu->lock must be held */
> > > > +static int
> > > > +vfio_iommu_lookup_dev(struct vfio_iommu *iommu,
> > > > +		      int (*fn)(struct device *dev, void *data),
> > > > +		      void *data)
> > >
> > > 'lookup' usually means find a device and then return. But the real
> > > purpose here is to loop all the devices within this container and
> > > then do something. Does it make more sense to be vfio_iommu_for_each_dev?
> 
> +1
> 
> > yep, I can replace it.
> >
> > >
> > > > +{
> > > > +	struct domain_capsule dc = {.data = data};
> > > > +	struct vfio_domain *d;
> > [...]
> > > 2315,6 +2352,24 @@
> > > > static long vfio_iommu_type1_ioctl(void *iommu_data,
> > > >
> > > >  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
> > > >  			-EFAULT : 0;
> > > > +	} else if (cmd == VFIO_IOMMU_CACHE_INVALIDATE) {
> > > > +		struct vfio_iommu_type1_cache_invalidate ustruct;
> > >
> > > it's weird to call a variable as struct.
> >
> > Will fix it.
> >
> > > > +		int ret;
> > > > +
> > > > +		minsz = offsetofend(struct
> > > > vfio_iommu_type1_cache_invalidate,
> > > > +				    info);
> > > > +
> > > > +		if (copy_from_user(&ustruct, (void __user *)arg, minsz))
> > > > +			return -EFAULT;
> > > > +
> > > > +		if (ustruct.argsz < minsz || ustruct.flags)
> > > > +			return -EINVAL;
> > > > +
> > > > +		mutex_lock(&iommu->lock);
> > > > +		ret = vfio_iommu_lookup_dev(iommu, vfio_cache_inv_fn,
> > > > +					    &ustruct);
> > > > +		mutex_unlock(&iommu->lock);
> > > > +		return ret;
> > > >  	}
> > > >
> > > >  	return -ENOTTY;
> > > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > > index 9e843a1..ccf60a2 100644
> > > > --- a/include/uapi/linux/vfio.h
> > > > +++ b/include/uapi/linux/vfio.h
> > > > @@ -794,6 +794,19 @@ struct vfio_iommu_type1_dma_unmap {
> > > >  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
> > > >  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> > > >
> > > > +/**
> > > > + * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE +
> > > > 24,
> 
> What's going on with these ioctl numbers?  AFAICT[1] we've used up through
> VFIO_BASE + 21, this jumps to 24, the next patch skips to 27, then the last patch fills
> in 28 & 29.  Thanks,

Hi Alex,

I rebased my patch on Eric's nested stage translation patches. His base also introduced
IOCTLs. I should have handled that better. I'll try to sync with Eric to serialize the IOCTLs.

[PATCH v6 00/22] SMMUv3 Nested Stage Setup by Eric Auger
https://lkml.org/lkml/2019/3/17/124

Thanks,
Yi Liu

> Alex
> 
> [1] git grep -h VFIO_BASE | grep "VFIO_BASE +" | grep -e ^#define | \
>     awk '{print $NF}' | tr -d ')' | sort -u -n
Eric Auger Nov. 13, 2019, 7:50 a.m. UTC | #5
Hi Yi,

On 11/6/19 2:31 AM, Liu, Yi L wrote:
>> From: Alex Williamson [mailto:alex.williamson@redhat.com]
>> Sent: Wednesday, November 6, 2019 6:42 AM
>> To: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [RFC v2 1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE
>>
>> On Fri, 25 Oct 2019 11:20:40 +0000
>> "Liu, Yi L" <yi.l.liu@intel.com> wrote:
>>
>>> Hi Kevin,
>>>
>>>> From: Tian, Kevin
>>>> Sent: Friday, October 25, 2019 5:14 PM
>>>> To: Liu, Yi L <yi.l.liu@intel.com>; alex.williamson@redhat.com;
>>>> Subject: RE: [RFC v2 1/3] vfio: VFIO_IOMMU_CACHE_INVALIDATE
>>>>
>>>>> From: Liu, Yi L
>>>>> Sent: Thursday, October 24, 2019 8:26 PM
>>>>>
>>>>> From: Liu Yi L <yi.l.liu@linux.intel.com>
>>>>>
>>>>> When the guest "owns" the stage 1 translation structures,  the
>>>>> host IOMMU driver has no knowledge of caching structure updates
>>>>> unless the guest invalidation requests are trapped and passed down to the host.
>>>>>
>>>>> This patch adds the VFIO_IOMMU_CACHE_INVALIDATE ioctl with aims at
>>>>> propagating guest stage1 IOMMU cache invalidations to the host.
>>>>>
>>>>> Cc: Kevin Tian <kevin.tian@intel.com>
>>>>> Signed-off-by: Liu Yi L <yi.l.liu@linux.intel.com>
>>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>>> ---
>>>>>  drivers/vfio/vfio_iommu_type1.c | 55
>>>>> +++++++++++++++++++++++++++++++++++++++++
>>>>>  include/uapi/linux/vfio.h       | 13 ++++++++++
>>>>>  2 files changed, 68 insertions(+)
>>>>>
>>>>> diff --git a/drivers/vfio/vfio_iommu_type1.c
>>>>> b/drivers/vfio/vfio_iommu_type1.c index 96fddc1d..cd8d3a5 100644
>>>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>>>> @@ -124,6 +124,34 @@ struct vfio_regions {
>>>>>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
>>>>>  					(!list_empty(&iommu->domain_list))
>>>>>
>>>>> +struct domain_capsule {
>>>>> +	struct iommu_domain *domain;
>>>>> +	void *data;
>>>>> +};
>>>>> +
>>>>> +/* iommu->lock must be held */
>>>>> +static int
>>>>> +vfio_iommu_lookup_dev(struct vfio_iommu *iommu,
>>>>> +		      int (*fn)(struct device *dev, void *data),
>>>>> +		      void *data)
>>>>
>>>> 'lookup' usually means find a device and then return. But the real
>>>> purpose here is to loop all the devices within this container and
>>>> then do something. Does it make more sense to be vfio_iommu_for_each_dev?
>>
>> +1
>>
>>> yep, I can replace it.
>>>
>>>>
>>>>> +{
>>>>> +	struct domain_capsule dc = {.data = data};
>>>>> +	struct vfio_domain *d;
>>> [...]
>>>> 2315,6 +2352,24 @@
>>>>> static long vfio_iommu_type1_ioctl(void *iommu_data,
>>>>>
>>>>>  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
>>>>>  			-EFAULT : 0;
>>>>> +	} else if (cmd == VFIO_IOMMU_CACHE_INVALIDATE) {
>>>>> +		struct vfio_iommu_type1_cache_invalidate ustruct;
>>>>
>>>> it's weird to call a variable as struct.
>>>
>>> Will fix it.
>>>
>>>>> +		int ret;
>>>>> +
>>>>> +		minsz = offsetofend(struct
>>>>> vfio_iommu_type1_cache_invalidate,
>>>>> +				    info);
>>>>> +
>>>>> +		if (copy_from_user(&ustruct, (void __user *)arg, minsz))
>>>>> +			return -EFAULT;
>>>>> +
>>>>> +		if (ustruct.argsz < minsz || ustruct.flags)
>>>>> +			return -EINVAL;
>>>>> +
>>>>> +		mutex_lock(&iommu->lock);
>>>>> +		ret = vfio_iommu_lookup_dev(iommu, vfio_cache_inv_fn,
>>>>> +					    &ustruct);
>>>>> +		mutex_unlock(&iommu->lock);
>>>>> +		return ret;
>>>>>  	}
>>>>>
>>>>>  	return -ENOTTY;
>>>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>>>> index 9e843a1..ccf60a2 100644
>>>>> --- a/include/uapi/linux/vfio.h
>>>>> +++ b/include/uapi/linux/vfio.h
>>>>> @@ -794,6 +794,19 @@ struct vfio_iommu_type1_dma_unmap {
>>>>>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>>>>>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>>>>>
>>>>> +/**
>>>>> + * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE +
>>>>> 24,
>>
>> What's going on with these ioctl numbers?  AFAICT[1] we've used up through
>> VFIO_BASE + 21, this jumps to 24, the next patch skips to 27, then the last patch fills
>> in 28 & 29.  Thanks,
> 
> Hi Alex,
> 
> I rebase my patch to Eric's nested stage translation patches. His base also introduced
> IOCTLs. I should have made it better. I'll try to sync with Eric to serialize the IOCTLs.
> 
> [PATCH v6 00/22] SMMUv3 Nested Stage Setup by Eric Auger
> https://lkml.org/lkml/2019/3/17/124

Feel free to choose your IOCTL numbers without worrying about my series.
I will adapt to yours if my work gets unblocked at some point.

Thanks

Eric
> 
> Thanks,
> Yi Liu
> 
>> Alex
>>
>> [1] git grep -h VFIO_BASE | grep "VFIO_BASE +" | grep -e ^#define | \
>>     awk '{print $NF}' | tr -d ')' | sort -u -n
>

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 96fddc1d..cd8d3a5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -124,6 +124,34 @@  struct vfio_regions {
 #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
 					(!list_empty(&iommu->domain_list))
 
+struct domain_capsule {
+	struct iommu_domain *domain;
+	void *data;
+};
+
+/* iommu->lock must be held */
+static int
+vfio_iommu_lookup_dev(struct vfio_iommu *iommu,
+		      int (*fn)(struct device *dev, void *data),
+		      void *data)
+{
+	struct domain_capsule dc = {.data = data};
+	struct vfio_domain *d;
+	struct vfio_group *g;
+	int ret = 0;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		dc.domain = d->domain;
+		list_for_each_entry(g, &d->group_list, next) {
+			ret = iommu_group_for_each_dev(g->iommu_group,
+						       &dc, fn);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
 static int put_pfn(unsigned long pfn, int prot);
 
 /*
@@ -2211,6 +2239,15 @@  static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
 	return ret;
 }
 
+static int vfio_cache_inv_fn(struct device *dev, void *data)
+{
+	struct domain_capsule *dc = (struct domain_capsule *)data;
+	struct vfio_iommu_type1_cache_invalidate *ustruct =
+		(struct vfio_iommu_type1_cache_invalidate *)dc->data;
+
+	return iommu_cache_invalidate(dc->domain, dev, &ustruct->info);
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
 				   unsigned int cmd, unsigned long arg)
 {
@@ -2315,6 +2352,24 @@  static long vfio_iommu_type1_ioctl(void *iommu_data,
 
 		return copy_to_user((void __user *)arg, &unmap, minsz) ?
 			-EFAULT : 0;
+	} else if (cmd == VFIO_IOMMU_CACHE_INVALIDATE) {
+		struct vfio_iommu_type1_cache_invalidate ustruct;
+		int ret;
+
+		minsz = offsetofend(struct vfio_iommu_type1_cache_invalidate,
+				    info);
+
+		if (copy_from_user(&ustruct, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (ustruct.argsz < minsz || ustruct.flags)
+			return -EINVAL;
+
+		mutex_lock(&iommu->lock);
+		ret = vfio_iommu_lookup_dev(iommu, vfio_cache_inv_fn,
+					    &ustruct);
+		mutex_unlock(&iommu->lock);
+		return ret;
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9e843a1..ccf60a2 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -794,6 +794,19 @@  struct vfio_iommu_type1_dma_unmap {
 #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
 #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
 
+/**
+ * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE + 24,
+ *			struct vfio_iommu_type1_cache_invalidate)
+ *
+ * Propagate guest IOMMU cache invalidation to the host.
+ */
+struct vfio_iommu_type1_cache_invalidate {
+	__u32   argsz;
+	__u32   flags;
+	struct iommu_cache_invalidate_info info;
+};
+#define VFIO_IOMMU_CACHE_INVALIDATE      _IO(VFIO_TYPE, VFIO_BASE + 24)
+
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
 /*