diff mbox series

[RFC,v3,2/8] vfio/type1: Make per-application (VM) PASID quota tunable

Message ID 1580299912-86084-3-git-send-email-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series vfio: expose virtual Shared Virtual Addressing to VMs | expand

Commit Message

Yi Liu Jan. 29, 2020, 12:11 p.m. UTC
From: Liu Yi L <yi.l.liu@intel.com>

The PASID quota is per-application (VM) according to vfio's PASID
management rule. For better flexibility, quota shall be user tunable
. This patch provides a VFIO based user interface for which quota can
be adjusted. However, quota cannot be adjusted downward below the
number of outstanding PASIDs.

This patch only makes the per-VM PASID quota tunable. While for the
way to tune the default PASID quota, it may require a new vfio module
option or other way. This may be another patchset in future.

Previous discussions:
https://patchwork.kernel.org/patch/11209429/

Cc: Kevin Tian <kevin.tian@intel.com>
CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 drivers/vfio/vfio_iommu_type1.c | 33 +++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h       | 22 ++++++++++++++++++++++
 2 files changed, 55 insertions(+)

Comments

Alex Williamson Jan. 29, 2020, 11:56 p.m. UTC | #1
On Wed, 29 Jan 2020 04:11:46 -0800
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> From: Liu Yi L <yi.l.liu@intel.com>
> 
> The PASID quota is per-application (VM) according to vfio's PASID
> management rule. For better flexibility, quota shall be user tunable
> . This patch provides a VFIO based user interface for which quota can
> be adjusted. However, quota cannot be adjusted downward below the
> number of outstanding PASIDs.
> 
> This patch only makes the per-VM PASID quota tunable. While for the
> way to tune the default PASID quota, it may require a new vfio module
> option or other way. This may be another patchset in future.

If we give an unprivileged user the ability to increase their quota,
why do we even have a quota at all?  I figured we were going to have a
module option tunable so its under the control of the system admin.
Thanks,

Alex

> 
> Previous discussions:
> https://patchwork.kernel.org/patch/11209429/
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Cc: Eric Auger <eric.auger@redhat.com>
> Cc: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 33 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h       | 22 ++++++++++++++++++++++
>  2 files changed, 55 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index e836d04..1cf75f5 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2243,6 +2243,27 @@ static int vfio_iommu_type1_pasid_free(struct vfio_iommu *iommu,
>  	return ret;
>  }
>  
> +static int vfio_iommu_type1_set_pasid_quota(struct vfio_iommu *iommu,
> +					    u32 quota)
> +{
> +	struct vfio_mm *vmm = iommu->vmm;
> +	int ret = 0;
> +
> +	mutex_lock(&iommu->lock);
> +	mutex_lock(&vmm->pasid_lock);
> +	if (vmm->pasid_count > quota) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +	vmm->pasid_quota = quota;
> +	ret = quota;
> +
> +out_unlock:
> +	mutex_unlock(&vmm->pasid_lock);
> +	mutex_unlock(&iommu->lock);
> +	return ret;
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>  				   unsigned int cmd, unsigned long arg)
>  {
> @@ -2389,6 +2410,18 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  		default:
>  			return -EINVAL;
>  		}
> +	} else if (cmd == VFIO_IOMMU_SET_PASID_QUOTA) {
> +		struct vfio_iommu_type1_pasid_quota quota;
> +
> +		minsz = offsetofend(struct vfio_iommu_type1_pasid_quota,
> +				    quota);
> +
> +		if (copy_from_user(&quota, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (quota.argsz < minsz)
> +			return -EINVAL;
> +		return vfio_iommu_type1_set_pasid_quota(iommu, quota.quota);
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 298ac80..d4bf415 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -835,6 +835,28 @@ struct vfio_iommu_type1_pasid_request {
>   */
>  #define VFIO_IOMMU_PASID_REQUEST	_IO(VFIO_TYPE, VFIO_BASE + 22)
>  
> +/**
> + * @quota: the new pasid quota which a userspace application (e.g. VM)
> + * is configured.
> + */
> +struct vfio_iommu_type1_pasid_quota {
> +	__u32	argsz;
> +	__u32	flags;
> +	__u32	quota;
> +};
> +
> +/**
> + * VFIO_IOMMU_SET_PASID_QUOTA - _IOW(VFIO_TYPE, VFIO_BASE + 23,
> + *				struct vfio_iommu_type1_pasid_quota)
> + *
> + * Availability of this feature depends on PASID support in the device,
> + * its bus, the underlying IOMMU and the CPU architecture. In VFIO, it
> + * is available after VFIO_SET_IOMMU.
> + *
> + * returns: latest quota on success, -errno on failure.
> + */
> +#define VFIO_IOMMU_SET_PASID_QUOTA	_IO(VFIO_TYPE, VFIO_BASE + 23)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>  
>  /*
Yi Liu Feb. 5, 2020, 6:23 a.m. UTC | #2
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Thursday, January 30, 2020 7:57 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v3 2/8] vfio/type1: Make per-application (VM) PASID quota tunable
> 
> On Wed, 29 Jan 2020 04:11:46 -0800
> "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> 
> > From: Liu Yi L <yi.l.liu@intel.com>
> >
> > The PASID quota is per-application (VM) according to vfio's PASID
> > management rule. For better flexibility, quota shall be user tunable
> > . This patch provides a VFIO based user interface for which quota can
> > be adjusted. However, quota cannot be adjusted downward below the
> > number of outstanding PASIDs.
> >
> > This patch only makes the per-VM PASID quota tunable. While for the
> > way to tune the default PASID quota, it may require a new vfio module
> > option or other way. This may be another patchset in future.
> 
> If we give an unprivileged user the ability to increase their quota,
> why do we even have a quota at all?  I figured we were going to have a
> module option tunable so its under the control of the system admin.
> Thanks,

Right. I'll need to add an option. Will add it in next version. :-)

Regards,
Yi Liu
Jacob Pan Feb. 7, 2020, 7:43 p.m. UTC | #3
On Wed, 29 Jan 2020 04:11:46 -0800
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> From: Liu Yi L <yi.l.liu@intel.com>
> 
> The PASID quota is per-application (VM) according to vfio's PASID
> management rule. For better flexibility, quota shall be user tunable
> . This patch provides a VFIO based user interface for which quota can
> be adjusted. However, quota cannot be adjusted downward below the
> number of outstanding PASIDs.
> 
> This patch only makes the per-VM PASID quota tunable. While for the
> way to tune the default PASID quota, it may require a new vfio module
> option or other way. This may be another patchset in future.
> 
One issue we need to solve is how to share PASIDs at the system
level, e.g. Both VMs and baremetal drivers could use PASIDs.

This patch is granting quota to a guest w/o knowing the remaining
system capacity. So guest PASID allocation could fail even within its
quota.

The solution I am thinking is to enforce quota at IOASID common
code, since IOASID APIs already used to manage system-wide allocation.
How about the following changes to IOASID?
1. introduce quota in ioasid_set (could have a soft limit for better
sharing)

2. introduce an API to create a set with quota before allocation, e.g.
ioasid_set_id = ioasid_alloc_set(size, token)
set_id will be used for ioasid_alloc() instead of token.

3. introduce API to adjust set quota ioasid_adjust_set_size(set_id,
size)

4. API to check remaining PASIDs ioasid_get_capacity(set_id); //return
system capacity if set_id == 0;

5. API to set system capacity, ioasid_set_capacity(nr_pasids), e.g. if
system has 20 bit PASIDs, IOMMU driver needs to call
ioasid_set_capacity(1<<20) during boot.

6. Optional set level APIs. e.g. ioasid_free_set(set_id), frees all
IOASIDs in the set.

With these APIs, this patch could query PASID capacity at both system
and set level and adjust quota within range. i.e.
1. IOMMU vendor driver(or other driver to use PASID w/o IOMMU) sets
system wide capacity during boot.
2. VFIO Call ioasid_alloc_set() when allocating vfio_mm(), set default
quota
3. Adjust quota per set with ioasid_adjust_set_size() as the tunable in
this patch.

Thoughts?

> Previous discussions:
> https://patchwork.kernel.org/patch/11209429/
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> CC: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Cc: Eric Auger <eric.auger@redhat.com>
> Cc: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 33
> +++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h       |
> 22 ++++++++++++++++++++++ 2 files changed, 55 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c
> b/drivers/vfio/vfio_iommu_type1.c index e836d04..1cf75f5 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2243,6 +2243,27 @@ static int vfio_iommu_type1_pasid_free(struct
> vfio_iommu *iommu, return ret;
>  }
>  
> +static int vfio_iommu_type1_set_pasid_quota(struct vfio_iommu *iommu,
> +					    u32 quota)
> +{
> +	struct vfio_mm *vmm = iommu->vmm;
> +	int ret = 0;
> +
> +	mutex_lock(&iommu->lock);
> +	mutex_lock(&vmm->pasid_lock);
> +	if (vmm->pasid_count > quota) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +	vmm->pasid_quota = quota;
> +	ret = quota;
> +
> +out_unlock:
> +	mutex_unlock(&vmm->pasid_lock);
> +	mutex_unlock(&iommu->lock);
> +	return ret;
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>  				   unsigned int cmd, unsigned long
> arg) {
> @@ -2389,6 +2410,18 @@ static long vfio_iommu_type1_ioctl(void
> *iommu_data, default:
>  			return -EINVAL;
>  		}
> +	} else if (cmd == VFIO_IOMMU_SET_PASID_QUOTA) {
> +		struct vfio_iommu_type1_pasid_quota quota;
> +
> +		minsz = offsetofend(struct
> vfio_iommu_type1_pasid_quota,
> +				    quota);
> +
> +		if (copy_from_user(&quota, (void __user *)arg,
> minsz))
> +			return -EFAULT;
> +
> +		if (quota.argsz < minsz)
> +			return -EINVAL;
> +		return vfio_iommu_type1_set_pasid_quota(iommu,
> quota.quota); }
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 298ac80..d4bf415 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -835,6 +835,28 @@ struct vfio_iommu_type1_pasid_request {
>   */
>  #define VFIO_IOMMU_PASID_REQUEST	_IO(VFIO_TYPE, VFIO_BASE +
> 22) 
> +/**
> + * @quota: the new pasid quota which a userspace application (e.g.
> VM)
> + * is configured.
> + */
> +struct vfio_iommu_type1_pasid_quota {
> +	__u32	argsz;
> +	__u32	flags;
> +	__u32	quota;
> +};
> +
> +/**
> + * VFIO_IOMMU_SET_PASID_QUOTA - _IOW(VFIO_TYPE, VFIO_BASE + 23,
> + *				struct
> vfio_iommu_type1_pasid_quota)
> + *
> + * Availability of this feature depends on PASID support in the
> device,
> + * its bus, the underlying IOMMU and the CPU architecture. In VFIO,
> it
> + * is available after VFIO_SET_IOMMU.
> + *
> + * returns: latest quota on success, -errno on failure.
> + */
> +#define VFIO_IOMMU_SET_PASID_QUOTA	_IO(VFIO_TYPE, VFIO_BASE +
> 23) +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU
> -------- */ 
>  /*

[Jacob Pan]
Yi Liu Feb. 8, 2020, 8:46 a.m. UTC | #4
Hi Jacob,

> From: Jacob Pan [mailto:jacob.jun.pan@linux.intel.com]
> Sent: Saturday, February 8, 2020 3:44 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v3 2/8] vfio/type1: Make per-application (VM) PASID quota tunable
> 
> On Wed, 29 Jan 2020 04:11:46 -0800
> "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> 
> > From: Liu Yi L <yi.l.liu@intel.com>
> >
> > The PASID quota is per-application (VM) according to vfio's PASID
> > management rule. For better flexibility, quota shall be user tunable
> > . This patch provides a VFIO based user interface for which quota can
> > be adjusted. However, quota cannot be adjusted downward below the
> > number of outstanding PASIDs.
> >
> > This patch only makes the per-VM PASID quota tunable. While for the
> > way to tune the default PASID quota, it may require a new vfio module
> > option or other way. This may be another patchset in future.
> >
> One issue we need to solve is how to share PASIDs at the system
> level, e.g. Both VMs and baremetal drivers could use PASIDs.
> 
> This patch is granting quota to a guest w/o knowing the remaining
> system capacity. So guest PASID allocation could fail even within its
> quota.

that's true.

> The solution I am thinking is to enforce quota at IOASID common
> code, since IOASID APIs already used to manage system-wide allocation.
> How about the following changes to IOASID?
> 1. introduce quota in ioasid_set (could have a soft limit for better
> sharing)
>
> 2. introduce an API to create a set with quota before allocation, e.g.
> ioasid_set_id = ioasid_alloc_set(size, token)
> set_id will be used for ioasid_alloc() instead of token.

Is the token the mm pointer? I guess you may want to add one more
API like ioasid_get_set_id(token), thus that other ioasid user could get
set_id with their token. If token is the same give them the same set_id.

> 
> 3. introduce API to adjust set quota ioasid_adjust_set_size(set_id,
> size)
> 
> 4. API to check remaining PASIDs ioasid_get_capacity(set_id); //return
> system capacity if set_id == 0;
> 
> 5. API to set system capacity, ioasid_set_capacity(nr_pasids), e.g. if
> system has 20 bit PASIDs, IOMMU driver needs to call
> ioasid_set_capacity(1<<20) during boot.

yes, this is definitely necessary.

> 6. Optional set level APIs. e.g. ioasid_free_set(set_id), frees all
> IOASIDs in the set.

If this is provided. I think VFIO may be not necessary to track allocated
PASIDs. When VM is down or crashed, VFIO just use this API to reclaim
allocated PASIDs.

> With these APIs, this patch could query PASID capacity at both system
> and set level and adjust quota within range. i.e.
> 1. IOMMU vendor driver(or other driver to use PASID w/o IOMMU) sets
> system wide capacity during boot.
> 2. VFIO Call ioasid_alloc_set() when allocating vfio_mm(), set default
> quota
> 3. Adjust quota per set with ioasid_adjust_set_size() as the tunable in
> this patch.

I think this is abstraction of the allocated PASID track logic in a common
layer. It would simplify user logic.

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index e836d04..1cf75f5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2243,6 +2243,27 @@  static int vfio_iommu_type1_pasid_free(struct vfio_iommu *iommu,
 	return ret;
 }
 
+static int vfio_iommu_type1_set_pasid_quota(struct vfio_iommu *iommu,
+					    u32 quota)
+{
+	struct vfio_mm *vmm = iommu->vmm;
+	int ret = 0;
+
+	mutex_lock(&iommu->lock);
+	mutex_lock(&vmm->pasid_lock);
+	if (vmm->pasid_count > quota) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	vmm->pasid_quota = quota;
+	ret = quota;
+
+out_unlock:
+	mutex_unlock(&vmm->pasid_lock);
+	mutex_unlock(&iommu->lock);
+	return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
 				   unsigned int cmd, unsigned long arg)
 {
@@ -2389,6 +2410,18 @@  static long vfio_iommu_type1_ioctl(void *iommu_data,
 		default:
 			return -EINVAL;
 		}
+	} else if (cmd == VFIO_IOMMU_SET_PASID_QUOTA) {
+		struct vfio_iommu_type1_pasid_quota quota;
+
+		minsz = offsetofend(struct vfio_iommu_type1_pasid_quota,
+				    quota);
+
+		if (copy_from_user(&quota, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (quota.argsz < minsz)
+			return -EINVAL;
+		return vfio_iommu_type1_set_pasid_quota(iommu, quota.quota);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 298ac80..d4bf415 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -835,6 +835,28 @@  struct vfio_iommu_type1_pasid_request {
  */
 #define VFIO_IOMMU_PASID_REQUEST	_IO(VFIO_TYPE, VFIO_BASE + 22)
 
+/**
+ * @quota: the new pasid quota which a userspace application (e.g. VM)
+ * is configured.
+ */
+struct vfio_iommu_type1_pasid_quota {
+	__u32	argsz;
+	__u32	flags;
+	__u32	quota;
+};
+
+/**
+ * VFIO_IOMMU_SET_PASID_QUOTA - _IOW(VFIO_TYPE, VFIO_BASE + 23,
+ *				struct vfio_iommu_type1_pasid_quota)
+ *
+ * Availability of this feature depends on PASID support in the device,
+ * its bus, the underlying IOMMU and the CPU architecture. In VFIO, it
+ * is available after VFIO_SET_IOMMU.
+ *
+ * returns: latest quota on success, -errno on failure.
+ */
+#define VFIO_IOMMU_SET_PASID_QUOTA	_IO(VFIO_TYPE, VFIO_BASE + 23)
+
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
 /*