diff mbox series

[vfio,08/13] vfio: Introduce the DMA logging feature support

Message ID 20220630102545.18005-9-yishaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add device DMA logging support for mlx5 driver | expand

Commit Message

Yishai Hadas June 30, 2022, 10:25 a.m. UTC
Introduce the DMA logging feature support in the vfio core layer.

It includes the processing of the device start/stop/report DMA logging
UAPIs and calling the relevant driver 'op' to do the work.

Specifically,
Upon start, the core translates the given input ranges into an interval
tree, checks for unexpected overlapping, non aligned ranges and then
pass the translated input to the driver for start tracking the given
ranges.

Upon report, the core translates the given input user space bitmap and
page size into an IOVA kernel bitmap iterator. Then it iterates it and
call the driver to set the corresponding bits for the dirtied pages in a
specific IOVA range.

Upon stop, the driver is called to stop the previous started tracking.

The next patches from the series will introduce the mlx5 driver
implementation for the logging ops.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/pci/vfio_pci_core.c |   5 +
 drivers/vfio/vfio_main.c         | 162 +++++++++++++++++++++++++++++++
 include/linux/vfio.h             |  21 +++-
 3 files changed, 186 insertions(+), 2 deletions(-)

Comments

kernel test robot June 30, 2022, 1:40 p.m. UTC | #1
Hi Yishai,

I love your patch! Perhaps something to improve:

[auto build test WARNING on awilliam-vfio/next]
[also build test WARNING on linus/master v5.19-rc4 next-20220630]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Yishai-Hadas/Add-device-DMA-logging-support-for-mlx5-driver/20220630-182957
base:   https://github.com/awilliam/linux-vfio.git next
config: i386-randconfig-a003 (https://download.01.org/0day-ci/archive/20220630/202206302140.XlWYhlXa-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/fea20efca2795fd8480cb0755c54062bad2ea322
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Yishai-Hadas/Add-device-DMA-logging-support-for-mlx5-driver/20220630-182957
        git checkout fea20efca2795fd8480cb0755c54062bad2ea322
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/vfio/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/vfio/vfio_main.c: In function 'vfio_ioctl_device_feature_logging_start':
>> drivers/vfio/vfio_main.c:1640:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    1640 |         ranges = (struct vfio_device_feature_dma_logging_range __user *)
         |                  ^
   drivers/vfio/vfio_main.c: In function 'vfio_ioctl_device_feature_logging_report':
   drivers/vfio/vfio_main.c:1730:37: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    1730 |                                     (unsigned long __user *)report.bitmap);
         |                                     ^


vim +1640 drivers/vfio/vfio_main.c

  1607	
  1608	static int
  1609	vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
  1610						u32 flags, void __user *arg,
  1611						size_t argsz)
  1612	{
  1613		size_t minsz =
  1614			offsetofend(struct vfio_device_feature_dma_logging_control,
  1615				    ranges);
  1616		struct vfio_device_feature_dma_logging_range __user *ranges;
  1617		struct vfio_device_feature_dma_logging_control control;
  1618		struct vfio_device_feature_dma_logging_range range;
  1619		struct rb_root_cached root = RB_ROOT_CACHED;
  1620		struct interval_tree_node *nodes;
  1621		u32 nnodes;
  1622		int i, ret;
  1623	
  1624		if (!device->log_ops)
  1625			return -ENOTTY;
  1626	
  1627		ret = vfio_check_feature(flags, argsz,
  1628					 VFIO_DEVICE_FEATURE_SET,
  1629					 sizeof(control));
  1630		if (ret != 1)
  1631			return ret;
  1632	
  1633		if (copy_from_user(&control, arg, minsz))
  1634			return -EFAULT;
  1635	
  1636		nnodes = control.num_ranges;
  1637		if (!nnodes || nnodes > LOG_MAX_RANGES)
  1638			return -EINVAL;
  1639	
> 1640		ranges = (struct vfio_device_feature_dma_logging_range __user *)
  1641									control.ranges;
  1642		nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
  1643				      GFP_KERNEL);
  1644		if (!nodes)
  1645			return -ENOMEM;
  1646	
  1647		for (i = 0; i < nnodes; i++) {
  1648			if (copy_from_user(&range, &ranges[i], sizeof(range))) {
  1649				ret = -EFAULT;
  1650				goto end;
  1651			}
  1652			if (!IS_ALIGNED(range.iova, control.page_size) ||
  1653			    !IS_ALIGNED(range.length, control.page_size)) {
  1654				ret = -EINVAL;
  1655				goto end;
  1656			}
  1657			nodes[i].start = range.iova;
  1658			nodes[i].last = range.iova + range.length - 1;
  1659			if (interval_tree_iter_first(&root, nodes[i].start,
  1660						     nodes[i].last)) {
  1661				/* Range overlapping */
  1662				ret = -EINVAL;
  1663				goto end;
  1664			}
  1665			interval_tree_insert(nodes + i, &root);
  1666		}
  1667	
  1668		ret = device->log_ops->log_start(device, &root, nnodes,
  1669						 &control.page_size);
  1670		if (ret)
  1671			goto end;
  1672	
  1673		if (copy_to_user(arg, &control, sizeof(control))) {
  1674			ret = -EFAULT;
  1675			device->log_ops->log_stop(device);
  1676		}
  1677	
  1678	end:
  1679		kfree(nodes);
  1680		return ret;
  1681	}
  1682
Jason Gunthorpe June 30, 2022, 1:48 p.m. UTC | #2
On Thu, Jun 30, 2022 at 09:40:01PM +0800, kernel test robot wrote:

>   1636		nnodes = control.num_ranges;
>   1637		if (!nnodes || nnodes > LOG_MAX_RANGES)
>   1638			return -EINVAL;
>   1639	
> > 1640		ranges = (struct vfio_device_feature_dma_logging_range __user *)
>   1641									control.ranges;

Things like this should always use u64_to_user_ptr()

Jason
kernel test robot July 1, 2022, 4:55 a.m. UTC | #3
Hi Yishai,

I love your patch! Yet something to improve:

[auto build test ERROR on awilliam-vfio/next]
[also build test ERROR on linus/master v5.19-rc4 next-20220630]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Yishai-Hadas/Add-device-DMA-logging-support-for-mlx5-driver/20220630-182957
base:   https://github.com/awilliam/linux-vfio.git next
config: arm-randconfig-r005-20220629 (https://download.01.org/0day-ci/archive/20220701/202207011231.1oPQhSzo-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/fea20efca2795fd8480cb0755c54062bad2ea322
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Yishai-Hadas/Add-device-DMA-logging-support-for-mlx5-driver/20220630-182957
        git checkout fea20efca2795fd8480cb0755c54062bad2ea322
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arm-linux-gnueabi-ld: drivers/vfio/vfio_main.o: in function `vfio_ioctl_device_feature_logging_start':
>> vfio_main.c:(.text+0x61a): undefined reference to `interval_tree_iter_first'
>> arm-linux-gnueabi-ld: vfio_main.c:(.text+0x62e): undefined reference to `interval_tree_insert'
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 2e003913c561..8dcd212971fe 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1862,6 +1862,11 @@  int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
 			return -EINVAL;
 	}
 
+	if (vdev->vdev.log_ops && !(vdev->vdev.log_ops->log_start &&
+	    vdev->vdev.log_ops->log_stop &&
+	    vdev->vdev.log_ops->log_read_and_clear))
+		return -EINVAL;
+
 	/*
 	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
 	 * by the host or other users.  We cannot capture the VFs if they
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index aac9213a783d..8eb8ba837059 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -32,6 +32,8 @@ 
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
+#include <linux/interval_tree.h>
+#include <linux/iova_bitmap.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -1601,6 +1603,154 @@  static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
 	return 0;
 }
 
+#define LOG_MAX_RANGES 1024
+
+static int
+vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
+					u32 flags, void __user *arg,
+					size_t argsz)
+{
+	size_t minsz =
+		offsetofend(struct vfio_device_feature_dma_logging_control,
+			    ranges);
+	struct vfio_device_feature_dma_logging_range __user *ranges;
+	struct vfio_device_feature_dma_logging_control control;
+	struct vfio_device_feature_dma_logging_range range;
+	struct rb_root_cached root = RB_ROOT_CACHED;
+	struct interval_tree_node *nodes;
+	u32 nnodes;
+	int i, ret;
+
+	if (!device->log_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_SET,
+				 sizeof(control));
+	if (ret != 1)
+		return ret;
+
+	if (copy_from_user(&control, arg, minsz))
+		return -EFAULT;
+
+	nnodes = control.num_ranges;
+	if (!nnodes || nnodes > LOG_MAX_RANGES)
+		return -EINVAL;
+
+	ranges = (struct vfio_device_feature_dma_logging_range __user *)
+								control.ranges;
+	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+			      GFP_KERNEL);
+	if (!nodes)
+		return -ENOMEM;
+
+	for (i = 0; i < nnodes; i++) {
+		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
+			ret = -EFAULT;
+			goto end;
+		}
+		if (!IS_ALIGNED(range.iova, control.page_size) ||
+		    !IS_ALIGNED(range.length, control.page_size)) {
+			ret = -EINVAL;
+			goto end;
+		}
+		nodes[i].start = range.iova;
+		nodes[i].last = range.iova + range.length - 1;
+		if (interval_tree_iter_first(&root, nodes[i].start,
+					     nodes[i].last)) {
+			/* Range overlapping */
+			ret = -EINVAL;
+			goto end;
+		}
+		interval_tree_insert(nodes + i, &root);
+	}
+
+	ret = device->log_ops->log_start(device, &root, nnodes,
+					 &control.page_size);
+	if (ret)
+		goto end;
+
+	if (copy_to_user(arg, &control, sizeof(control))) {
+		ret = -EFAULT;
+		device->log_ops->log_stop(device);
+	}
+
+end:
+	kfree(nodes);
+	return ret;
+}
+
+static int
+vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
+				       u32 flags, void __user *arg,
+				       size_t argsz)
+{
+	int ret;
+
+	if (!device->log_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_SET, 0);
+	if (ret != 1)
+		return ret;
+
+	return device->log_ops->log_stop(device);
+}
+
+static int
+vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
+					 u32 flags, void __user *arg,
+					 size_t argsz)
+{
+	size_t minsz =
+		offsetofend(struct vfio_device_feature_dma_logging_report,
+			    bitmap);
+	struct vfio_device_feature_dma_logging_report report;
+	struct iova_bitmap_iter iter;
+	int ret;
+
+	if (!device->log_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_GET,
+				 sizeof(report));
+	if (ret != 1)
+		return ret;
+
+	if (copy_from_user(&report, arg, minsz))
+		return -EFAULT;
+
+	if (report.page_size < PAGE_SIZE)
+		return -EINVAL;
+
+	iova_bitmap_init(&iter.dirty, report.iova, ilog2(report.page_size));
+	ret = iova_bitmap_iter_init(&iter, report.iova, report.length,
+				    (unsigned long __user *)report.bitmap);
+	if (ret)
+		return ret;
+
+	for (; iova_bitmap_iter_done(&iter);
+	     iova_bitmap_iter_advance(&iter)) {
+		ret = iova_bitmap_iter_get(&iter);
+		if (ret)
+			break;
+
+		ret = device->log_ops->log_read_and_clear(device,
+			iova_bitmap_iova(&iter),
+			iova_bitmap_length(&iter), &iter.dirty);
+
+		iova_bitmap_iter_put(&iter);
+
+		if (ret)
+			break;
+	}
+
+	iova_bitmap_iter_free(&iter);
+	return ret;
+}
+
 static int vfio_ioctl_device_feature(struct vfio_device *device,
 				     struct vfio_device_feature __user *arg)
 {
@@ -1634,6 +1784,18 @@  static int vfio_ioctl_device_feature(struct vfio_device *device,
 		return vfio_ioctl_device_feature_mig_device_state(
 			device, feature.flags, arg->data,
 			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+		return vfio_ioctl_device_feature_logging_start(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+		return vfio_ioctl_device_feature_logging_stop(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+		return vfio_ioctl_device_feature_logging_report(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
 	default:
 		if (unlikely(!device->ops->device_feature))
 			return -EINVAL;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 4d26e149db81..feed84d686ec 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -14,6 +14,7 @@ 
 #include <linux/workqueue.h>
 #include <linux/poll.h>
 #include <uapi/linux/vfio.h>
+#include <linux/iova_bitmap.h>
 
 struct kvm;
 
@@ -33,10 +34,11 @@  struct vfio_device {
 	struct device *dev;
 	const struct vfio_device_ops *ops;
 	/*
-	 * mig_ops is a static property of the vfio_device which must be set
-	 * prior to registering the vfio_device.
+	 * mig_ops/log_ops is a static property of the vfio_device which must
+	 * be set prior to registering the vfio_device.
 	 */
 	const struct vfio_migration_ops *mig_ops;
+	const struct vfio_log_ops *log_ops;
 	struct vfio_group *group;
 	struct vfio_device_set *dev_set;
 	struct list_head dev_set_list;
@@ -104,6 +106,21 @@  struct vfio_migration_ops {
 				   enum vfio_device_mig_state *curr_state);
 };
 
+/**
+ * @log_start: Optional callback to ask the device start DMA logging.
+ * @log_stop: Optional callback to ask the device stop DMA logging.
+ * @log_read_and_clear: Optional callback to ask the device read
+ *         and clear the dirty DMAs in some given range.
+ */
+struct vfio_log_ops {
+	int (*log_start)(struct vfio_device *device,
+		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
+	int (*log_stop)(struct vfio_device *device);
+	int (*log_read_and_clear)(struct vfio_device *device,
+		unsigned long iova, unsigned long length,
+		struct iova_bitmap *dirty);
+};
+
 /**
  * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
  * @flags: Arg from the device_feature op