diff mbox series

[02/12] vfio/mdev: Allow the mdev_parent_ops to specify the device driver to bind

Message ID 2-v1-d88406ed308e+418-vfio3_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Remove vfio_mdev.c, mdev_parent_ops and more | expand

Commit Message

Jason Gunthorpe April 23, 2021, 11:02 p.m. UTC
This allows a mdev driver to opt out of using vfio_mdev.c, instead the
driver will provide a 'struct mdev_driver' and register directly with the
driver core.

Much of mdev_parent_ops becomes unused in this mode:
- create()/remove() are done via the mdev_driver probe()/remove()
- mdev_attr_groups becomes mdev_driver driver.dev_groups
- Wrapper function callbacks are replaced with the same ones from
  struct vfio_device_ops

Following patches convert all the drivers.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/mdev/mdev_core.c   | 64 ++++++++++++++++++++++++++++-----
 drivers/vfio/mdev/mdev_driver.c | 17 ++++++++-
 include/linux/mdev.h            |  3 ++
 3 files changed, 75 insertions(+), 9 deletions(-)

Comments

Christoph Hellwig April 26, 2021, 2:02 p.m. UTC | #1
On Fri, Apr 23, 2021 at 08:02:59PM -0300, Jason Gunthorpe wrote:
> +/*
> + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> + * the creation of the mdev all the way back to sysfs. This is a weird model
> + * that doesn't fit in the driver core well, nor does it seem to appear any
> + * place else in the kernel, so use a simple hack.
> + */
> +static int mdev_bind_driver(struct mdev_device *mdev)
> +{
> +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> +	int ret;
> +
> +	if (!drv)
> +		drv = &vfio_mdev_driver;
> +
> +	while (1) {
> +		device_lock(&mdev->dev);
> +		if (mdev->dev.driver == &drv->driver) {
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +		if (mdev->probe_err) {
> +			ret = mdev->probe_err;
> +			goto out_unlock;
> +		}
> +		device_unlock(&mdev->dev);
> +		ret = device_attach(&mdev->dev);
> +		if (ret)
> +			return ret;
> +		mdev->probe_err = -EINVAL;
> +	}
> +	return 0;
> +
> +out_unlock:
> +	device_unlock(&mdev->dev);
> +	return ret;
> +}

This looks strange to me, and I think by open coding
device_attach we could do much better here, something like:

static int mdev_bind_driver(struct mdev_device *mdev)
{
	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
	int ret = -EINVAL;

	if (!drv)
		drv = &vfio_mdev_driver;

	device_lock(&mdev->dev);
	if (WARN_ON_ONCE(device_is_bound(dev)))
		goto out_unlock;
	if (mdev->dev.p->dead)
	 	goto out_unlock;

	mdev->dev.driver = &drv->driver;
	ret = device_bind_driver(&mdev->dev);
	if (ret)
		mdev->dev.driver = NULL;
out_unlock:
	device_unlock(&mdev->dev);
	return ret;
}
Jason Gunthorpe April 26, 2021, 2:11 p.m. UTC | #2
On Mon, Apr 26, 2021 at 04:02:57PM +0200, Christoph Hellwig wrote:
> On Fri, Apr 23, 2021 at 08:02:59PM -0300, Jason Gunthorpe wrote:
> > +/*
> > + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> > + * the creation of the mdev all the way back to sysfs. This is a weird model
> > + * that doesn't fit in the driver core well, nor does it seem to appear any
> > + * place else in the kernel, so use a simple hack.
> > + */
> > +static int mdev_bind_driver(struct mdev_device *mdev)
> > +{
> > +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> > +	int ret;
> > +
> > +	if (!drv)
> > +		drv = &vfio_mdev_driver;
> > +
> > +	while (1) {
> > +		device_lock(&mdev->dev);
> > +		if (mdev->dev.driver == &drv->driver) {
> > +			ret = 0;
> > +			goto out_unlock;
> > +		}
> > +		if (mdev->probe_err) {
> > +			ret = mdev->probe_err;
> > +			goto out_unlock;
> > +		}
> > +		device_unlock(&mdev->dev);
> > +		ret = device_attach(&mdev->dev);
> > +		if (ret)
> > +			return ret;
> > +		mdev->probe_err = -EINVAL;
> > +	}
> > +	return 0;
> > +
> > +out_unlock:
> > +	device_unlock(&mdev->dev);
> > +	return ret;
> > +}
> 
> This looks strange to me, and I think by open coding
> device_attach we could do much better here, something like:

I look at this for a long time, it is strange.

> static int mdev_bind_driver(struct mdev_device *mdev)
> {
> 	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> 	int ret = -EINVAL;
> 
> 	if (!drv)
> 		drv = &vfio_mdev_driver;
> 
> 	device_lock(&mdev->dev);
> 	if (WARN_ON_ONCE(device_is_bound(dev)))
> 		goto out_unlock;
> 	if (mdev->dev.p->dead)
> 	 	goto out_unlock;

'p' is private to the driver core so we can't touch it here

> 	mdev->dev.driver = &drv->driver;
> 	ret = device_bind_driver(&mdev->dev);

It is really counter intuitive but device_bind_driver() doesn't
actually call probe, or do a lot of other essential stuff.

As far as I can see the driver core has three different ways to bind
drivers:
 - The normal 'really_probe()' path with all the bells and whistles.
 - You can set dev.driver before calling device_add() and related
 - You can call device_bind_driver() 'somehow'.

The later two completely skip all the really_probe() stuff, so things
like devm and more become broken. They also don't call probe(), that
is up to the caller. They seem only usable in very niche special
cases, unfortunately.

Some callers open code the probe() but then they have ordering
problems with the sysfs and other little issues.

In this case 99% of the time the driver will already be bound here and
this routine does nothing - the only case I worried about about is
some kind of defered probe by default which calling device_attach()
will defeat.

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index ff8c1a84516698..51b8a9fcf866ad 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -94,9 +94,11 @@  static void mdev_device_remove_common(struct mdev_device *mdev)
 	mdev_remove_sysfs_files(mdev);
 	device_del(&mdev->dev);
 	lockdep_assert_held(&parent->unreg_sem);
-	ret = parent->ops->remove(mdev);
-	if (ret)
-		dev_err(&mdev->dev, "Remove failed: err=%d\n", ret);
+	if (parent->ops->remove) {
+		ret = parent->ops->remove(mdev);
+		if (ret)
+			dev_err(&mdev->dev, "Remove failed: err=%d\n", ret);
+	}
 
 	/* Balances with device_initialize() */
 	put_device(&mdev->dev);
@@ -127,7 +129,9 @@  int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
 	char *envp[] = { env_string, NULL };
 
 	/* check for mandatory ops */
-	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
+	if (!ops || !ops->supported_type_groups)
+		return -EINVAL;
+	if (!ops->device_driver && (!ops->create || !ops->remove))
 		return -EINVAL;
 
 	dev = get_device(dev);
@@ -251,6 +255,43 @@  static void mdev_device_release(struct device *dev)
 	kfree(mdev);
 }
 
+/*
+ * mdev drivers can refuse to bind during probe(), in this case we want to fail
+ * the creation of the mdev all the way back to sysfs. This is a weird model
+ * that doesn't fit in the driver core well, nor does it seem to appear any
+ * place else in the kernel, so use a simple hack.
+ */
+static int mdev_bind_driver(struct mdev_device *mdev)
+{
+	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
+	int ret;
+
+	if (!drv)
+		drv = &vfio_mdev_driver;
+
+	while (1) {
+		device_lock(&mdev->dev);
+		if (mdev->dev.driver == &drv->driver) {
+			ret = 0;
+			goto out_unlock;
+		}
+		if (mdev->probe_err) {
+			ret = mdev->probe_err;
+			goto out_unlock;
+		}
+		device_unlock(&mdev->dev);
+		ret = device_attach(&mdev->dev);
+		if (ret)
+			return ret;
+		mdev->probe_err = -EINVAL;
+	}
+	return 0;
+
+out_unlock:
+	device_unlock(&mdev->dev);
+	return ret;
+}
+
 int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
 {
 	int ret;
@@ -296,14 +337,20 @@  int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
 		goto out_put_device;
 	}
 
-	ret = parent->ops->create(mdev);
-	if (ret)
-		goto out_unlock;
+	if (parent->ops->create) {
+		ret = parent->ops->create(mdev);
+		if (ret)
+			goto out_unlock;
+	}
 
 	ret = device_add(&mdev->dev);
 	if (ret)
 		goto out_remove;
 
+	ret = mdev_bind_driver(mdev);
+	if (ret)
+		goto out_del;
+
 	ret = mdev_create_sysfs_files(mdev);
 	if (ret)
 		goto out_del;
@@ -317,7 +364,8 @@  int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
 out_del:
 	device_del(&mdev->dev);
 out_remove:
-	parent->ops->remove(mdev);
+	if (parent->ops->remove)
+		parent->ops->remove(mdev);
 out_unlock:
 	up_read(&parent->unreg_sem);
 out_put_device:
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
index 041699571b7e55..6e96c023d7823d 100644
--- a/drivers/vfio/mdev/mdev_driver.c
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -49,7 +49,7 @@  static int mdev_probe(struct device *dev)
 		return ret;
 
 	if (drv->probe) {
-		ret = drv->probe(mdev);
+		ret = mdev->probe_err = drv->probe(mdev);
 		if (ret)
 			mdev_detach_iommu(mdev);
 	}
@@ -71,10 +71,25 @@  static int mdev_remove(struct device *dev)
 	return 0;
 }
 
+static int mdev_match(struct device *dev, struct device_driver *drv)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+	struct mdev_driver *target = mdev->type->parent->ops->device_driver;
+
+	/*
+	 * The ops specify the device driver to connect, fall back to the old
+	 * shim driver if the driver hasn't been converted.
+	 */
+	if (!target)
+		target = &vfio_mdev_driver;
+	return drv == &target->driver;
+}
+
 struct bus_type mdev_bus_type = {
 	.name		= "mdev",
 	.probe		= mdev_probe,
 	.remove		= mdev_remove,
+	.match		= mdev_match,
 };
 EXPORT_SYMBOL_GPL(mdev_bus_type);
 
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 1fb34ea394ad46..49cc4f65120d57 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -19,6 +19,7 @@  struct mdev_device {
 	struct list_head next;
 	struct mdev_type *type;
 	struct device *iommu_device;
+	int probe_err;
 	bool active;
 };
 
@@ -55,6 +56,7 @@  struct device *mtype_get_parent_dev(struct mdev_type *mtype);
  * register the device to mdev module.
  *
  * @owner:		The module owner.
+ * @device_driver:	Which device driver to probe() on newly created devices
  * @dev_attr_groups:	Attributes of the parent device.
  * @mdev_attr_groups:	Attributes of the mediated device.
  * @supported_type_groups: Attributes to define supported types. It is mandatory
@@ -103,6 +105,7 @@  struct device *mtype_get_parent_dev(struct mdev_type *mtype);
  **/
 struct mdev_parent_ops {
 	struct module   *owner;
+	struct mdev_driver *device_driver;
 	const struct attribute_group **dev_attr_groups;
 	const struct attribute_group **mdev_attr_groups;
 	struct attribute_group **supported_type_groups;