diff mbox series

[v3,3/4] cxl/mem: Do not rely on device_add() side effects for dev_set_name() failures

Message ID 161714740233.2168142.11116065966198937093.stgit@dwillia2-desk3.amr.corp.intel.com
State Superseded
Headers show
Series cxl/mem: Fix memdev device setup | expand

Commit Message

Dan Williams March 30, 2021, 11:36 p.m. UTC
While device_add() will happen to catch dev_set_name() failures it is a
broken pattern to follow given that the core may try to fall back to a
different name.

Add explicit checking for dev_set_name() failures to be cleaned up by
put_device(). Skip cdev_device_add() and proceed directly to
put_device() if the name set fails.

This type of bug is easier to see if 'alloc' is split from 'add'
operations that require put_device() on failure. So cxl_memdev_alloc()
is split out as a result.

Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices")
Reported-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/mem.c |   41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

Comments

Jason Gunthorpe March 31, 2021, 1:09 p.m. UTC | #1
On Tue, Mar 30, 2021 at 04:36:42PM -0700, Dan Williams wrote:
> +static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
> +{
> +	struct cxl_memdev *cxlmd;
> +	struct device *dev;
> +	struct cdev *cdev;
> +	int rc;
> +
> +	cxlmd = cxl_memdev_alloc(cxlm);
> +	if (IS_ERR(cxlmd))
> +		return PTR_ERR(cxlmd);
> +
> +	dev = &cxlmd->dev;
> +	rc = dev_set_name(dev, "mem%d", cxlmd->id);
> +	if (rc)
> +		goto err;
>  
> +	cdev = &cxlmd->cdev;
>  	cxl_memdev_activate(cxlmd, cxlm);
>  	rc = cdev_device_add(cdev, dev);
>  	if (rc)
> -		goto err_add;
> +		goto err;

It might read nicer to have the error unwind here just call cxl_memdev_unregister()

> -	return devm_add_action_or_reset(&pdev->dev, cxl_memdev_unregister,
> +	return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister,
>  					cxlmd);

Since that is what the error unwind does at this point.

>  
> -err_add:
> +err:
>  	/*
>  	 * The cdev was briefly live, shutdown any ioctl operations that
>  	 * saw that state.
>  	 */
>  	cxl_memdev_shutdown(cxlmd);

Then this doesn't need to be a function

But it is OK as is

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
Dan Williams March 31, 2021, 4:04 p.m. UTC | #2
On Wed, Mar 31, 2021 at 6:10 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Tue, Mar 30, 2021 at 04:36:42PM -0700, Dan Williams wrote:
> > +static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
> > +{
> > +     struct cxl_memdev *cxlmd;
> > +     struct device *dev;
> > +     struct cdev *cdev;
> > +     int rc;
> > +
> > +     cxlmd = cxl_memdev_alloc(cxlm);
> > +     if (IS_ERR(cxlmd))
> > +             return PTR_ERR(cxlmd);
> > +
> > +     dev = &cxlmd->dev;
> > +     rc = dev_set_name(dev, "mem%d", cxlmd->id);
> > +     if (rc)
> > +             goto err;
> >
> > +     cdev = &cxlmd->cdev;
> >       cxl_memdev_activate(cxlmd, cxlm);
> >       rc = cdev_device_add(cdev, dev);
> >       if (rc)
> > -             goto err_add;
> > +             goto err;
>
> It might read nicer to have the error unwind here just call cxl_memdev_unregister()

Perhaps, but I don't think cdev_del() and device_del() are prepared to
deal with an object that was not successfully added.

>
> > -     return devm_add_action_or_reset(&pdev->dev, cxl_memdev_unregister,
> > +     return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister,
> >                                       cxlmd);
>
> Since that is what the error unwind does at this point.

Right, but at this point the code knows that cdev_del() and
device_del() will receive an object in the appropriate state.

>
> >
> > -err_add:
> > +err:
> >       /*
> >        * The cdev was briefly live, shutdown any ioctl operations that
> >        * saw that state.
> >        */
> >       cxl_memdev_shutdown(cxlmd);
>
> Then this doesn't need to be a function
>
> But it is OK as is

Unless I'm missing something I think it's required to use only
put_device() to cleanup after cdev_device_add() failure, but yes I
don't like that cxl_memdev_shutdown() needs to be open coded like
this.

>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Appreciate it.
Jason Gunthorpe March 31, 2021, 4:17 p.m. UTC | #3
On Wed, Mar 31, 2021 at 09:04:32AM -0700, Dan Williams wrote:
> On Wed, Mar 31, 2021 at 6:10 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Tue, Mar 30, 2021 at 04:36:42PM -0700, Dan Williams wrote:
> > > +static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
> > > +{
> > > +     struct cxl_memdev *cxlmd;
> > > +     struct device *dev;
> > > +     struct cdev *cdev;
> > > +     int rc;
> > > +
> > > +     cxlmd = cxl_memdev_alloc(cxlm);
> > > +     if (IS_ERR(cxlmd))
> > > +             return PTR_ERR(cxlmd);
> > > +
> > > +     dev = &cxlmd->dev;
> > > +     rc = dev_set_name(dev, "mem%d", cxlmd->id);
> > > +     if (rc)
> > > +             goto err;
> > >
> > > +     cdev = &cxlmd->cdev;
> > >       cxl_memdev_activate(cxlmd, cxlm);
> > >       rc = cdev_device_add(cdev, dev);
> > >       if (rc)
> > > -             goto err_add;
> > > +             goto err;
> >
> > It might read nicer to have the error unwind here just call cxl_memdev_unregister()
> 
> Perhaps, but I don't think cdev_del() and device_del() are prepared to
> deal with an object that was not successfully added.

Oh, probably not, yuk yuk yuk.

Ideally cdev_device_add should not fail in a way that allows an open,
I think that is just an artifact of it being composed of smaller
functions..

For instance if we replace the kobj_map with xarray then we can
use xa_reserve and xa_store to avoid this condition.

This actually looks like a good fit because the dev_t has pretty
"lumpy" allocations and this isn't really performance sensitive.

A clever person could then make the dev_t self allocating and solve
another pain point with this interface. Hum..

Jason
Dan Williams March 31, 2021, 4:32 p.m. UTC | #4
On Wed, Mar 31, 2021 at 9:18 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Wed, Mar 31, 2021 at 09:04:32AM -0700, Dan Williams wrote:
> > On Wed, Mar 31, 2021 at 6:10 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > On Tue, Mar 30, 2021 at 04:36:42PM -0700, Dan Williams wrote:
> > > > +static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
> > > > +{
> > > > +     struct cxl_memdev *cxlmd;
> > > > +     struct device *dev;
> > > > +     struct cdev *cdev;
> > > > +     int rc;
> > > > +
> > > > +     cxlmd = cxl_memdev_alloc(cxlm);
> > > > +     if (IS_ERR(cxlmd))
> > > > +             return PTR_ERR(cxlmd);
> > > > +
> > > > +     dev = &cxlmd->dev;
> > > > +     rc = dev_set_name(dev, "mem%d", cxlmd->id);
> > > > +     if (rc)
> > > > +             goto err;
> > > >
> > > > +     cdev = &cxlmd->cdev;
> > > >       cxl_memdev_activate(cxlmd, cxlm);
> > > >       rc = cdev_device_add(cdev, dev);
> > > >       if (rc)
> > > > -             goto err_add;
> > > > +             goto err;
> > >
> > > It might read nicer to have the error unwind here just call cxl_memdev_unregister()
> >
> > Perhaps, but I don't think cdev_del() and device_del() are prepared to
> > deal with an object that was not successfully added.
>
> Oh, probably not, yuk yuk yuk.
>
> Ideally cdev_device_add should not fail in a way that allows an open,
> I think that is just an artifact of it being composed of smaller
> functions..
>
> For instance if we replace the kobj_map with xarray then we can
> use xa_reserve and xa_store to avoid this condition.
>
> This actually looks like a good fit because the dev_t has pretty
> "lumpy" allocations and this isn't really performance sensitive.
>
> A clever person could then make the dev_t self allocating and solve
> another pain point with this interface. Hum..
>

...not a bad idea.

/me bookmarks this thread for future consideration.
diff mbox series

Patch

diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 2cf620d201a6..759713b619ab 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -1187,7 +1187,7 @@  static void cxl_memdev_unregister(void *_cxlmd)
 	put_device(dev);
 }
 
-static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
+static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm)
 {
 	struct pci_dev *pdev = cxlm->pdev;
 	struct cxl_memdev *cxlmd;
@@ -1197,11 +1197,11 @@  static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
 
 	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
 	if (!cxlmd)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
 	if (rc < 0)
-		goto err_id;
+		goto err;
 	cxlmd->id = rc;
 
 	dev = &cxlmd->dev;
@@ -1210,29 +1210,48 @@  static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
 	dev->bus = &cxl_bus_type;
 	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
 	dev->type = &cxl_memdev_type;
-	dev_set_name(dev, "mem%d", cxlmd->id);
 
 	cdev = &cxlmd->cdev;
 	cdev_init(cdev, &cxl_memdev_fops);
+	return cxlmd;
+
+err:
+	kfree(cxlmd);
+	return ERR_PTR(rc);
+}
+
+static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
+{
+	struct cxl_memdev *cxlmd;
+	struct device *dev;
+	struct cdev *cdev;
+	int rc;
+
+	cxlmd = cxl_memdev_alloc(cxlm);
+	if (IS_ERR(cxlmd))
+		return PTR_ERR(cxlmd);
+
+	dev = &cxlmd->dev;
+	rc = dev_set_name(dev, "mem%d", cxlmd->id);
+	if (rc)
+		goto err;
 
+	cdev = &cxlmd->cdev;
 	cxl_memdev_activate(cxlmd, cxlm);
 	rc = cdev_device_add(cdev, dev);
 	if (rc)
-		goto err_add;
+		goto err;
 
-	return devm_add_action_or_reset(&pdev->dev, cxl_memdev_unregister,
+	return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister,
 					cxlmd);
 
-err_add:
+err:
 	/*
 	 * The cdev was briefly live, shutdown any ioctl operations that
 	 * saw that state.
 	 */
 	cxl_memdev_shutdown(cxlmd);
-	ida_free(&cxl_memdev_ida, cxlmd->id);
-err_id:
-	kfree(cxlmd);
-
+	put_device(dev);
 	return rc;
 }