diff mbox series

[v3,46/57] perf: Simplify pmu_dev_alloc()

Message ID 20230612093540.850386350@infradead.org (mailing list archive)
State Handled Elsewhere
Delegated to: Paul Moore
Headers show
Series Scope-based Resource Management | expand

Commit Message

Peter Zijlstra June 12, 2023, 9:07 a.m. UTC
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/core.c |   65 ++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 34 deletions(-)

Comments

Peter Zijlstra June 12, 2023, 9:44 a.m. UTC | #1
On Mon, Jun 12, 2023 at 11:07:59AM +0200, Peter Zijlstra wrote:
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/events/core.c |   65 ++++++++++++++++++++++++---------------------------
>  1 file changed, 31 insertions(+), 34 deletions(-)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -11285,49 +11285,46 @@ static void pmu_dev_release(struct devic
>  
>  static int pmu_dev_alloc(struct pmu *pmu)
>  {
> +	int ret;
>  
> +	struct device *dev __free(put_device) =
> +		kzalloc(sizeof(struct device), GFP_KERNEL);
> +	if (!dev)
> +		return -ENOMEM;
>  
> +	dev->groups = pmu->attr_groups;
> +	device_initialize(dev);
>  
> +	dev_set_drvdata(dev, pmu);
> +	dev->bus = &pmu_bus;
> +	dev->release = pmu_dev_release;
>  
> +	ret = dev_set_name(dev, "%s", pmu->name);
>  	if (ret)
> +		return ret;
>  
> +	ret = device_add(dev);
>  	if (ret)
> +		return ret;
>  
> +	struct device *del __free(device_del) = dev;

Greg, I'm not much familiar with the whole device model, but it seems
unfortunate to me that one has to call device_del() explicitly if we
already have a put_device() queued.

Is there a saner way to write this?

>  
> +	/* For PMUs with address filters, throw in an extra attribute: */
> +	if (pmu->nr_addr_filters) {
> +		ret = device_create_file(dev, &dev_attr_nr_addr_filters);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	if (pmu->attr_update) {
> +		ret = sysfs_update_groups(&dev->kobj, pmu->attr_update);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	no_free_ptr(del);
> +	pmu->dev = no_free_ptr(dev);
> +	return 0;
>  }
Greg KH June 12, 2023, 9:55 a.m. UTC | #2
On Mon, Jun 12, 2023 at 11:44:00AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 12, 2023 at 11:07:59AM +0200, Peter Zijlstra wrote:
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  kernel/events/core.c |   65 ++++++++++++++++++++++++---------------------------
> >  1 file changed, 31 insertions(+), 34 deletions(-)
> > 
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -11285,49 +11285,46 @@ static void pmu_dev_release(struct devic
> >  
> >  static int pmu_dev_alloc(struct pmu *pmu)
> >  {
> > +	int ret;
> >  
> > +	struct device *dev __free(put_device) =
> > +		kzalloc(sizeof(struct device), GFP_KERNEL);
> > +	if (!dev)
> > +		return -ENOMEM;
> >  
> > +	dev->groups = pmu->attr_groups;
> > +	device_initialize(dev);
> >  
> > +	dev_set_drvdata(dev, pmu);
> > +	dev->bus = &pmu_bus;
> > +	dev->release = pmu_dev_release;
> >  
> > +	ret = dev_set_name(dev, "%s", pmu->name);
> >  	if (ret)
> > +		return ret;
> >  
> > +	ret = device_add(dev);
> >  	if (ret)
> > +		return ret;
> >  
> > +	struct device *del __free(device_del) = dev;
> 
> Greg, I'm not much familiar with the whole device model, but it seems
> unfortunate to me that one has to call device_del() explicitly if we
> already have a put_device() queued.
> 
> Is there a saner way to write this?

Yes, there should be, let me look into it later tonight, need to get
some stable kernels out for review first...

thanks,

greg k-h
Greg KH June 12, 2023, 12:18 p.m. UTC | #3
On Mon, Jun 12, 2023 at 11:44:00AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 12, 2023 at 11:07:59AM +0200, Peter Zijlstra wrote:
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  kernel/events/core.c |   65 ++++++++++++++++++++++++---------------------------
> >  1 file changed, 31 insertions(+), 34 deletions(-)
> > 
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -11285,49 +11285,46 @@ static void pmu_dev_release(struct devic
> >  
> >  static int pmu_dev_alloc(struct pmu *pmu)
> >  {
> > +	int ret;
> >  
> > +	struct device *dev __free(put_device) =
> > +		kzalloc(sizeof(struct device), GFP_KERNEL);
> > +	if (!dev)
> > +		return -ENOMEM;
> >  
> > +	dev->groups = pmu->attr_groups;
> > +	device_initialize(dev);
> >  
> > +	dev_set_drvdata(dev, pmu);
> > +	dev->bus = &pmu_bus;
> > +	dev->release = pmu_dev_release;
> >  
> > +	ret = dev_set_name(dev, "%s", pmu->name);
> >  	if (ret)
> > +		return ret;
> >  
> > +	ret = device_add(dev);
> >  	if (ret)
> > +		return ret;
> >  
> > +	struct device *del __free(device_del) = dev;
> 
> Greg, I'm not much familiar with the whole device model, but it seems
> unfortunate to me that one has to call device_del() explicitly if we
> already have a put_device() queued.
> 
> Is there a saner way to write this?

Ok, the "problem" here is that you have decided to do the "complex" way
to initialize a struct device.  And as such, you have to do more
housekeeping than if you were to just use the simple interface.

The rule is, after you call device_initialize() you HAVE to call
put_device() on the pointer if something goes wrong and you want to
clean up properly.  Unless you have called device_add(), and at that
point in time, then you HAVE to call device_del() if the device_add()
call succeeded.  If the device_add() call failed, then you HAVE to call
put_device().

Yeah, it's a pain, but you are trying to hand-roll code that is not a
"normal" path for a struct device, sorry.

I don't know if you really can encode all of that crazy logic in the
cleanup api, UNLESS you can "switch" the cleanup function at a point in
time (i.e. after device_add() is successful).  Is that possible?

Anyway, let me see about just cleaning up this code in general, I don't
think you need the complex interface here for a tiny struct device at
all, which would make this specific instance moot :)

Also, nit, you are racing with userspace by attempting to add new device
files _AFTER_ the device is registered with the driver core, this whole
thing can be made more simpler I hope, give me a bit...

thanks

greg k-h
Paolo Bonzini June 12, 2023, 12:29 p.m. UTC | #4
On 6/12/23 14:18, Greg KH wrote:
> Yeah, it's a pain, but you are trying to hand-roll code that is not a
> "normal" path for a struct device, sorry.
> 
> I don't know if you really can encode all of that crazy logic in the
> cleanup api, UNLESS you can "switch" the cleanup function at a point in
> time (i.e. after device_add() is successful).  Is that possible?

What _could_ make sense is that device_add() completely takes ownership 
of the given pointer, and takes care of calling put_device() on failure.

Then you can have

	struct device *dev_struct __free(put_device) =
		kzalloc(sizeof(struct device), GFP_KERNEL);

	struct device *dev __free(device_del) =
		device_add(no_free_ptr(dev_struct));

	/* dev_struct is NULL now */

	pmu->dev = no_free_ptr(dev);

Paolo
Greg KH June 12, 2023, 12:58 p.m. UTC | #5
On Mon, Jun 12, 2023 at 02:29:05PM +0200, Paolo Bonzini wrote:
> On 6/12/23 14:18, Greg KH wrote:
> > Yeah, it's a pain, but you are trying to hand-roll code that is not a
> > "normal" path for a struct device, sorry.
> > 
> > I don't know if you really can encode all of that crazy logic in the
> > cleanup api, UNLESS you can "switch" the cleanup function at a point in
> > time (i.e. after device_add() is successful).  Is that possible?
> 
> What _could_ make sense is that device_add() completely takes ownership of
> the given pointer, and takes care of calling put_device() on failure.

I think we tried that decades ago :)

Problem is that the caller wants to clean stuff up associted with that
struct device-embedded structure _before_ the final put_device() is
called which would usually free the memory of the overall structure.

So device_add() can't call put_device() on the error path within it, you
need the caller to unwind it's local stuff before that happens.

> Then you can have
> 
> 	struct device *dev_struct __free(put_device) =
> 		kzalloc(sizeof(struct device), GFP_KERNEL);
> 
> 	struct device *dev __free(device_del) =
> 		device_add(no_free_ptr(dev_struct));
> 
> 	/* dev_struct is NULL now */
> 
> 	pmu->dev = no_free_ptr(dev);

I don't see how that works properly, what am I missing?

thanks,

greg k-h
Greg KH June 12, 2023, 1:09 p.m. UTC | #6
On Mon, Jun 12, 2023 at 02:18:03PM +0200, Greg KH wrote:
> On Mon, Jun 12, 2023 at 11:44:00AM +0200, Peter Zijlstra wrote:
> > On Mon, Jun 12, 2023 at 11:07:59AM +0200, Peter Zijlstra wrote:
> > > 
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > ---
> > >  kernel/events/core.c |   65 ++++++++++++++++++++++++---------------------------
> > >  1 file changed, 31 insertions(+), 34 deletions(-)
> > > 
> > > --- a/kernel/events/core.c
> > > +++ b/kernel/events/core.c
> > > @@ -11285,49 +11285,46 @@ static void pmu_dev_release(struct devic
> > >  
> > >  static int pmu_dev_alloc(struct pmu *pmu)
> > >  {
> > > +	int ret;
> > >  
> > > +	struct device *dev __free(put_device) =
> > > +		kzalloc(sizeof(struct device), GFP_KERNEL);
> > > +	if (!dev)
> > > +		return -ENOMEM;
> > >  
> > > +	dev->groups = pmu->attr_groups;
> > > +	device_initialize(dev);
> > >  
> > > +	dev_set_drvdata(dev, pmu);
> > > +	dev->bus = &pmu_bus;
> > > +	dev->release = pmu_dev_release;
> > >  
> > > +	ret = dev_set_name(dev, "%s", pmu->name);
> > >  	if (ret)
> > > +		return ret;
> > >  
> > > +	ret = device_add(dev);
> > >  	if (ret)
> > > +		return ret;
> > >  
> > > +	struct device *del __free(device_del) = dev;
> > 
> > Greg, I'm not much familiar with the whole device model, but it seems
> > unfortunate to me that one has to call device_del() explicitly if we
> > already have a put_device() queued.
> > 
> > Is there a saner way to write this?
> 
> Ok, the "problem" here is that you have decided to do the "complex" way
> to initialize a struct device.  And as such, you have to do more
> housekeeping than if you were to just use the simple interface.
> 
> The rule is, after you call device_initialize() you HAVE to call
> put_device() on the pointer if something goes wrong and you want to
> clean up properly.  Unless you have called device_add(), and at that
> point in time, then you HAVE to call device_del() if the device_add()
> call succeeded.  If the device_add() call failed, then you HAVE to call
> put_device().
> 
> Yeah, it's a pain, but you are trying to hand-roll code that is not a
> "normal" path for a struct device, sorry.
> 
> I don't know if you really can encode all of that crazy logic in the
> cleanup api, UNLESS you can "switch" the cleanup function at a point in
> time (i.e. after device_add() is successful).  Is that possible?
> 
> Anyway, let me see about just cleaning up this code in general, I don't
> think you need the complex interface here for a tiny struct device at
> all, which would make this specific instance moot :)
> 
> Also, nit, you are racing with userspace by attempting to add new device
> files _AFTER_ the device is registered with the driver core, this whole
> thing can be made more simpler I hope, give me a bit...

Nope, I was wrong, I can fix the race condition, but the logic here for
how to init and clean up on errors is right, and you want this because
you are a bus and so, you need the two-step init/teardown process,
sorry.

Here's the patch I came up with to get rid of the race, but doesn't
really help you out here at all :(

------------------------
From foo@baz Mon Jun 12 03:07:54 PM CEST 2023
Date: Mon, 12 Jun 2023 15:07:54 +0200
To: Greg KH <gregkh@linuxfoundation.org>
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Subject: [PATCH] perf/core: fix narrow startup race when creating the perf nr_addr_filters sysfs file


Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>


diff --git a/kernel/events/core.c b/kernel/events/core.c
index db016e418931..d2a6182ad090 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11351,9 +11351,32 @@ static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
 static struct attribute *pmu_dev_attrs[] = {
 	&dev_attr_type.attr,
 	&dev_attr_perf_event_mux_interval_ms.attr,
+	&dev_attr_nr_addr_filters.attr,
+	NULL,
+};
+
+static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	if (!pmu->nr_addr_filters)
+		return 0;
+
+	return a->mode;
+
+	return 0;
+}
+
+static struct attribute_group pmu_dev_attr_group = {
+	.is_visible = pmu_dev_is_visible,
+	.attrs = pmu_dev_attrs,
+};
+
+const static struct attribute_group *pmu_dev_groups[] = {
+	&pmu_dev_attr_group,
 	NULL,
 };
-ATTRIBUTE_GROUPS(pmu_dev);
 
 static int pmu_bus_running;
 static struct bus_type pmu_bus = {
@@ -11389,18 +11412,11 @@ static int pmu_dev_alloc(struct pmu *pmu)
 	if (ret)
 		goto free_dev;
 
-	/* For PMUs with address filters, throw in an extra attribute: */
-	if (pmu->nr_addr_filters)
-		ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
-
-	if (ret)
-		goto del_dev;
-
-	if (pmu->attr_update)
+	if (pmu->attr_update) {
 		ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
-
-	if (ret)
-		goto del_dev;
+		if (ret)
+			goto del_dev;
+	}
 
 out:
 	return ret;
Greg KH June 12, 2023, 1:35 p.m. UTC | #7
On Mon, Jun 12, 2023 at 11:44:00AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 12, 2023 at 11:07:59AM +0200, Peter Zijlstra wrote:
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  kernel/events/core.c |   65 ++++++++++++++++++++++++---------------------------
> >  1 file changed, 31 insertions(+), 34 deletions(-)
> > 
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -11285,49 +11285,46 @@ static void pmu_dev_release(struct devic
> >  
> >  static int pmu_dev_alloc(struct pmu *pmu)
> >  {
> > +	int ret;
> >  
> > +	struct device *dev __free(put_device) =
> > +		kzalloc(sizeof(struct device), GFP_KERNEL);
> > +	if (!dev)
> > +		return -ENOMEM;
> >  
> > +	dev->groups = pmu->attr_groups;
> > +	device_initialize(dev);
> >  
> > +	dev_set_drvdata(dev, pmu);
> > +	dev->bus = &pmu_bus;
> > +	dev->release = pmu_dev_release;
> >  
> > +	ret = dev_set_name(dev, "%s", pmu->name);
> >  	if (ret)
> > +		return ret;
> >  
> > +	ret = device_add(dev);
> >  	if (ret)
> > +		return ret;
> >  
> > +	struct device *del __free(device_del) = dev;
> 
> Greg, I'm not much familiar with the whole device model, but it seems
> unfortunate to me that one has to call device_del() explicitly if we
> already have a put_device() queued.
> 
> Is there a saner way to write this?

Ok, to answer my other question, yes, you are changing the free call
here in the "middle" of the function, sorry, I missed "del" vs. "dev"
and was wondering how this would work...

This should work, it's tricky, especially:

> > +	no_free_ptr(del);
> > +	pmu->dev = no_free_ptr(dev);

this.

I had to stare at it for a while to realize that yes, you are calling
the two different "cleanup" functions prior to thes lines, on the same
pointer, and that the order is correct.

Ick, this is going to be a rough audit for bus code that gets converted
to this, BUT bonus is that once it's done, any changes to the middle of
the function should "just work", and it's a good task for an intern to
do :)


Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mind if I try this series to convert a more "normal" driver to see how
it works with that?  That's going to be the true test, see if the
changes make sense to someone who doesn't really know the internals of
the driver core like this...

thanks,

greg k-h
Peter Zijlstra June 12, 2023, 2:13 p.m. UTC | #8
On Mon, Jun 12, 2023 at 03:35:49PM +0200, Greg KH wrote:

> Ick, this is going to be a rough audit for bus code that gets converted
> to this, BUT bonus is that once it's done, any changes to the middle of
> the function should "just work", and it's a good task for an intern to
> do :)

Hehe. The 'bonus' in all this is that if you get it wrong, it's the very
likely success case that goes sideways and it should instantly explode.

While all the unlikely error cases should all just work -- famous last
words etc..

> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thanks!

> Mind if I try this series to convert a more "normal" driver to see how
> it works with that?  That's going to be the true test, see if the
> changes make sense to someone who doesn't really know the internals of
> the driver core like this...

Not at all, feel tree to have a go at that. I picked code I was familiar
with, but it would ofc. be good to have others give it a spin too.
Greg KH June 12, 2023, 3:44 p.m. UTC | #9
On Mon, Jun 12, 2023 at 04:13:22PM +0200, Peter Zijlstra wrote:
> > Mind if I try this series to convert a more "normal" driver to see how
> > it works with that?  That's going to be the true test, see if the
> > changes make sense to someone who doesn't really know the internals of
> > the driver core like this...
> 
> Not at all, feel tree to have a go at that. I picked code I was familiar
> with, but it would ofc. be good to have others give it a spin too.

Ok, here's an attempt to clean up misc.c a bit.  It does two things,
adds a guard for the global misc_mtx mutex when used (sometimes, it's
not always paired with a release.)

Then in the last part of the file, I abuse the DEFINE_FREE() to handle a
special case of removing a proc file if things go bad (and add a
DEFINE_FREE() for class_destroy(), which should go into
include/device/class.h.

I've only test-built it, but is this the proper use of DEFINE_FREE()?
There wasn't much documentation :)

To be fair the end-result of misc_init() is much nicer and cleaner and
"obviously correct", which is good, even with the crazy proc file mess
in it.  So I like the idea overall, need to figure out when to use
DEFINE_CLASS() vs. DEFINE_FREE(), that isn't obvious to me.

Also, you can't put a DEFINE_FREE() within a function declaration, which
I guess makes sense, but the build warning is very odd when you attempt
it, mentioning an "invalid storage class".  Is that supposed to be able
to work?

thanks,

greg k-h


---
 drivers/char/misc.c | 69 +++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 1c44c29a666e..a203b17a2b82 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -123,10 +123,9 @@ static int misc_open(struct inode *inode, struct file *file)
 {
 	int minor = iminor(inode);
 	struct miscdevice *c = NULL, *iter;
-	int err = -ENODEV;
 	const struct file_operations *new_fops = NULL;
 
-	mutex_lock(&misc_mtx);
+	guard(mutex)(&misc_mtx);
 
 	list_for_each_entry(iter, &misc_list, list) {
 		if (iter->minor != minor)
@@ -149,7 +148,7 @@ static int misc_open(struct inode *inode, struct file *file)
 			break;
 		}
 		if (!new_fops)
-			goto fail;
+			return -ENODEV;
 	}
 
 	/*
@@ -159,13 +158,11 @@ static int misc_open(struct inode *inode, struct file *file)
 	 */
 	file->private_data = c;
 
-	err = 0;
 	replace_fops(file, new_fops);
 	if (file->f_op->open)
-		err = file->f_op->open(inode, file);
-fail:
-	mutex_unlock(&misc_mtx);
-	return err;
+		return file->f_op->open(inode, file);
+
+	return 0;
 }
 
 static struct class *misc_class;
@@ -197,29 +194,24 @@ static const struct file_operations misc_fops = {
 int misc_register(struct miscdevice *misc)
 {
 	dev_t dev;
-	int err = 0;
 	bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR);
 
 	INIT_LIST_HEAD(&misc->list);
 
-	mutex_lock(&misc_mtx);
+	guard(mutex)(&misc_mtx);
 
 	if (is_dynamic) {
 		int i = misc_minor_alloc();
 
-		if (i < 0) {
-			err = -EBUSY;
-			goto out;
-		}
+		if (i < 0)
+			return -EBUSY;
 		misc->minor = i;
 	} else {
 		struct miscdevice *c;
 
 		list_for_each_entry(c, &misc_list, list) {
-			if (c->minor == misc->minor) {
-				err = -EBUSY;
-				goto out;
-			}
+			if (c->minor == misc->minor)
+				return -EBUSY;
 		}
 	}
 
@@ -233,8 +225,7 @@ int misc_register(struct miscdevice *misc)
 			misc_minor_free(misc->minor);
 			misc->minor = MISC_DYNAMIC_MINOR;
 		}
-		err = PTR_ERR(misc->this_device);
-		goto out;
+		return PTR_ERR(misc->this_device);
 	}
 
 	/*
@@ -242,9 +233,7 @@ int misc_register(struct miscdevice *misc)
 	 * earlier defaults
 	 */
 	list_add(&misc->list, &misc_list);
- out:
-	mutex_unlock(&misc_mtx);
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(misc_register);
 
@@ -261,11 +250,10 @@ void misc_deregister(struct miscdevice *misc)
 	if (WARN_ON(list_empty(&misc->list)))
 		return;
 
-	mutex_lock(&misc_mtx);
+	guard(mutex)(&misc_mtx);
 	list_del(&misc->list);
 	device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor));
 	misc_minor_free(misc->minor);
-	mutex_unlock(&misc_mtx);
 }
 EXPORT_SYMBOL(misc_deregister);
 
@@ -280,29 +268,24 @@ static char *misc_devnode(const struct device *dev, umode_t *mode)
 	return NULL;
 }
 
+DEFINE_FREE(class_destroy, struct class *, if (_T) class_destroy(_T));
+DEFINE_FREE(remove_proc, struct proc_dir_entry *, if (_T) remove_proc_entry("misc", NULL));
 static int __init misc_init(void)
 {
-	int err;
-	struct proc_dir_entry *ret;
+	struct proc_dir_entry *ret __free(remove_proc) = proc_create_seq("misc", 0, NULL, &misc_seq_ops);
+	struct class *c __free(class_destroy) = class_create("misc");
 
-	ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops);
-	misc_class = class_create("misc");
-	err = PTR_ERR(misc_class);
-	if (IS_ERR(misc_class))
-		goto fail_remove;
+	if (IS_ERR(c))
+		return PTR_ERR(c);
 
-	err = -EIO;
 	if (register_chrdev(MISC_MAJOR, "misc", &misc_fops))
-		goto fail_printk;
-	misc_class->devnode = misc_devnode;
-	return 0;
+		return -EIO;
 
-fail_printk:
-	pr_err("unable to get major %d for misc devices\n", MISC_MAJOR);
-	class_destroy(misc_class);
-fail_remove:
-	if (ret)
-		remove_proc_entry("misc", NULL);
-	return err;
+	c->devnode = misc_devnode;
+
+	misc_class = no_free_ptr(c);
+	no_free_ptr(ret);
+
+	return 0;
 }
 subsys_initcall(misc_init);
Peter Zijlstra June 13, 2023, 7:34 a.m. UTC | #10
On Mon, Jun 12, 2023 at 05:44:59PM +0200, Greg KH wrote:

> Then in the last part of the file, I abuse the DEFINE_FREE() to handle a
> special case of removing a proc file if things go bad (and add a
> DEFINE_FREE() for class_destroy(), which should go into
> include/device/class.h.
> 
> I've only test-built it, but is this the proper use of DEFINE_FREE()?
> There wasn't much documentation :)

Yes, this looks right.

> To be fair the end-result of misc_init() is much nicer and cleaner and
> "obviously correct", which is good, even with the crazy proc file mess
> in it.  So I like the idea overall, need to figure out when to use
> DEFINE_CLASS() vs. DEFINE_FREE(), that isn't obvious to me.

CLASS is meant for things that have an obvious contructor as well as a
destructor, that always go together. Like for example the lock things,
they always pair a lock and unlock. But also things like:
fdget()+fdput(), these can also always be paired, and if you want the
file to escape you simply take yet another reference to prevent the
fdput() from being the final.

> Also, you can't put a DEFINE_FREE() within a function declaration, which
> I guess makes sense, but the build warning is very odd when you attempt
> it, mentioning an "invalid storage class".  Is that supposed to be able
> to work?

No, DEFINE_FREE() and DEFINE_CLASS() end up defining a bunch of inline
functions, which can't be done inside another function.

If only C would have lambda functions ... alas.

> @@ -280,29 +268,24 @@ static char *misc_devnode(const struct device *dev, umode_t *mode)
>  	return NULL;
>  }
>  
> +DEFINE_FREE(class_destroy, struct class *, if (_T) class_destroy(_T));

Documentation for class_create() says it will return ERR_PTR(), so then
this should be something like:

DEFINE_FRERE(class_destroy, struct class *, if (!IS_ERR_OR_NULL(_T)) class_destroy(_T))

> +DEFINE_FREE(remove_proc, struct proc_dir_entry *, if (_T) remove_proc_entry("misc", NULL));
>  static int __init misc_init(void)
>  {
> +	struct proc_dir_entry *ret __free(remove_proc) = proc_create_seq("misc", 0, NULL, &misc_seq_ops);
> +	struct class *c __free(class_destroy) = class_create("misc");
>  
> +	if (IS_ERR(c))
> +		return PTR_ERR(c);
>  
>  	if (register_chrdev(MISC_MAJOR, "misc", &misc_fops))
> +		return -EIO;
>  
> +	c->devnode = misc_devnode;
> +
> +	misc_class = no_free_ptr(c);
> +	no_free_ptr(ret);
> +
> +	return 0;
>  }

And yes, this does look nicer.
Greg KH June 13, 2023, 7:50 a.m. UTC | #11
On Tue, Jun 13, 2023 at 09:34:15AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 12, 2023 at 05:44:59PM +0200, Greg KH wrote:
> > To be fair the end-result of misc_init() is much nicer and cleaner and
> > "obviously correct", which is good, even with the crazy proc file mess
> > in it.  So I like the idea overall, need to figure out when to use
> > DEFINE_CLASS() vs. DEFINE_FREE(), that isn't obvious to me.
> 
> CLASS is meant for things that have an obvious contructor as well as a
> destructor, that always go together. Like for example the lock things,
> they always pair a lock and unlock. But also things like:
> fdget()+fdput(), these can also always be paired, and if you want the
> file to escape you simply take yet another reference to prevent the
> fdput() from being the final.

Ok, so then the class_destroy stuff down below here should be
DEFINE_CLASS()?

> > @@ -280,29 +268,24 @@ static char *misc_devnode(const struct device *dev, umode_t *mode)
> >  	return NULL;
> >  }
> >  
> > +DEFINE_FREE(class_destroy, struct class *, if (_T) class_destroy(_T));
> 
> Documentation for class_create() says it will return ERR_PTR(), so then
> this should be something like:
> 
> DEFINE_FRERE(class_destroy, struct class *, if (!IS_ERR_OR_NULL(_T)) class_destroy(_T))

Nit, as class_destroy() handles this type of check within it, it can be
even simpler:
	DEFINE_FREE(class_destroy, struct class *, class_destroy(_T));
or would that be:
	DEFINE_CLASS(class_destroy, struct class *, class_destroy(_T));
?

> > +DEFINE_FREE(remove_proc, struct proc_dir_entry *, if (_T) remove_proc_entry("misc", NULL));
> >  static int __init misc_init(void)
> >  {
> > +	struct proc_dir_entry *ret __free(remove_proc) = proc_create_seq("misc", 0, NULL, &misc_seq_ops);
> > +	struct class *c __free(class_destroy) = class_create("misc");
> >  
> > +	if (IS_ERR(c))
> > +		return PTR_ERR(c);
> >  
> >  	if (register_chrdev(MISC_MAJOR, "misc", &misc_fops))
> > +		return -EIO;
> >  
> > +	c->devnode = misc_devnode;
> > +
> > +	misc_class = no_free_ptr(c);
> > +	no_free_ptr(ret);
> > +
> > +	return 0;
> >  }
> 
> And yes, this does look nicer.

I have a ton of future patches coming that does a bunch of
class_create/destroy changes that would be made a LOT simpler with this
patchset, and I really don't want to have to hit the same codepaths
twice if at all possible.

So what's the odds this can be reasonable enough to get into 6.5-rc1 so
we can rely on it there?

thanks,

greg k-h
Peter Zijlstra June 13, 2023, 10:50 a.m. UTC | #12
On Tue, Jun 13, 2023 at 09:50:28AM +0200, Greg KH wrote:

> > DEFINE_FRERE(class_destroy, struct class *, if (!IS_ERR_OR_NULL(_T)) class_destroy(_T))
> 
> Nit, as class_destroy() handles this type of check within it, it can be
> even simpler:
> 	DEFINE_FREE(class_destroy, struct class *, class_destroy(_T));

Note that that means there will be an unconditional call to
class_destroy() in the success path. As long as that is never a hot-path
this should be fine I suppose, but it is something Linus pointed out
earlier.

> or would that be:
> 	DEFINE_CLASS(class_destroy, struct class *, class_destroy(_T));

Has a slightly different syntax per the comment I did do write :-)

DEFINE_CLASS(class, struct class *,
	     if (!IS_ERR_OR_NULL(_T)) class_destroy(_T),
	     class_create(cname), const char *name)

static int __init misc_init(void)
{
	struct proc_dir_entry *ret __free(remove_proc) =
		proc_create_seq("misc", 0, NULL, &misc_seq_ops);

	CLASS(class, c)("misc");
	if (IS_ERR(c))
		return PTR_ERR(c);

	if (register_chrdev(MISC_MAJOR, "misc", &misc_fops))
		return -EIO;

	c->devnode = misc_devnode;

	misc_class = no_free_ptr(c);
	no_free_ptr(ret);

	return 0;
}

The no_free_ptr() should work with CLASS(), but I'm not sure that's
recommended, lots of un-explored terretory here :-)

Similarly I suppose you could do something like:

DEFINE_CLASS(proc_dir, struct proc_dir_entry *,
	     proc_remove(_T), proc_create(pname, mode, parent, proc_ops),
	     const char *pname, umode_t mode, struct proc_dir_entry *parent,
	     const struct proc_ops *proc_ops)

EXTEND_CLASS(proc_dir, _seq, proc_create_seq(pname, mode, parent, ops, state_size, data),
	     const char *pname, umode_t mode, struct proc_dir_entry *parent,
	     const struct seq_operations *ops, unsigned int state_size, void *data)

EXTEND_CLASS(proc_dir, _seq_private, .....)

(urgh, C really needs better forwarding support)

Then you could write it something like:

static int __init misc_init(void)
{
	CLASS(proc_dir_seq, ret)("misc", 0, NULL, &misc_seq_ops);

	CLASS(class, c)("misc");
	if (IS_ERR(c))
		return PTR_ERR(c);

	if (register_chrdev(MISC_MAJOR, "misc", &misc_fops))
		return -EIO;

	c->devnode = misc_devnode;

	misc_class = no_free_ptr(c);
	no_free_ptr(ret);

	return 0;
}

Is what what we want?

(also, perhaps I should prefix the macro arguments with an '_', as is
you can't use 'name' as a constructor argument because the thing would
expand weird)

> I have a ton of future patches coming that does a bunch of
> class_create/destroy changes that would be made a LOT simpler with this
> patchset, and I really don't want to have to hit the same codepaths
> twice if at all possible.
> 
> So what's the odds this can be reasonable enough to get into 6.5-rc1 so
> we can rely on it there?

That's one for Linus I suppose.. the only remaining issue I still have
is the no_free_*() naming. One suggestion that made sense had it called
take_*().

All we really need are the first 4 patches to land; thereafter we can
gradually start converting things.
diff mbox series

Patch

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11285,49 +11285,46 @@  static void pmu_dev_release(struct devic
 
 static int pmu_dev_alloc(struct pmu *pmu)
 {
-	int ret = -ENOMEM;
+	int ret;
 
-	pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!pmu->dev)
-		goto out;
+	struct device *dev __free(put_device) =
+		kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
 
-	pmu->dev->groups = pmu->attr_groups;
-	device_initialize(pmu->dev);
+	dev->groups = pmu->attr_groups;
+	device_initialize(dev);
 
-	dev_set_drvdata(pmu->dev, pmu);
-	pmu->dev->bus = &pmu_bus;
-	pmu->dev->release = pmu_dev_release;
+	dev_set_drvdata(dev, pmu);
+	dev->bus = &pmu_bus;
+	dev->release = pmu_dev_release;
 
-	ret = dev_set_name(pmu->dev, "%s", pmu->name);
+	ret = dev_set_name(dev, "%s", pmu->name);
 	if (ret)
-		goto free_dev;
+		return ret;
 
-	ret = device_add(pmu->dev);
+	ret = device_add(dev);
 	if (ret)
-		goto free_dev;
+		return ret;
 
-	/* For PMUs with address filters, throw in an extra attribute: */
-	if (pmu->nr_addr_filters)
-		ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
-
-	if (ret)
-		goto del_dev;
+	struct device *del __free(device_del) = dev;
 
-	if (pmu->attr_update)
-		ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
-
-	if (ret)
-		goto del_dev;
-
-out:
-	return ret;
-
-del_dev:
-	device_del(pmu->dev);
-
-free_dev:
-	put_device(pmu->dev);
-	goto out;
+	/* For PMUs with address filters, throw in an extra attribute: */
+	if (pmu->nr_addr_filters) {
+		ret = device_create_file(dev, &dev_attr_nr_addr_filters);
+		if (ret)
+			return ret;
+	}
+
+	if (pmu->attr_update) {
+		ret = sysfs_update_groups(&dev->kobj, pmu->attr_update);
+		if (ret)
+			return ret;
+	}
+
+	no_free_ptr(del);
+	pmu->dev = no_free_ptr(dev);
+	return 0;
 }
 
 static struct lock_class_key cpuctx_mutex;