diff mbox series

[31/32] genirq/msi: Simplify sysfs handling

Message ID 20211126232736.135247787@linutronix.de (mailing list archive)
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series genirq/msi, PCI/MSI: Spring cleaning - Part 2 | expand

Commit Message

Thomas Gleixner Nov. 27, 2021, 1:23 a.m. UTC
The sysfs handling for MSI is a convoluted maze and it is in the way of
supporting dynamic expansion of the MSI-X vectors because it only supports
a one-off bulk population/free of the sysfs entries.

Change it to do:

   1) Create an empty sysfs attribute group when msi_device_data is
      allocated

   2) Populate the entries when the MSI descriptor is initialized

   3) Free the entries when an MSI descriptor is detached from a Linux
      interrupt.

   4) Provide functions for the legacy non-irqdomain fallback code to
      do a bulk population/free. This code won't support dynamic
      expansion.

This makes the code simpler and reduces the number of allocations as the
empty attribute group can be shared.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/msi.h |    7 +
 kernel/irq/msi.c    |  196 +++++++++++++++++++++++-----------------------------
 2 files changed, 95 insertions(+), 108 deletions(-)

Comments

Greg KH Nov. 27, 2021, 12:32 p.m. UTC | #1
On Sat, Nov 27, 2021 at 02:23:15AM +0100, Thomas Gleixner wrote:
> The sysfs handling for MSI is a convoluted maze and it is in the way of
> supporting dynamic expansion of the MSI-X vectors because it only supports
> a one off bulk population/free of the sysfs entries.
> 
> Change it to do:
> 
>    1) Creating an empty sysfs attribute group when msi_device_data is
>       allocated
> 
>    2) Populate the entries when the MSI descriptor is initialized

How much later does this happen?  Can it happen while the device has a
driver bound to it?

>    3) Free the entries when a MSI descriptor is detached from a Linux
>       interrupt.
> 
>    4) Provide functions for the legacy non-irqdomain fallback code to
>       do a bulk population/free. This code won't support dynamic
>       expansion.
> 
> This makes the code simpler and reduces the number of allocations as the
> empty attribute group can be shared.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  include/linux/msi.h |    7 +
>  kernel/irq/msi.c    |  196 +++++++++++++++++++++++-----------------------------
>  2 files changed, 95 insertions(+), 108 deletions(-)
> 
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -72,6 +72,7 @@ struct irq_data;
>  struct msi_desc;
>  struct pci_dev;
>  struct platform_msi_priv_data;
> +struct device_attribute;
>  
>  void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
>  #ifdef CONFIG_GENERIC_MSI_IRQ
> @@ -127,6 +128,7 @@ struct pci_msi_desc {
>   * @dev:	Pointer to the device which uses this descriptor
>   * @msg:	The last set MSI message cached for reuse
>   * @affinity:	Optional pointer to a cpu affinity mask for this descriptor
> + * @sysfs_attr:	Pointer to sysfs device attribute
>   *
>   * @write_msi_msg:	Callback that may be called when the MSI message
>   *			address or data changes
> @@ -146,6 +148,9 @@ struct msi_desc {
>  #ifdef CONFIG_IRQ_MSI_IOMMU
>  	const void			*iommu_cookie;
>  #endif
> +#ifdef CONFIG_SYSFS
> +	struct device_attribute		*sysfs_attrs;
> +#endif
>  
>  	void (*write_msi_msg)(struct msi_desc *entry, void *data);
>  	void *write_msi_msg_data;
> @@ -171,7 +176,6 @@ enum msi_desc_filter {
>   * @lock:		Spinlock to protect register access
>   * @properties:		MSI properties which are interesting to drivers
>   * @num_descs:		The number of allocated MSI descriptors for the device
> - * @attrs:		Pointer to the sysfs attribute group
>   * @platform_data:	Platform-MSI specific data
>   * @list:		List of MSI descriptors associated to the device
>   * @mutex:		Mutex protecting the MSI list
> @@ -182,7 +186,6 @@ struct msi_device_data {
>  	raw_spinlock_t			lock;
>  	unsigned long			properties;
>  	unsigned int			num_descs;
> -	const struct attribute_group    **attrs;
>  	struct platform_msi_priv_data	*platform_data;
>  	struct list_head		list;
>  	struct mutex			mutex;
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -19,6 +19,7 @@
>  
>  #include "internals.h"
>  
> +static inline int msi_sysfs_create_group(struct device *dev);
>  #define dev_to_msi_list(dev)	(&(dev)->msi.data->list)
>  
>  /**
> @@ -208,6 +209,7 @@ static void msi_device_data_release(stru
>  int msi_setup_device_data(struct device *dev)
>  {
>  	struct msi_device_data *md;
> +	int ret;
>  
>  	if (dev->msi.data)
>  		return 0;
> @@ -216,6 +218,12 @@ int msi_setup_device_data(struct device
>  	if (!md)
>  		return -ENOMEM;
>  
> +	ret = msi_sysfs_create_group(dev);
> +	if (ret) {
> +		devres_free(md);
> +		return ret;
> +	}
> +
>  	raw_spin_lock_init(&md->lock);
>  	INIT_LIST_HEAD(&md->list);
>  	mutex_init(&md->mutex);
> @@ -395,6 +403,20 @@ int __msi_get_virq(struct device *dev, u
>  EXPORT_SYMBOL_GPL(__msi_get_virq);
>  
>  #ifdef CONFIG_SYSFS
> +static struct attribute *msi_dev_attrs[] = {
> +	NULL
> +};
> +
> +static const struct attribute_group msi_irqs_group = {
> +	.name	= "msi_irqs",
> +	.attrs	= msi_dev_attrs,
> +};
> +
> +static inline int msi_sysfs_create_group(struct device *dev)
> +{
> +	return devm_device_add_group(dev, &msi_irqs_group);

Much nicer, but you changed the lifetime rules of when these attributes
will be removed, is that ok?

I still worry that these attributes show up "after" the device is
registered with the driver core, but hey, it's no worse than it
currently is, so that's not caused by this patch series...

> @@ -404,97 +426,74 @@ static ssize_t msi_mode_show(struct devi
>  	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
>  }
>  
> -/**
> - * msi_populate_sysfs - Populate msi_irqs sysfs entries for devices
> - * @dev:	The device(PCI, platform etc) who will get sysfs entries
> - */
> -static const struct attribute_group **msi_populate_sysfs(struct device *dev)
> +static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
>  {
> -	const struct attribute_group **msi_irq_groups;
> -	struct attribute **msi_attrs, *msi_attr;
> -	struct device_attribute *msi_dev_attr;
> -	struct attribute_group *msi_irq_group;
> -	struct msi_desc *entry;
> -	int ret = -ENOMEM;
> -	int num_msi = 0;
> -	int count = 0;
> +	struct device_attribute *attrs = desc->sysfs_attrs;
>  	int i;
>  
> -	/* Determine how many msi entries we have */
> -	msi_for_each_desc(entry, dev, MSI_DESC_ALL)
> -		num_msi += entry->nvec_used;
> -	if (!num_msi)
> -		return NULL;
> +	if (!attrs)
> +		return;
>  
> -	/* Dynamically create the MSI attributes for the device */
> -	msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL);
> -	if (!msi_attrs)
> -		return ERR_PTR(-ENOMEM);
> -
> -	msi_for_each_desc(entry, dev, MSI_DESC_ALL) {
> -		for (i = 0; i < entry->nvec_used; i++) {
> -			msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
> -			if (!msi_dev_attr)
> -				goto error_attrs;
> -			msi_attrs[count] = &msi_dev_attr->attr;
> -
> -			sysfs_attr_init(&msi_dev_attr->attr);
> -			msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
> -							    entry->irq + i);
> -			if (!msi_dev_attr->attr.name)
> -				goto error_attrs;
> -			msi_dev_attr->attr.mode = 0444;
> -			msi_dev_attr->show = msi_mode_show;
> -			++count;
> -		}
> +	desc->sysfs_attrs = NULL;
> +	for (i = 0; i < desc->nvec_used; i++) {
> +		if (attrs[i].show)
> +			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
> +		kfree(attrs[i].attr.name);

That's a cute hack, but should be documented somewhere in the code (that
if there is no show function, that means no attribute was registered
here).

If you add a comment for this (either here or when you register the
attribute), feel free to add:

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thomas Gleixner Nov. 27, 2021, 7:31 p.m. UTC | #2
On Sat, Nov 27 2021 at 13:32, Greg Kroah-Hartman wrote:
> On Sat, Nov 27, 2021 at 02:23:15AM +0100, Thomas Gleixner wrote:
>> The sysfs handling for MSI is a convoluted maze and it is in the way of
>> supporting dynamic expansion of the MSI-X vectors because it only supports
>> a one off bulk population/free of the sysfs entries.
>> 
>> Change it to do:
>> 
>>    1) Creating an empty sysfs attribute group when msi_device_data is
>>       allocated
>> 
>>    2) Populate the entries when the MSI descriptor is initialized
>
> How much later does this happen?  Can it happen while the device has a
> driver bound to it?

That's not later than before. It's when the driver initializes the
MSI[X] interrupts, which usually happens in the probe() function.

The difference is that the group (i.e. the directory) is created slightly
earlier.

>> +
>> +static inline int msi_sysfs_create_group(struct device *dev)
>> +{
>> +	return devm_device_add_group(dev, &msi_irqs_group);
>
> Much nicer, but you changed the lifetime rules of when these attributes
> will be removed, is that ok?

The msi entries are removed at the same place as they are removed in the
current mainline code, i.e. when the device driver shuts the device
down and disables MSI[X], which happens usually during remove()

What's different now is that the empty group stays around a bit
longer. I don't see how that matters.

> I still worry that these attributes show up "after" the device is
> registered with the driver core, but hey, it's no worse than it
> currently is, so that's not caused by this patch series...

Does that registration happen before or after driver->probe()?

>> -		}
>> +	desc->sysfs_attrs = NULL;
>> +	for (i = 0; i < desc->nvec_used; i++) {
>> +		if (attrs[i].show)
>> +			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
>> +		kfree(attrs[i].attr.name);
>
> That's a cute hack, but should be documented somewhere in the code (that
> if there is no show function, that means no attribute was registered
> here).
>
> If you add a comment for this (either here or when you register the
> attribute), feel free to add:

Will do.

Thanks,

        tglx
Greg KH Nov. 28, 2021, 11:07 a.m. UTC | #3
On Sat, Nov 27, 2021 at 08:31:37PM +0100, Thomas Gleixner wrote:
> On Sat, Nov 27 2021 at 13:32, Greg Kroah-Hartman wrote:
> > On Sat, Nov 27, 2021 at 02:23:15AM +0100, Thomas Gleixner wrote:
> >> The sysfs handling for MSI is a convoluted maze and it is in the way of
> >> supporting dynamic expansion of the MSI-X vectors because it only supports
> >> a one off bulk population/free of the sysfs entries.
> >> 
> >> Change it to do:
> >> 
> >>    1) Creating an empty sysfs attribute group when msi_device_data is
> >>       allocated
> >> 
> >>    2) Populate the entries when the MSI descriptor is initialized
> >
> > How much later does this happen?  Can it happen while the device has a
> > driver bound to it?
> 
> That's not later than before. It's when the driver initializes the
> MSI[X] interrupts, which usually happens in the probe() function.
> 
> The difference is that the group, (i.e.) directory is created slightly
> earlier.

Ok, but that still happens when probe() is called for the driver, right?

> >> +
> >> +static inline int msi_sysfs_create_group(struct device *dev)
> >> +{
> >> +	return devm_device_add_group(dev, &msi_irqs_group);
> >
> > Much nicer, but you changed the lifetime rules of when these attributes
> > will be removed, is that ok?
> 
> The msi entries are removed at the same place as they are removed in the
> current mainline code, i.e. when the device driver shuts the device
> down and disables MSI[X], which happens usually during remove()
> 
> What's different now is that the empty group stays around a bit
> longer. I don't see how that matters.

How much longer does it stick around?

What happens if this sequence happens:
	- probe()
	- disconnect()
	- probe()
with the same device (i.e. the device is not removed from the system)?

Which can happen as userspace can trigger disconnect() or even worse, if
the driver is unloaded and then loaded again?  Will the second call to
create this directory fail as it is not cleaned up yet?

I can never remember if devm_*() stuff sticks around for the device
lifecycle, or for the driver/device lifecycle, which is one big reason
why I don't like that api...

> > I still worry that these attributes show up "after" the device is
> > registered with the driver core, but hey, it's no worse than it
> > currently is, so that's not caused by this patch series...
> 
> Happens that register before or after driver->probe()?

During probe is a bit too late, but we can handle that as we are used to
it.  If it happens after probe() succeeds, based on something else being
asked for in the driver (like the device being opened), then userspace
has no chance of ever noticing these attributes being added.

But again, this isn't new to your code series, so I wouldn't worry about
it.  Obviously userspace tools do not care or really notice these
attributes at all otherwise the authors of them would have complained
a long time ago :)

So again, no real objection from me here, just meta-comments, except for
the above thing with the devm_* call to ensure that the
probe/disconnect/probe sequence will still work just as well as it does
today.  Should be easy enough to test out by just unloading a module and
then loading it again with this patch series applied.

thanks,

greg k-h
Thomas Gleixner Nov. 28, 2021, 7:33 p.m. UTC | #4
On Sun, Nov 28 2021 at 12:07, Greg Kroah-Hartman wrote:
> On Sat, Nov 27, 2021 at 08:31:37PM +0100, Thomas Gleixner wrote:
>> On Sat, Nov 27 2021 at 13:32, Greg Kroah-Hartman wrote:
>> > On Sat, Nov 27, 2021 at 02:23:15AM +0100, Thomas Gleixner wrote:
>> >> The sysfs handling for MSI is a convoluted maze and it is in the way of
>> >> supporting dynamic expansion of the MSI-X vectors because it only supports
>> >> a one off bulk population/free of the sysfs entries.
>> >> 
>> >> Change it to do:
>> >> 
>> >>    1) Creating an empty sysfs attribute group when msi_device_data is
>> >>       allocated
>> >> 
>> >>    2) Populate the entries when the MSI descriptor is initialized
>> >
>> > How much later does this happen?  Can it happen while the device has a
>> > driver bound to it?
>> 
>> That's not later than before. It's when the driver initializes the
>> MSI[X] interrupts, which usually happens in the probe() function.
>> 
>> The difference is that the group, (i.e.) directory is created slightly
>> earlier.
>
> Ok, but that still happens when probe() is called for the driver,
> right?

Yes.

>> >> +static inline int msi_sysfs_create_group(struct device *dev)
>> >> +{
>> >> +	return devm_device_add_group(dev, &msi_irqs_group);
>> >
>> > Much nicer, but you changed the lifetime rules of when these attributes
>> > will be removed, is that ok?
>> 
>> The msi entries are removed at the same place as they are removed in the
>> current mainline code, i.e. when the device driver shuts the device
>> down and disables MSI[X], which happens usually during remove()
>> 
>> What's different now is that the empty group stays around a bit
>> longer. I don't see how that matters.
>
> How much longer does it stick around?
>
> What happens if this sequence happens:
> 	- probe()
> 	- disconnect()
> 	- probe()
> with the same device (i.e. the device is not removed from the system)?
>
> Which can happen as userspace can trigger disconnect() or even worse, if
> the driver is unloaded and then loaded again?  Will the second call to
> create this directory fail as it is not cleaned up yet?
>
> I can never remember if devm_*() stuff sticks around for the device
> lifecycle, or for the driver/device lifecycle, which is one big reason
> why I don't like that api...

Driver lifecycle AFAICT.

>> > I still worry that these attributes show up "after" the device is
>> > registered with the driver core, but hey, it's no worse than it
>> > currently is, so that's not caused by this patch series...
>> 
>> Happens that register before or after driver->probe()?
>
> During probe is a bit too late, but we can handle that as we are used to
> it.  If it happens after probe() succeeds, based on something else being
> asked for in the driver (like the device being opened), then userspace
> has no chance of ever noticing these attributes being added.
>
> But again, this isn't new to your code series, so I wouldn't worry about
> it.  Obviously userspace tools do not care or really notice these
> attributes at all otherwise the authors of them would have complained
> a long time ago :)

I have no idea how these attributes are used at all. Neil should know,
as he added them in the first place.

> So again, no real objection from me here, just meta-comments, except for
> the above thing with the devm_* call to ensure that the
> probe/disconnect/probe sequence will still work just as well as it does
> today.  Should be easy enough to test out by just unloading a module and
> then loading it again with this patch series applied.

That works just fine. Tested that already before posting. After module
removal the directory is gone.

Thanks,

        tglx
diff mbox series

Patch

--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -72,6 +72,7 @@  struct irq_data;
 struct msi_desc;
 struct pci_dev;
 struct platform_msi_priv_data;
+struct device_attribute;
 
 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 #ifdef CONFIG_GENERIC_MSI_IRQ
@@ -127,6 +128,7 @@  struct pci_msi_desc {
  * @dev:	Pointer to the device which uses this descriptor
  * @msg:	The last set MSI message cached for reuse
  * @affinity:	Optional pointer to a cpu affinity mask for this descriptor
+ * @sysfs_attrs:	Pointer to the array of sysfs device attributes
  *
  * @write_msi_msg:	Callback that may be called when the MSI message
  *			address or data changes
@@ -146,6 +148,9 @@  struct msi_desc {
 #ifdef CONFIG_IRQ_MSI_IOMMU
 	const void			*iommu_cookie;
 #endif
+#ifdef CONFIG_SYSFS
+	struct device_attribute		*sysfs_attrs;
+#endif
 
 	void (*write_msi_msg)(struct msi_desc *entry, void *data);
 	void *write_msi_msg_data;
@@ -171,7 +176,6 @@  enum msi_desc_filter {
  * @lock:		Spinlock to protect register access
  * @properties:		MSI properties which are interesting to drivers
  * @num_descs:		The number of allocated MSI descriptors for the device
- * @attrs:		Pointer to the sysfs attribute group
  * @platform_data:	Platform-MSI specific data
  * @list:		List of MSI descriptors associated to the device
  * @mutex:		Mutex protecting the MSI list
@@ -182,7 +186,6 @@  struct msi_device_data {
 	raw_spinlock_t			lock;
 	unsigned long			properties;
 	unsigned int			num_descs;
-	const struct attribute_group    **attrs;
 	struct platform_msi_priv_data	*platform_data;
 	struct list_head		list;
 	struct mutex			mutex;
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -19,6 +19,7 @@ 
 
 #include "internals.h"
 
+static inline int msi_sysfs_create_group(struct device *dev);
 #define dev_to_msi_list(dev)	(&(dev)->msi.data->list)
 
 /**
@@ -208,6 +209,7 @@  static void msi_device_data_release(stru
 int msi_setup_device_data(struct device *dev)
 {
 	struct msi_device_data *md;
+	int ret;
 
 	if (dev->msi.data)
 		return 0;
@@ -216,6 +218,12 @@  int msi_setup_device_data(struct device
 	if (!md)
 		return -ENOMEM;
 
+	ret = msi_sysfs_create_group(dev);
+	if (ret) {
+		devres_free(md);
+		return ret;
+	}
+
 	raw_spin_lock_init(&md->lock);
 	INIT_LIST_HEAD(&md->list);
 	mutex_init(&md->mutex);
@@ -395,6 +403,20 @@  int __msi_get_virq(struct device *dev, u
 EXPORT_SYMBOL_GPL(__msi_get_virq);
 
 #ifdef CONFIG_SYSFS
+static struct attribute *msi_dev_attrs[] = {
+	NULL
+};
+
+static const struct attribute_group msi_irqs_group = {
+	.name	= "msi_irqs",
+	.attrs	= msi_dev_attrs,
+};
+
+static inline int msi_sysfs_create_group(struct device *dev)
+{
+	return devm_device_add_group(dev, &msi_irqs_group);
+}
+
 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -404,97 +426,74 @@  static ssize_t msi_mode_show(struct devi
 	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
 }
 
-/**
- * msi_populate_sysfs - Populate msi_irqs sysfs entries for devices
- * @dev:	The device(PCI, platform etc) who will get sysfs entries
- */
-static const struct attribute_group **msi_populate_sysfs(struct device *dev)
+static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
 {
-	const struct attribute_group **msi_irq_groups;
-	struct attribute **msi_attrs, *msi_attr;
-	struct device_attribute *msi_dev_attr;
-	struct attribute_group *msi_irq_group;
-	struct msi_desc *entry;
-	int ret = -ENOMEM;
-	int num_msi = 0;
-	int count = 0;
+	struct device_attribute *attrs = desc->sysfs_attrs;
 	int i;
 
-	/* Determine how many msi entries we have */
-	msi_for_each_desc(entry, dev, MSI_DESC_ALL)
-		num_msi += entry->nvec_used;
-	if (!num_msi)
-		return NULL;
+	if (!attrs)
+		return;
 
-	/* Dynamically create the MSI attributes for the device */
-	msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL);
-	if (!msi_attrs)
-		return ERR_PTR(-ENOMEM);
-
-	msi_for_each_desc(entry, dev, MSI_DESC_ALL) {
-		for (i = 0; i < entry->nvec_used; i++) {
-			msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
-			if (!msi_dev_attr)
-				goto error_attrs;
-			msi_attrs[count] = &msi_dev_attr->attr;
-
-			sysfs_attr_init(&msi_dev_attr->attr);
-			msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
-							    entry->irq + i);
-			if (!msi_dev_attr->attr.name)
-				goto error_attrs;
-			msi_dev_attr->attr.mode = 0444;
-			msi_dev_attr->show = msi_mode_show;
-			++count;
-		}
+	desc->sysfs_attrs = NULL;
+	for (i = 0; i < desc->nvec_used; i++) {
+		if (attrs[i].show)
+			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
+		kfree(attrs[i].attr.name);
 	}
+	kfree(attrs);
+}
 
-	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
-	if (!msi_irq_group)
-		goto error_attrs;
-	msi_irq_group->name = "msi_irqs";
-	msi_irq_group->attrs = msi_attrs;
-
-	msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL);
-	if (!msi_irq_groups)
-		goto error_irq_group;
-	msi_irq_groups[0] = msi_irq_group;
+static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
+{
+	struct device_attribute *attrs;
+	int ret, i;
 
-	ret = sysfs_create_groups(&dev->kobj, msi_irq_groups);
-	if (ret)
-		goto error_irq_groups;
+	attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	desc->sysfs_attrs = attrs;
+	for (i = 0; i < desc->nvec_used; i++) {
+		sysfs_attr_init(&attrs[i].attr);
+		attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
+		if (!attrs[i].attr.name) {
+			ret = -ENOMEM;
+			goto fail;
+		}
 
-	return msi_irq_groups;
+		attrs[i].attr.mode = 0444;
+		attrs[i].show = msi_mode_show;
 
-error_irq_groups:
-	kfree(msi_irq_groups);
-error_irq_group:
-	kfree(msi_irq_group);
-error_attrs:
-	count = 0;
-	msi_attr = msi_attrs[count];
-	while (msi_attr) {
-		msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
-		kfree(msi_attr->name);
-		kfree(msi_dev_attr);
-		++count;
-		msi_attr = msi_attrs[count];
+		ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
+		if (ret) {
+			attrs[i].show = NULL;
+			goto fail;
+		}
 	}
-	kfree(msi_attrs);
-	return ERR_PTR(ret);
+	return 0;
+
+fail:
+	msi_sysfs_remove_desc(dev, desc);
+	return ret;
 }
 
+#ifdef CONFIG_PCI_MSI_ARCH_FALLBACK
 /**
  * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
  * @dev:	The device(PCI, platform etc) which will get sysfs entries
  */
 int msi_device_populate_sysfs(struct device *dev)
 {
-	const struct attribute_group **group = msi_populate_sysfs(dev);
+	struct msi_desc *desc;
+	int ret;
 
-	if (IS_ERR(group))
-		return PTR_ERR(group);
-	dev->msi.data->attrs = group;
+	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
+		if (desc->sysfs_attrs)
+			continue;
+		ret = msi_sysfs_populate_desc(dev, desc);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
@@ -505,28 +504,17 @@  int msi_device_populate_sysfs(struct dev
  */
 void msi_device_destroy_sysfs(struct device *dev)
 {
-	const struct attribute_group **msi_irq_groups = dev->msi.data->attrs;
-	struct device_attribute *dev_attr;
-	struct attribute **msi_attrs;
-	int count = 0;
-
-	dev->msi.data->attrs = NULL;
-	if (!msi_irq_groups)
-		return;
+	struct msi_desc *desc;
 
-	sysfs_remove_groups(&dev->kobj, msi_irq_groups);
-	msi_attrs = msi_irq_groups[0]->attrs;
-	while (msi_attrs[count]) {
-		dev_attr = container_of(msi_attrs[count], struct device_attribute, attr);
-		kfree(dev_attr->attr.name);
-		kfree(dev_attr);
-		++count;
-	}
-	kfree(msi_attrs);
-	kfree(msi_irq_groups[0]);
-	kfree(msi_irq_groups);
+	msi_for_each_desc(desc, dev, MSI_DESC_ALL)
+		msi_sysfs_remove_desc(dev, desc);
 }
-#endif
+#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK */
+#else /* CONFIG_SYSFS */
+static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
+static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
+static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
+#endif /* !CONFIG_SYSFS */
 
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
 static inline void irq_chip_write_msi_msg(struct irq_data *data,
@@ -959,6 +947,12 @@  int __msi_domain_alloc_irqs(struct irq_d
 			ret = msi_init_virq(domain, virq + i, vflags);
 			if (ret)
 				return ret;
+
+			if (info->flags & MSI_FLAG_DEV_SYSFS) {
+				ret = msi_sysfs_populate_desc(dev, desc);
+				if (ret)
+					return ret;
+			}
 		}
 		allocated++;
 	}
@@ -1003,18 +997,7 @@  int msi_domain_alloc_irqs_descs_locked(s
 
 	ret = ops->domain_alloc_irqs(domain, dev, nvec);
 	if (ret)
-		goto cleanup;
-
-	if (!(info->flags & MSI_FLAG_DEV_SYSFS))
-		return 0;
-
-	ret = msi_device_populate_sysfs(dev);
-	if (ret)
-		goto cleanup;
-	return 0;
-
-cleanup:
-	msi_domain_free_irqs_descs_locked(domain, dev);
+		msi_domain_free_irqs_descs_locked(domain, dev);
 	return ret;
 }
 
@@ -1039,6 +1022,7 @@  int msi_domain_alloc_irqs(struct irq_dom
 
 void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
 {
+	struct msi_domain_info *info = domain->host_data;
 	struct irq_data *irqd;
 	struct msi_desc *desc;
 	int i;
@@ -1053,6 +1037,8 @@  void __msi_domain_free_irqs(struct irq_d
 		}
 
 		irq_domain_free_irqs(desc->irq, desc->nvec_used);
+		if (info->flags & MSI_FLAG_DEV_SYSFS)
+			msi_sysfs_remove_desc(dev, desc);
 		desc->irq = 0;
 	}
 }
@@ -1081,8 +1067,6 @@  void msi_domain_free_irqs_descs_locked(s
 
 	lockdep_assert_held(&dev->msi.data->mutex);
 
-	if (info->flags & MSI_FLAG_DEV_SYSFS)
-		msi_device_destroy_sysfs(dev);
 	ops->domain_free_irqs(domain, dev);
 	msi_domain_free_msi_descs(info, dev);
 }