diff mbox series

[3/5] ACPI: scan: Fix device object rescan in acpi_scan_clear_dep()

Message ID 7272740.EvYhyI6sBW@kreacher (mailing list archive)
State Mainlined, archived
Headers show
Series ACPI: scan: Fixes and cleanups related to dependencies list handling | expand

Commit Message

Rafael J. Wysocki June 16, 2021, 2:23 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

In general, acpi_bus_attach() can only be run safely under
acpi_scan_lock, but that lock cannot be acquired under
acpi_dep_list_lock, so make acpi_scan_clear_dep() schedule deferred
execution of acpi_bus_attach() under acpi_scan_lock instead of
calling it directly.

This also fixes a possible race between acpi_scan_clear_dep() and
device removal that might cause a device object that went away to
be accessed, because acpi_scan_clear_dep() is changed to acquire
a reference on the consumer device object.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c |   50 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

Comments

Hans de Goede June 16, 2021, 2:48 p.m. UTC | #1
Hi,

On 6/16/21 4:23 PM, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> In general, acpi_bus_attach() can only be run safely under
> acpi_scan_lock, but that lock cannot be acquired under
> acpi_dep_list_lock, so make acpi_scan_clear_dep() schedule deferred
> execution of acpi_bus_attach() under acpi_scan_lock instead of
> calling it directly.
> 
> This also fixes a possible race between acpi_scan_clear_dep() and
> device removal that might cause a device object that went away to
> be accessed, because acpi_scan_clear_dep() is changed to acquire
> a reference on the consumer device object.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/acpi/scan.c |   50 +++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 45 insertions(+), 5 deletions(-)
> 
> Index: linux-pm/drivers/acpi/scan.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/scan.c
> +++ linux-pm/drivers/acpi/scan.c
> @@ -2115,16 +2115,56 @@ static int acpi_dev_get_first_consumer_d
>  	return 0;
>  }
>  
> -static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
> -{
> +struct acpi_scan_clear_dep_work {
> +	struct work_struct work;
>  	struct acpi_device *adev;
> +};
> +
> +static void acpi_scan_clear_dep_fn(struct work_struct *work)
> +{
> +	struct acpi_scan_clear_dep_work *cdw;
> +
> +	cdw = container_of(work, struct acpi_scan_clear_dep_work, work);
>  
> -	acpi_bus_get_device(dep->consumer, &adev);
> +	acpi_scan_lock_acquire();
> +	acpi_bus_attach(cdw->adev, true);
> +	acpi_scan_lock_release();
> +
> +	acpi_dev_put(cdw->adev);
> +	kfree(cdw);
> +}
> +
> +static bool acpi_scan_clear_dep_queue(struct acpi_device *adev)
> +{
> +	struct acpi_scan_clear_dep_work *cdw;
> +
> +	if (adev->dep_unmet)
> +		return false;
> +
> +	cdw = kmalloc(sizeof(*cdw), GFP_KERNEL);
> +	if (!cdw)
> +		return false;
> +
> +	cdw->adev = adev;
> +	INIT_WORK(&cdw->work, acpi_scan_clear_dep_fn);
> +	/*
> +	 * Since the work function may block on the lock until the entire
> +	 * initial enumeration of devices is complete, put it into the unbound
> +	 * workqueue.
> +	 */
> +	queue_work(system_unbound_wq, &cdw->work);

Hmm, I'm a bit worried about this. Even with system_unbound_wq,
some code may expect at least some progress to be made in processing
work items during the initial enumeration. OTOH, this does run pretty early on.

Still, I wonder if it would not be better to create and use our own workqueue
for this?

I guess we can always do this if we run into issues later...

With that said / otherwise the patch looks good to me:

Reviewed-by: Hans de Goede <hdegoede@redhat.com>

Regards,

Hans




> +
> +	return true;
> +}
> +
> +static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
> +{
> +	struct acpi_device *adev = acpi_bus_get_acpi_device(dep->consumer);
>  
>  	if (adev) {
>  		adev->dep_unmet--;
> -		if (!adev->dep_unmet)
> -			acpi_bus_attach(adev, true);
> +		if (!acpi_scan_clear_dep_queue(adev))
> +			acpi_dev_put(adev);
>  	}
>  
>  	list_del(&dep->node);
> 
> 
>
Rafael J. Wysocki June 16, 2021, 3:12 p.m. UTC | #2
On Wed, Jun 16, 2021 at 4:48 PM Hans de Goede <hdegoede@redhat.com> wrote:
>
> Hi,
>
> On 6/16/21 4:23 PM, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > In general, acpi_bus_attach() can only be run safely under
> > acpi_scan_lock, but that lock cannot be acquired under
> > acpi_dep_list_lock, so make acpi_scan_clear_dep() schedule deferred
> > execution of acpi_bus_attach() under acpi_scan_lock instead of
> > calling it directly.
> >
> > This also fixes a possible race between acpi_scan_clear_dep() and
> > device removal that might cause a device object that went away to
> > be accessed, because acpi_scan_clear_dep() is changed to acquire
> > a reference on the consumer device object.
> >
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >  drivers/acpi/scan.c |   50 +++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 45 insertions(+), 5 deletions(-)
> >
> > Index: linux-pm/drivers/acpi/scan.c
> > ===================================================================
> > --- linux-pm.orig/drivers/acpi/scan.c
> > +++ linux-pm/drivers/acpi/scan.c
> > @@ -2115,16 +2115,56 @@ static int acpi_dev_get_first_consumer_d
> >       return 0;
> >  }
> >
> > -static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
> > -{
> > +struct acpi_scan_clear_dep_work {
> > +     struct work_struct work;
> >       struct acpi_device *adev;
> > +};
> > +
> > +static void acpi_scan_clear_dep_fn(struct work_struct *work)
> > +{
> > +     struct acpi_scan_clear_dep_work *cdw;
> > +
> > +     cdw = container_of(work, struct acpi_scan_clear_dep_work, work);
> >
> > -     acpi_bus_get_device(dep->consumer, &adev);
> > +     acpi_scan_lock_acquire();
> > +     acpi_bus_attach(cdw->adev, true);
> > +     acpi_scan_lock_release();
> > +
> > +     acpi_dev_put(cdw->adev);
> > +     kfree(cdw);
> > +}
> > +
> > +static bool acpi_scan_clear_dep_queue(struct acpi_device *adev)
> > +{
> > +     struct acpi_scan_clear_dep_work *cdw;
> > +
> > +     if (adev->dep_unmet)
> > +             return false;
> > +
> > +     cdw = kmalloc(sizeof(*cdw), GFP_KERNEL);
> > +     if (!cdw)
> > +             return false;
> > +
> > +     cdw->adev = adev;
> > +     INIT_WORK(&cdw->work, acpi_scan_clear_dep_fn);
> > +     /*
> > +      * Since the work function may block on the lock until the entire
> > +      * initial enumeration of devices is complete, put it into the unbound
> > +      * workqueue.
> > +      */
> > +     queue_work(system_unbound_wq, &cdw->work);
>
> Hmm, I'm a bit worried about this. Even with system_unbound_wq,
> some code may expect at least some progress to be made in processing
> work items during the initial enumeration. OTOH, this does run pretty early on.
>
> Still, I wonder if it would not be better to create and use our own workqueue
> for this?
>
> I guess we can always do this if we run into issues later...

Exactly my thought.

> With that said / otherwise the patch looks good to me:
>
> Reviewed-by: Hans de Goede <hdegoede@redhat.com>

Thanks!
diff mbox series

Patch

Index: linux-pm/drivers/acpi/scan.c
===================================================================
--- linux-pm.orig/drivers/acpi/scan.c
+++ linux-pm/drivers/acpi/scan.c
@@ -2115,16 +2115,56 @@  static int acpi_dev_get_first_consumer_d
 	return 0;
 }
 
-static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
-{
+struct acpi_scan_clear_dep_work {
+	struct work_struct work;
 	struct acpi_device *adev;
+};
+
+static void acpi_scan_clear_dep_fn(struct work_struct *work)
+{
+	struct acpi_scan_clear_dep_work *cdw;
+
+	cdw = container_of(work, struct acpi_scan_clear_dep_work, work);
 
-	acpi_bus_get_device(dep->consumer, &adev);
+	acpi_scan_lock_acquire();
+	acpi_bus_attach(cdw->adev, true);
+	acpi_scan_lock_release();
+
+	acpi_dev_put(cdw->adev);
+	kfree(cdw);
+}
+
+static bool acpi_scan_clear_dep_queue(struct acpi_device *adev)
+{
+	struct acpi_scan_clear_dep_work *cdw;
+
+	if (adev->dep_unmet)
+		return false;
+
+	cdw = kmalloc(sizeof(*cdw), GFP_KERNEL);
+	if (!cdw)
+		return false;
+
+	cdw->adev = adev;
+	INIT_WORK(&cdw->work, acpi_scan_clear_dep_fn);
+	/*
+	 * Since the work function may block on the lock until the entire
+	 * initial enumeration of devices is complete, put it into the unbound
+	 * workqueue.
+	 */
+	queue_work(system_unbound_wq, &cdw->work);
+
+	return true;
+}
+
+static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
+{
+	struct acpi_device *adev = acpi_bus_get_acpi_device(dep->consumer);
 
 	if (adev) {
 		adev->dep_unmet--;
-		if (!adev->dep_unmet)
-			acpi_bus_attach(adev, true);
+		if (!acpi_scan_clear_dep_queue(adev))
+			acpi_dev_put(adev);
 	}
 
 	list_del(&dep->node);