diff mbox

[v2,1/2] driver core: detach device's pm_domain after devres_release_all

Message ID 1502786217-212887-2-git-send-email-shawn.lin@rock-chips.com (mailing list archive)
State New, archived
Headers show

Commit Message

Shawn Lin Aug. 15, 2017, 8:36 a.m. UTC
Move dev_pm_domain_detach after devres_release_all to avoid
accessing the device's registers after its genpd has been powered off.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
---

Changes in v2: None

 drivers/base/dd.c       | 35 ++++++++++++++++++++++++++++++-----
 drivers/base/platform.c | 18 ++----------------
 2 files changed, 32 insertions(+), 21 deletions(-)

Comments

Greg KH Aug. 29, 2017, 6:42 a.m. UTC | #1
On Tue, Aug 15, 2017 at 04:36:56PM +0800, Shawn Lin wrote:
> Move dev_pm_domain_detach after devres_release_all to avoid
> accessing device's registers with genpd been powered off.

So, what is this going to break that is working already today?  :)

> 
> Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
> ---
> 
> Changes in v2: None
> 
>  drivers/base/dd.c       | 35 ++++++++++++++++++++++++++++++-----
>  drivers/base/platform.c | 18 ++----------------
>  2 files changed, 32 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index ad44b40..13dc0ad 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -25,7 +25,9 @@
>  #include <linux/kthread.h>
>  #include <linux/wait.h>
>  #include <linux/async.h>
> +#include <linux/platform_device.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/pm_domain.h>
>  #include <linux/pinctrl/devinfo.h>
>  
>  #include "base.h"
> @@ -356,6 +358,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
>  	int local_trigger_count = atomic_read(&deferred_trigger_count);
>  	bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) &&
>  			   !drv->suppress_bind_attrs;
> +	struct platform_driver *pdrv;
> +	bool do_pm_domain = false;
>  
>  	if (defer_all_probes) {
>  		/*
> @@ -414,6 +418,16 @@ static int really_probe(struct device *dev, struct device_driver *drv)
>  		if (ret)
>  			goto probe_failed;
>  	} else if (drv->probe) {
> +		ret = dev_pm_domain_attach(dev, true);
> +		pdrv = to_platform_driver(dev->driver);
> +		/* don't fail if just dev_pm_domain_attach failed */
> +		if (pdrv->prevent_deferred_probe &&
> +		    ret == -EPROBE_DEFER) {
> +			dev_warn(dev, "probe deferral not supported\n");
> +			ret = -ENXIO;
> +			goto probe_failed;
> +		}
> +		do_pm_domain = true;
>  		ret = drv->probe(dev);
>  		if (ret)
>  			goto probe_failed;
> @@ -421,13 +435,17 @@ static int really_probe(struct device *dev, struct device_driver *drv)
>  
>  	if (test_remove) {
>  		test_remove = false;
> +		do_pm_domain = false;
>  
> -		if (dev->bus->remove)
> +		if (dev->bus->remove) {
>  			dev->bus->remove(dev);
> -		else if (drv->remove)
> +		} else if (drv->remove) {
>  			drv->remove(dev);
> -
> +			do_pm_domain = true;

Why is this set to true if you have a driver remove function, but not if
you only have a bus remove function?  Why the difference?


> +		}
>  		devres_release_all(dev);
> +		if (do_pm_domain)
> +			dev_pm_domain_detach(dev, true);
>  		driver_sysfs_remove(dev);
>  		dev->driver = NULL;
>  		dev_set_drvdata(dev, NULL);
> @@ -458,6 +476,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
>  pinctrl_bind_failed:
>  	device_links_no_driver(dev);
>  	devres_release_all(dev);
> +	if (do_pm_domain)
> +		dev_pm_domain_detach(dev, true);

Can't you just always call this on the error path?

>  	driver_sysfs_remove(dev);
>  	dev->driver = NULL;
>  	dev_set_drvdata(dev, NULL);
> @@ -818,6 +838,7 @@ int driver_attach(struct device_driver *drv)
>  static void __device_release_driver(struct device *dev, struct device *parent)
>  {
>  	struct device_driver *drv;
> +	bool do_pm_domain = false;
>  
>  	drv = dev->driver;
>  	if (drv) {
> @@ -855,15 +876,19 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>  
>  		pm_runtime_put_sync(dev);
>  
> -		if (dev->bus && dev->bus->remove)
> +		if (dev->bus && dev->bus->remove) {
>  			dev->bus->remove(dev);
> -		else if (drv->remove)
> +		} else if (drv->remove) {
> +			do_pm_domain = true;

Same question here about drivers and bus default functions.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shawn Lin Aug. 29, 2017, 8:08 a.m. UTC | #2
Hi Greg,

On 2017/8/29 14:42, Greg Kroah-Hartman wrote:
> On Tue, Aug 15, 2017 at 04:36:56PM +0800, Shawn Lin wrote:
>> Move dev_pm_domain_detach after devres_release_all to avoid
>> accessing device's registers with genpd been powered off.
> 
> So, what is this going to break that is working already today?  :)

Thanks for your comment!

The background of this patch is:
(1) Some SoCs, including Rockchip's SoCs, don't support
accessing controllers' registers w/o the clk and power domain enabled.
(2) Many common drivers use devm_request_irq to request an irq, for either
shared or non-shared irqs.
(3) So we rely on devres_release_all to free the irq automatically.

So the actual race condition is:
(1) Driver A's probe fails, or its remove is called
(2) The power domain is detached right away
(3) An irq is triggered concurrently, just before devm_irq_release is called..
(4) Driver A's ISR reads its registers .. panic..

The issue is exposed by enabling CONFIG_DEBUG_SHIRQ. With it, devres_free_irq
will try to call the ISR, as the comment says: "It's a shared IRQ -- the driver
ought to be prepared for an IRQ event to happen even now it's being
freed". So it calls the driver's ISR w/o the power domain enabled, which
hangs the system... In theory, this option helps folks make their code
robust enough to deal with the shared case.

But no matter whether the irq is shared or non-shared, the race
condition is there. So we have two possible choices:
(1) Either use request_irq and free_irq directly
(2) Or move dev_pm_domain_detach after devres_release_all, which
makes sure we free the irq before powering off the power domain.

However, doesn't choice (1) imply that devm_request_irq shouldn't
exist? :) So I tried to fix it the way this patch does.

> 
>>
>> Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
>> ---

...

> 
> Why is this set to true if you have a driver remove function, but not if
> you only have a bus remove function?  Why the difference?
> 
> 

Sorry, I will fix all of these and always call dev_pm_domain_detach on the
error path.

>> +		}
>>   		devres_release_all(dev);
>> +		if (do_pm_domain)
>> +			dev_pm_domain_detach(dev, true);
>>   		driver_sysfs_remove(dev);
>>   		dev->driver = NULL;
>>   		dev_set_drvdata(dev, NULL);
>> @@ -458,6 +476,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
>>   pinctrl_bind_failed:
>>   	device_links_no_driver(dev);
>>   	devres_release_all(dev);
>> +	if (do_pm_domain)
>> +		dev_pm_domain_detach(dev, true);
> 
> Can't you just always call this on the error path?
> 
>>   	driver_sysfs_remove(dev);
>>   	dev->driver = NULL;
>>   	dev_set_drvdata(dev, NULL);
>> @@ -818,6 +838,7 @@ int driver_attach(struct device_driver *drv)
>>   static void __device_release_driver(struct device *dev, struct device *parent)
>>   {
>>   	struct device_driver *drv;
>> +	bool do_pm_domain = false;
>>   
>>   	drv = dev->driver;
>>   	if (drv) {
>> @@ -855,15 +876,19 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>>   
>>   		pm_runtime_put_sync(dev);
>>   
>> -		if (dev->bus && dev->bus->remove)
>> +		if (dev->bus && dev->bus->remove) {
>>   			dev->bus->remove(dev);
>> -		else if (drv->remove)
>> +		} else if (drv->remove) {
>> +			do_pm_domain = true;
> 
> Same question here about drivers and bus default functions.
> 
> thanks,
> 
> greg k-h
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Greg KH Aug. 29, 2017, 9:03 a.m. UTC | #3
On Tue, Aug 29, 2017 at 04:08:52PM +0800, Shawn Lin wrote:
> Hi Greg,
> 
> On 2017/8/29 14:42, Greg Kroah-Hartman wrote:
> > On Tue, Aug 15, 2017 at 04:36:56PM +0800, Shawn Lin wrote:
> > > Move dev_pm_domain_detach after devres_release_all to avoid
> > > accessing device's registers with genpd been powered off.
> > 
> > So, what is this going to break that is working already today?  :)
> 
> Thanks for your comment!
> 
> The background of this patch is that:
> (1) Some SoCs, including Rockchips' SoCs, couldn't support
> accessing controllers' registers w/o clk and power domain enabled.
> (2) Many common drivers use devm_request_irq to request irq for either
> shared irq or non-shared irq.
> (3) So we rely on devres_release_all to free irq automatically.
> 
> So the actually race condition is:
> (1) Driver A probe failed or calling remove
> (2) power domain is detached right now
> (3) A irq triggerd cocurrently just before calling devm_irq_release..
> (4) Driver A's ISR read its register .. panic..

If a probe failed, the ISR should never be called, right?  So that
should not be an issue here.

> The issue is exposed by enabing CONFIG_DEBUG_SHIRQ. Thus devres_free_irq
> will try to call the ISR as it says: "It's a shared IRQ -- the driver
> ought to be prepared for an IRQ event to happen even now it's being
> freed". So it calls the driver's ISR w/o power domain enabled, which
> hangup the system... This is theoretically help folks to make the code
> robust enough to deal with shared case.
> 
> But, for no matter whether the irq is shared or non-shared, the race
> condition is there. So we possible have two choices that
> (1) Either using request_irq and free_irq directly
> (2) Or moving dev_pm_domain_detach after devres_release_all which
> makes sure we free the irq before powering off power domain.
> 
> However doesn't choice(1) imply that devm_request_irq shouldn't
> exist? :) So I try to fix it like what this patch does.

Ok, this makes a lot more sense, please put this kind of information in
the patch changelog when you resend it.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index ad44b40..13dc0ad 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -25,7 +25,9 @@ 
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/async.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
 #include <linux/pinctrl/devinfo.h>
 
 #include "base.h"
@@ -356,6 +358,8 @@  static int really_probe(struct device *dev, struct device_driver *drv)
 	int local_trigger_count = atomic_read(&deferred_trigger_count);
 	bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) &&
 			   !drv->suppress_bind_attrs;
+	struct platform_driver *pdrv;
+	bool do_pm_domain = false;
 
 	if (defer_all_probes) {
 		/*
@@ -414,6 +418,16 @@  static int really_probe(struct device *dev, struct device_driver *drv)
 		if (ret)
 			goto probe_failed;
 	} else if (drv->probe) {
+		ret = dev_pm_domain_attach(dev, true);
+		pdrv = to_platform_driver(dev->driver);
+		/* don't fail if just dev_pm_domain_attach failed */
+		if (pdrv->prevent_deferred_probe &&
+		    ret == -EPROBE_DEFER) {
+			dev_warn(dev, "probe deferral not supported\n");
+			ret = -ENXIO;
+			goto probe_failed;
+		}
+		do_pm_domain = true;
 		ret = drv->probe(dev);
 		if (ret)
 			goto probe_failed;
@@ -421,13 +435,17 @@  static int really_probe(struct device *dev, struct device_driver *drv)
 
 	if (test_remove) {
 		test_remove = false;
+		do_pm_domain = false;
 
-		if (dev->bus->remove)
+		if (dev->bus->remove) {
 			dev->bus->remove(dev);
-		else if (drv->remove)
+		} else if (drv->remove) {
 			drv->remove(dev);
-
+			do_pm_domain = true;
+		}
 		devres_release_all(dev);
+		if (do_pm_domain)
+			dev_pm_domain_detach(dev, true);
 		driver_sysfs_remove(dev);
 		dev->driver = NULL;
 		dev_set_drvdata(dev, NULL);
@@ -458,6 +476,8 @@  static int really_probe(struct device *dev, struct device_driver *drv)
 pinctrl_bind_failed:
 	device_links_no_driver(dev);
 	devres_release_all(dev);
+	if (do_pm_domain)
+		dev_pm_domain_detach(dev, true);
 	driver_sysfs_remove(dev);
 	dev->driver = NULL;
 	dev_set_drvdata(dev, NULL);
@@ -818,6 +838,7 @@  int driver_attach(struct device_driver *drv)
 static void __device_release_driver(struct device *dev, struct device *parent)
 {
 	struct device_driver *drv;
+	bool do_pm_domain = false;
 
 	drv = dev->driver;
 	if (drv) {
@@ -855,15 +876,19 @@  static void __device_release_driver(struct device *dev, struct device *parent)
 
 		pm_runtime_put_sync(dev);
 
-		if (dev->bus && dev->bus->remove)
+		if (dev->bus && dev->bus->remove) {
 			dev->bus->remove(dev);
-		else if (drv->remove)
+		} else if (drv->remove) {
+			do_pm_domain = true;
 			drv->remove(dev);
+		}
 
 		device_links_driver_cleanup(dev);
 		dma_deconfigure(dev);
 
 		devres_release_all(dev);
+		if (do_pm_domain)
+			dev_pm_domain_detach(dev, true);
 		dev->driver = NULL;
 		dev_set_drvdata(dev, NULL);
 		if (dev->pm_domain && dev->pm_domain->dismiss)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index d1bd992..8fa688d 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -572,22 +572,8 @@  static int platform_drv_probe(struct device *_dev)
 	if (ret < 0)
 		return ret;
 
-	ret = dev_pm_domain_attach(_dev, true);
-	if (ret != -EPROBE_DEFER) {
-		if (drv->probe) {
-			ret = drv->probe(dev);
-			if (ret)
-				dev_pm_domain_detach(_dev, true);
-		} else {
-			/* don't fail if just dev_pm_domain_attach failed */
-			ret = 0;
-		}
-	}
-
-	if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
-		dev_warn(_dev, "probe deferral not supported\n");
-		ret = -ENXIO;
-	}
+	if (drv->probe)
+		ret = drv->probe(dev);
 
 	return ret;
 }