diff mbox

[v2,12/13] drm/i915: Listen for PMIC bus access notifications

Message ID 20170123210958.18410-13-hdegoede@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Hans de Goede Jan. 23, 2017, 9:09 p.m. UTC
Listen for PMIC bus access notifications and get FORCEWAKE_ALL while
the bus is accessed to avoid needing to do any forcewakes, which need
PMIC bus access, while the PMIC bus is busy:

This fixes errors like these showing up in dmesg, usually followed
by a gfx or system freeze:

[drm:fw_domains_get [i915]] *ERROR* render: timed out waiting for forcewake ack request.
[drm:fw_domains_get [i915]] *MEDIA* render: timed out waiting for forcewake ack request.
i2c_designware 808622C1:06: punit semaphore timed out, resetting
i2c_designware 808622C1:06: PUNIT SEM: 2
i2c_designware 808622C1:06: couldn't acquire bus ownership

Downside of this approach is that it causes wakeups whenever the PMIC
bus is accessed. Unfortunately we cannot simply wait for the PMIC bus
to go idle when we hit a race, as forcewakes may be done from interrupt
handlers where we cannot sleep to wait for the i2c PMIC bus access to
finish.

Note that the notifications and thus the wakeups will only happen on
baytrail / cherrytrail devices using PMICs with a shared i2c bus for
P-Unit and host PMIC access (i2c busses with a _SEM method in their
APCI node), e.g. an axp288 PMIC.

I plan to write some patches for drivers accessing the PMIC bus to
limit their bus accesses to a bare minimum (e.g. cache registers, do not
update battery level more often then 4 times a minute), to limit the
amount of wakeups.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: tagorereddy <tagore.chandan@gmail.com>
---
Changes in v2:
-Spelling: P-Unit, PMIC
---
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/intel_uncore.c | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

Comments

Ville Syrjälä Jan. 27, 2017, 1:52 p.m. UTC | #1
On Mon, Jan 23, 2017 at 10:09:57PM +0100, Hans de Goede wrote:
> Listen for PMIC bus access notifications and get FORCEWAKE_ALL while
> the bus is accessed to avoid needing to do any forcewakes, which need
> PMIC bus access, while the PMIC bus is busy:
> 
> This fixes errors like these showing up in dmesg, usually followed
> by a gfx or system freeze:
> 
> [drm:fw_domains_get [i915]] *ERROR* render: timed out waiting for forcewake ack request.
> [drm:fw_domains_get [i915]] *MEDIA* render: timed out waiting for forcewake ack request.
> i2c_designware 808622C1:06: punit semaphore timed out, resetting
> i2c_designware 808622C1:06: PUNIT SEM: 2
> i2c_designware 808622C1:06: couldn't acquire bus ownership
> 
> Downside of this approach is that it causes wakeups whenever the PMIC
> bus is accessed. Unfortunately we cannot simply wait for the PMIC bus
> to go idle when we hit a race, as forcewakes may be done from interrupt
> handlers where we cannot sleep to wait for the i2c PMIC bus access to
> finish.
> 
> Note that the notifications and thus the wakeups will only happen on
> baytrail / cherrytrail devices using PMICs with a shared i2c bus for
> P-Unit and host PMIC access (i2c busses with a _SEM method in their
> APCI node), e.g. an axp288 PMIC.
> 
> I plan to write some patches for drivers accessing the PMIC bus to
> limit their bus accesses to a bare minimum (e.g. cache registers, do not
> update battery level more often then 4 times a minute), to limit the
> amount of wakeups.
> 
> BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241
> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> Tested-by: tagorereddy <tagore.chandan@gmail.com>
> ---
> Changes in v2:
> -Spelling: P-Unit, PMIC
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  1 +
>  drivers/gpu/drm/i915/intel_uncore.c | 35 +++++++++++++++++++++++++++++++++++
>  2 files changed, 36 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c717329..52f7dde 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -721,6 +721,7 @@ struct intel_uncore {
>  	const struct intel_forcewake_range *fw_domains_table;
>  	unsigned int fw_domains_table_entries;
>  
> +	struct notifier_block pmic_bus_access_nb;
>  	struct intel_uncore_funcs funcs;
>  
>  	unsigned fifo_count;
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 3767307..175fe02 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -25,6 +25,7 @@
>  #include "intel_drv.h"
>  #include "i915_vgpu.h"
>  
> +#include <asm/iosf_mbi.h>
>  #include <linux/pm_runtime.h>
>  
>  #define FORCEWAKE_ACK_TIMEOUT_MS 50
> @@ -429,12 +430,16 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
>  
>  void intel_uncore_suspend(struct drm_i915_private *dev_priv)
>  {
> +	iosf_mbi_unregister_pmic_bus_access_notifier(
> +		&dev_priv->uncore.pmic_bus_access_nb);
>  	__intel_uncore_forcewake_reset(dev_priv, false);
>  }
>  
>  void intel_uncore_resume(struct drm_i915_private *dev_priv)
>  {
>  	__intel_uncore_early_sanitize(dev_priv, true);
> +	iosf_mbi_register_pmic_bus_access_notifier(
> +		&dev_priv->uncore.pmic_bus_access_nb);
>  	i915_check_and_clear_faults(dev_priv);
>  }

The early/normal/late suspend/resume ordering starts to bother me a
little more now. I wonder if we're totally safe wrt. the suspend/resume
order of the devices now.

> @@ -1390,6 +1395,28 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>  	dev_priv->uncore.fw_domains_table_entries = ARRAY_SIZE((d)); \
>  }
>  
> +static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
> +					 unsigned long action, void *data)
> +{
> +	struct drm_i915_private *dev_priv = container_of(nb,
> +			struct drm_i915_private, uncore.pmic_bus_access_nb);
> +
> +	switch (action) {
> +	case MBI_PMIC_BUS_ACCESS_BEGIN:
> +		/*
> +		 * forcewake all to make sure that we don't need to forcewake
> +		 * any power-planes while the pmic bus is busy.
> +		 */
> +		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

I must say I don't really like this stuff at all. But if it helps I gues
we should go for it. I'd like to see the comment elaborate a bit more on
why we think it's is needed.

> +		break;
> +	case MBI_PMIC_BUS_ACCESS_END:
> +		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
>  void intel_uncore_init(struct drm_i915_private *dev_priv)
>  {
>  	i915_check_vgpu(dev_priv);
> @@ -1399,6 +1426,8 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  	__intel_uncore_early_sanitize(dev_priv, false);
>  
>  	dev_priv->uncore.unclaimed_mmio_check = 1;
> +	dev_priv->uncore.pmic_bus_access_nb.notifier_call =
> +		i915_pmic_bus_access_notifier;
>  
>  	switch (INTEL_INFO(dev_priv)->gen) {
>  	default:
> @@ -1458,6 +1487,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  		ASSIGN_READ_MMIO_VFUNCS(vgpu);
>  	}
>  
> +	iosf_mbi_register_pmic_bus_access_notifier(
> +		&dev_priv->uncore.pmic_bus_access_nb);
> +
>  	i915_check_and_clear_faults(dev_priv);
>  }
>  #undef ASSIGN_WRITE_MMIO_VFUNCS
> @@ -1465,6 +1497,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  
>  void intel_uncore_fini(struct drm_i915_private *dev_priv)
>  {
> +	iosf_mbi_unregister_pmic_bus_access_notifier(
> +		&dev_priv->uncore.pmic_bus_access_nb);
> +
>  	/* Paranoia: make sure we have disabled everything before we exit. */
>  	intel_uncore_sanitize(dev_priv);
>  	__intel_uncore_forcewake_reset(dev_priv, false);
> -- 
> 2.9.3
Hans de Goede Jan. 28, 2017, 5:39 p.m. UTC | #2
Hi,

On 01/27/2017 02:52 PM, Ville Syrjälä wrote:
> On Mon, Jan 23, 2017 at 10:09:57PM +0100, Hans de Goede wrote:
>> Listen for PMIC bus access notifications and get FORCEWAKE_ALL while
>> the bus is accessed to avoid needing to do any forcewakes, which need
>> PMIC bus access, while the PMIC bus is busy:
>>
>> This fixes errors like these showing up in dmesg, usually followed
>> by a gfx or system freeze:
>>
>> [drm:fw_domains_get [i915]] *ERROR* render: timed out waiting for forcewake ack request.
>> [drm:fw_domains_get [i915]] *MEDIA* render: timed out waiting for forcewake ack request.
>> i2c_designware 808622C1:06: punit semaphore timed out, resetting
>> i2c_designware 808622C1:06: PUNIT SEM: 2
>> i2c_designware 808622C1:06: couldn't acquire bus ownership
>>
>> Downside of this approach is that it causes wakeups whenever the PMIC
>> bus is accessed. Unfortunately we cannot simply wait for the PMIC bus
>> to go idle when we hit a race, as forcewakes may be done from interrupt
>> handlers where we cannot sleep to wait for the i2c PMIC bus access to
>> finish.
>>
>> Note that the notifications and thus the wakeups will only happen on
>> baytrail / cherrytrail devices using PMICs with a shared i2c bus for
>> P-Unit and host PMIC access (i2c busses with a _SEM method in their
>> APCI node), e.g. an axp288 PMIC.
>>
>> I plan to write some patches for drivers accessing the PMIC bus to
>> limit their bus accesses to a bare minimum (e.g. cache registers, do not
>> update battery level more often then 4 times a minute), to limit the
>> amount of wakeups.
>>
>> BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241
>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
>> Tested-by: tagorereddy <tagore.chandan@gmail.com>
>> ---
>> Changes in v2:
>> -Spelling: P-Unit, PMIC
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h     |  1 +
>>  drivers/gpu/drm/i915/intel_uncore.c | 35 +++++++++++++++++++++++++++++++++++
>>  2 files changed, 36 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index c717329..52f7dde 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -721,6 +721,7 @@ struct intel_uncore {
>>  	const struct intel_forcewake_range *fw_domains_table;
>>  	unsigned int fw_domains_table_entries;
>>
>> +	struct notifier_block pmic_bus_access_nb;
>>  	struct intel_uncore_funcs funcs;
>>
>>  	unsigned fifo_count;
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 3767307..175fe02 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -25,6 +25,7 @@
>>  #include "intel_drv.h"
>>  #include "i915_vgpu.h"
>>
>> +#include <asm/iosf_mbi.h>
>>  #include <linux/pm_runtime.h>
>>
>>  #define FORCEWAKE_ACK_TIMEOUT_MS 50
>> @@ -429,12 +430,16 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
>>
>>  void intel_uncore_suspend(struct drm_i915_private *dev_priv)
>>  {
>> +	iosf_mbi_unregister_pmic_bus_access_notifier(
>> +		&dev_priv->uncore.pmic_bus_access_nb);
>>  	__intel_uncore_forcewake_reset(dev_priv, false);
>>  }
>>
>>  void intel_uncore_resume(struct drm_i915_private *dev_priv)
>>  {
>>  	__intel_uncore_early_sanitize(dev_priv, true);
>> +	iosf_mbi_register_pmic_bus_access_notifier(
>> +		&dev_priv->uncore.pmic_bus_access_nb);
>>  	i915_check_and_clear_faults(dev_priv);
>>  }
>
> The early/normal/late suspend/resume ordering starts to bother me a
> little more now. I wonder if we're totally safe wrt. the suspend/resume
> order of the devices now.

As I mentioned before I'm not touching the existing ordering. As for the
newly added calls the unregister must be done before calling
intel_uncore_forcewake_reset() to make sure that the
i915_pmic_bus_access_notifier() has put any forcewakes it may hold. Note
this is ensured to happen on unregister as iosf_mbi_unregister_pmic_bus_access_notifier
waits for the bus to be idle before unregistering the notifier
(in a race free manner of course).

Likewise he iosf_mbi_register_pmic_bus_access_notifier() call is
done at the only possible spot, it must be done afer
__intel_uncore_early_sanitize and I added a WARN_ON to get a traceback
and the "timed out waiting for forcewake ack request" errors this patch
fixes are triggered (on suspend/resume) from i915_check_and_clear_faults().

Note tagorereddy was seeing these under different circumstances,
this is just the code-path I hit in my tests.

So we cannot just do a iosf_mbi_punit_acquire around the
i915_check_and_clear_faults() call, there are many other places
where a forcewake is done and we could potentially race.

>
>> @@ -1390,6 +1395,28 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>>  	dev_priv->uncore.fw_domains_table_entries = ARRAY_SIZE((d)); \
>>  }
>>
>> +static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
>> +					 unsigned long action, void *data)
>> +{
>> +	struct drm_i915_private *dev_priv = container_of(nb,
>> +			struct drm_i915_private, uncore.pmic_bus_access_nb);
>> +
>> +	switch (action) {
>> +	case MBI_PMIC_BUS_ACCESS_BEGIN:
>> +		/*
>> +		 * forcewake all to make sure that we don't need to forcewake
>> +		 * any power-planes while the pmic bus is busy.
>> +		 */
>> +		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>
> I must say I don't really like this stuff at all. But if it helps I gues
> we should go for it. I'd like to see the comment elaborate a bit more on
> why we think it's is needed.

OK, how about:

                 /*
                  * forcewake all now to make sure that we don't need to do a
                  * forcewake later which on systems where this notifier gets
                  * called requires the punit to access to the shared pmic i2c
                  * bus, which will be busy after this notification, leading to:
                  * "render: timed out waiting for forcewake ack request."
                  * errors.
                  */

?

>
>> +		break;
>> +	case MBI_PMIC_BUS_ACCESS_END:
>> +		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>> +		break;
>> +	}
>> +
>> +	return NOTIFY_OK;
>> +}
>> +
>>  void intel_uncore_init(struct drm_i915_private *dev_priv)
>>  {
>>  	i915_check_vgpu(dev_priv);
>> @@ -1399,6 +1426,8 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>>  	__intel_uncore_early_sanitize(dev_priv, false);
>>
>>  	dev_priv->uncore.unclaimed_mmio_check = 1;
>> +	dev_priv->uncore.pmic_bus_access_nb.notifier_call =
>> +		i915_pmic_bus_access_notifier;
>>
>>  	switch (INTEL_INFO(dev_priv)->gen) {
>>  	default:
>> @@ -1458,6 +1487,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>>  		ASSIGN_READ_MMIO_VFUNCS(vgpu);
>>  	}
>>
>> +	iosf_mbi_register_pmic_bus_access_notifier(
>> +		&dev_priv->uncore.pmic_bus_access_nb);
>> +
>>  	i915_check_and_clear_faults(dev_priv);
>>  }
>>  #undef ASSIGN_WRITE_MMIO_VFUNCS
>> @@ -1465,6 +1497,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>>
>>  void intel_uncore_fini(struct drm_i915_private *dev_priv)
>>  {
>> +	iosf_mbi_unregister_pmic_bus_access_notifier(
>> +		&dev_priv->uncore.pmic_bus_access_nb);
>> +
>>  	/* Paranoia: make sure we have disabled everything before we exit. */
>>  	intel_uncore_sanitize(dev_priv);
>>  	__intel_uncore_forcewake_reset(dev_priv, false);
>> --
>> 2.9.3
>


Regards,

Hans
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c717329..52f7dde 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -721,6 +721,7 @@  struct intel_uncore {
 	const struct intel_forcewake_range *fw_domains_table;
 	unsigned int fw_domains_table_entries;
 
+	struct notifier_block pmic_bus_access_nb;
 	struct intel_uncore_funcs funcs;
 
 	unsigned fifo_count;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 3767307..175fe02 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -25,6 +25,7 @@ 
 #include "intel_drv.h"
 #include "i915_vgpu.h"
 
+#include <asm/iosf_mbi.h>
 #include <linux/pm_runtime.h>
 
 #define FORCEWAKE_ACK_TIMEOUT_MS 50
@@ -429,12 +430,16 @@  static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
 
 void intel_uncore_suspend(struct drm_i915_private *dev_priv)
 {
+	iosf_mbi_unregister_pmic_bus_access_notifier(
+		&dev_priv->uncore.pmic_bus_access_nb);
 	__intel_uncore_forcewake_reset(dev_priv, false);
 }
 
 void intel_uncore_resume(struct drm_i915_private *dev_priv)
 {
 	__intel_uncore_early_sanitize(dev_priv, true);
+	iosf_mbi_register_pmic_bus_access_notifier(
+		&dev_priv->uncore.pmic_bus_access_nb);
 	i915_check_and_clear_faults(dev_priv);
 }
 
@@ -1390,6 +1395,28 @@  static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	dev_priv->uncore.fw_domains_table_entries = ARRAY_SIZE((d)); \
 }
 
+static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
+					 unsigned long action, void *data)
+{
+	struct drm_i915_private *dev_priv = container_of(nb,
+			struct drm_i915_private, uncore.pmic_bus_access_nb);
+
+	switch (action) {
+	case MBI_PMIC_BUS_ACCESS_BEGIN:
+		/*
+		 * forcewake all to make sure that we don't need to forcewake
+		 * any power-planes while the pmic bus is busy.
+		 */
+		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+		break;
+	case MBI_PMIC_BUS_ACCESS_END:
+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 void intel_uncore_init(struct drm_i915_private *dev_priv)
 {
 	i915_check_vgpu(dev_priv);
@@ -1399,6 +1426,8 @@  void intel_uncore_init(struct drm_i915_private *dev_priv)
 	__intel_uncore_early_sanitize(dev_priv, false);
 
 	dev_priv->uncore.unclaimed_mmio_check = 1;
+	dev_priv->uncore.pmic_bus_access_nb.notifier_call =
+		i915_pmic_bus_access_notifier;
 
 	switch (INTEL_INFO(dev_priv)->gen) {
 	default:
@@ -1458,6 +1487,9 @@  void intel_uncore_init(struct drm_i915_private *dev_priv)
 		ASSIGN_READ_MMIO_VFUNCS(vgpu);
 	}
 
+	iosf_mbi_register_pmic_bus_access_notifier(
+		&dev_priv->uncore.pmic_bus_access_nb);
+
 	i915_check_and_clear_faults(dev_priv);
 }
 #undef ASSIGN_WRITE_MMIO_VFUNCS
@@ -1465,6 +1497,9 @@  void intel_uncore_init(struct drm_i915_private *dev_priv)
 
 void intel_uncore_fini(struct drm_i915_private *dev_priv)
 {
+	iosf_mbi_unregister_pmic_bus_access_notifier(
+		&dev_priv->uncore.pmic_bus_access_nb);
+
 	/* Paranoia: make sure we have disabled everything before we exit. */
 	intel_uncore_sanitize(dev_priv);
 	__intel_uncore_forcewake_reset(dev_priv, false);