diff mbox series

drm/i915/debugfs: Dump i915 children runtime status

Message ID 20220328102227.14545-1-anshuman.gupta@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/debugfs: Dump i915 children runtime status | expand

Commit Message

Gupta, Anshuman March 28, 2022, 10:22 a.m. UTC
i915 doesn't use pm_suspend_ignore_children() which warrants that
any runtime active child of i915 will block the runtime suspend
of i915.
i915_runtime_pm_status only exposes i915 runtime pm usage_count,
which is not sufficient to debug in the scenarios when i915 has
zero usage_count but there are runtime active children.
Dump i915 child's runtime pm status to debug such
i915 runtime suspend issues.

Cc: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 38 +++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Nilawar, Badal March 29, 2022, 5:13 p.m. UTC | #1
On 28-03-2022 15:52, Anshuman Gupta wrote:
> i915 doesn't use pm_suspend_ignore_children() which warrants that
> any runtime active child of i915 will block the runtime suspend
> of i915.
> i915_runtime_pm_status only exposes i915 runtime pm usage_count,
> which is not sufficient to debug in the scenarios when i915 has
> zero usage_count but there are runtime active children.
> Dump i915 child's runtime pm status to debug such
> i915 runtime suspend issues.
> 
> Cc: Chris Wilson <chris.p.wilson@intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 38 +++++++++++++++++++++++++++++
>   1 file changed, 38 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 445b4da23950..ea1730419f8d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -483,6 +483,40 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	return 0;
>   }
>   
> +#ifdef CONFIG_PM
> +static int i915_runtime_dump_child_status(struct device *dev, void *data)
> +{
> +	struct seq_file *m = data;
> +	const char *rpm_status;
> +
> +	/* Early return if runtime_pm is disabled */
> +	if (dev->power.disable_depth)
> +		return 0;
> +
> +	switch (dev->power.runtime_status) {
> +	case RPM_SUSPENDED:
> +		rpm_status = "suspended";
> +		break;
> +	case RPM_SUSPENDING:
> +		rpm_status = "suspending";
> +		break;
> +	case RPM_RESUMING:
> +		rpm_status = "resuming";
> +		break;
> +	case RPM_ACTIVE:
> +		rpm_status = "active";
> +		break;
> +	default:
> +		rpm_status = "unknown";
> +	}
> +
> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
> +		   dev_name(dev), rpm_status);
> +
> +	return 0;
> +}
> +#endif
> +
>   static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>   {
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> @@ -500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>   #ifdef CONFIG_PM
>   	seq_printf(m, "Usage count: %d\n",
>   		   atomic_read(&dev_priv->drm.dev->power.usage_count));
> +	seq_printf(m, "Runtime active children: %d\n",
> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
> +	device_for_each_child(&pdev->dev, m, i915_runtime_dump_child_status);
> +
These changes looks fine to me.
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
>   #else
>   	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
>   #endif
Dixit, Ashutosh March 29, 2022, 11:09 p.m. UTC | #2
On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
>
> +#ifdef CONFIG_PM
> +static int i915_runtime_dump_child_status(struct device *dev, void *data)
> +{
> +	struct seq_file *m = data;
> +	const char *rpm_status;
> +
> +	/* Early return if runtime_pm is disabled */
> +	if (dev->power.disable_depth)
> +		return 0;
> +
> +	switch (dev->power.runtime_status) {
> +	case RPM_SUSPENDED:
> +		rpm_status = "suspended";
> +		break;
> +	case RPM_SUSPENDING:
> +		rpm_status = "suspending";
> +		break;
> +	case RPM_RESUMING:
> +		rpm_status = "resuming";
> +		break;
> +	case RPM_ACTIVE:
> +		rpm_status = "active";
> +		break;
> +	default:
> +		rpm_status = "unknown";
> +	}
> +
> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
> +		   dev_name(dev), rpm_status);
> +
> +	return 0;
> +}
> +#endif

Maybe a nit, but perhaps defining a const array is better than having a
switch statement? Similar to what is done in rtpm_status_str(). The
function itself is very similar to rtpm_status_str() so can probably
benefit from that similarity. Can perhaps even be nearly identical to
rtpm_status_str() (since that is static in the genpd (generic power domain)
code).

See also 2bd5306a8764 ("PM / Domains: add debugfs listing of struct
generic_pm_domain-s"), though I am not sure if genpd's are applicable in
our case and certainly look way out of scope for now. Thanks.

> +
>  static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>  {
>	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> @@ -500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>  #ifdef CONFIG_PM
>	seq_printf(m, "Usage count: %d\n",
>		   atomic_read(&dev_priv->drm.dev->power.usage_count));
> +	seq_printf(m, "Runtime active children: %d\n",
> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
> +	device_for_each_child(&pdev->dev, m, i915_runtime_dump_child_status);
> +
>  #else
>	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
>  #endif
> --
> 2.26.2
>
Jani Nikula April 1, 2022, 12:01 p.m. UTC | #3
On Tue, 29 Mar 2022, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
> On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
>>
>> +#ifdef CONFIG_PM
>> +static int i915_runtime_dump_child_status(struct device *dev, void *data)
>> +{
>> +	struct seq_file *m = data;
>> +	const char *rpm_status;
>> +
>> +	/* Early return if runtime_pm is disabled */
>> +	if (dev->power.disable_depth)
>> +		return 0;
>> +
>> +	switch (dev->power.runtime_status) {
>> +	case RPM_SUSPENDED:
>> +		rpm_status = "suspended";
>> +		break;
>> +	case RPM_SUSPENDING:
>> +		rpm_status = "suspending";
>> +		break;
>> +	case RPM_RESUMING:
>> +		rpm_status = "resuming";
>> +		break;
>> +	case RPM_ACTIVE:
>> +		rpm_status = "active";
>> +		break;
>> +	default:
>> +		rpm_status = "unknown";
>> +	}
>> +
>> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
>> +		   dev_name(dev), rpm_status);
>> +
>> +	return 0;
>> +}
>> +#endif
>
> Maybe a nit, but perhaps defining a const array is better than having a
> switch statement? Similar to what is done in rtpm_status_str(). The
> function itself is very similar to rtpm_status_str() so can probably
> benefit from that similarity. Can perhaps even be nearly identical to
> rtpm_status_str() (since that is static in the genpd (generic power domain)
> code).
>
> See also 2bd5306a8764 ("PM / Domains: add debugfs listing of struct
> generic_pm_domain-s"), though I am not sure if genpd's are applicable in
> our case and certainly look way out of scope for now. Thanks.

See also /sys/devices/i915/power/runtime_status and
/sys/devices/i915/power/runtime_active_kids.

Kinda feels like the info should be made available there?

BR,
Jani.

>
>> +
>>  static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>>  {
>>	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>> @@ -500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>>  #ifdef CONFIG_PM
>>	seq_printf(m, "Usage count: %d\n",
>>		   atomic_read(&dev_priv->drm.dev->power.usage_count));
>> +	seq_printf(m, "Runtime active children: %d\n",
>> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
>> +	device_for_each_child(&pdev->dev, m, i915_runtime_dump_child_status);
>> +
>>  #else
>>	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
>>  #endif
>> --
>> 2.26.2
>>
Gupta, Anshuman April 1, 2022, 12:40 p.m. UTC | #4
> -----Original Message-----
> From: Jani Nikula <jani.nikula@linux.intel.com>
> Sent: Friday, April 1, 2022 5:31 PM
> To: Dixit, Ashutosh <ashutosh.dixit@intel.com>; Gupta, Anshuman
> <anshuman.gupta@intel.com>
> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P <chris.p.wilson@intel.com>;
> Vivi, Rodrigo <rodrigo.vivi@intel.com>
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children runtime
> status
> 
> On Tue, 29 Mar 2022, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
> > On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
> >>
> >> +#ifdef CONFIG_PM
> >> +static int i915_runtime_dump_child_status(struct device *dev, void
> >> +*data) {
> >> +	struct seq_file *m = data;
> >> +	const char *rpm_status;
> >> +
> >> +	/* Early return if runtime_pm is disabled */
> >> +	if (dev->power.disable_depth)
> >> +		return 0;
> >> +
> >> +	switch (dev->power.runtime_status) {
> >> +	case RPM_SUSPENDED:
> >> +		rpm_status = "suspended";
> >> +		break;
> >> +	case RPM_SUSPENDING:
> >> +		rpm_status = "suspending";
> >> +		break;
> >> +	case RPM_RESUMING:
> >> +		rpm_status = "resuming";
> >> +		break;
> >> +	case RPM_ACTIVE:
> >> +		rpm_status = "active";
> >> +		break;
> >> +	default:
> >> +		rpm_status = "unknown";
> >> +	}
> >> +
> >> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
> >> +		   dev_name(dev), rpm_status);
> >> +
> >> +	return 0;
> >> +}
> >> +#endif
> >
> > Maybe a nit, but perhaps defining a const array is better than having
> > a switch statement? Similar to what is done in rtpm_status_str(). The
> > function itself is very similar to rtpm_status_str() so can probably
> > benefit from that similarity. Can perhaps even be nearly identical to
> > rtpm_status_str() (since that is static in the genpd (generic power
> > domain) code).
> >
> > See also 2bd5306a8764 ("PM / Domains: add debugfs listing of struct
> > generic_pm_domain-s"), though I am not sure if genpd's are applicable
> > in our case and certainly look way out of scope for now. Thanks.
> 
> See also /sys/devices/i915/power/runtime_status and
> /sys/devices/i915/power/runtime_active_kids.
> 
> Kinda feels like the info should be made available there?
runtime_active_kids we are already printing by dev_priv->drm.dev->power.child_count.
About runtime_status , we already prints usage count and pci device power state, IMO that is sufficient for debug ?
If it is really needed , I will add dev->power.runtime_status in next revision.
Thanks,
Anshuman Gupta.




> 
> BR,
> Jani.
> 
> >
> >> +
> >>  static int i915_runtime_pm_status(struct seq_file *m, void *unused)
> >>  {
> >>	struct drm_i915_private *dev_priv = node_to_i915(m->private);  @@
> >>-500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file
> >>*m, void *unused)
> >>  #ifdef CONFIG_PM
> >>	seq_printf(m, "Usage count: %d\n",
> >>		   atomic_read(&dev_priv->drm.dev->power.usage_count));
> >> +	seq_printf(m, "Runtime active children: %d\n",
> >> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
> >> +	device_for_each_child(&pdev->dev, m,
> >> +i915_runtime_dump_child_status);
> >> +
> >>  #else
> >>	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
> >>  #endif
> >> --
> >> 2.26.2
> >>
> 
> --
> Jani Nikula, Intel Open Source Graphics Center
Jani Nikula April 1, 2022, 12:55 p.m. UTC | #5
On Fri, 01 Apr 2022, "Gupta, Anshuman" <anshuman.gupta@intel.com> wrote:
>> -----Original Message-----
>> From: Jani Nikula <jani.nikula@linux.intel.com>
>> Sent: Friday, April 1, 2022 5:31 PM
>> To: Dixit, Ashutosh <ashutosh.dixit@intel.com>; Gupta, Anshuman
>> <anshuman.gupta@intel.com>
>> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P <chris.p.wilson@intel.com>;
>> Vivi, Rodrigo <rodrigo.vivi@intel.com>
>> Subject: Re: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children runtime
>> status
>> 
>> On Tue, 29 Mar 2022, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
>> > On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
>> >>
>> >> +#ifdef CONFIG_PM
>> >> +static int i915_runtime_dump_child_status(struct device *dev, void
>> >> +*data) {
>> >> +	struct seq_file *m = data;
>> >> +	const char *rpm_status;
>> >> +
>> >> +	/* Early return if runtime_pm is disabled */
>> >> +	if (dev->power.disable_depth)
>> >> +		return 0;
>> >> +
>> >> +	switch (dev->power.runtime_status) {
>> >> +	case RPM_SUSPENDED:
>> >> +		rpm_status = "suspended";
>> >> +		break;
>> >> +	case RPM_SUSPENDING:
>> >> +		rpm_status = "suspending";
>> >> +		break;
>> >> +	case RPM_RESUMING:
>> >> +		rpm_status = "resuming";
>> >> +		break;
>> >> +	case RPM_ACTIVE:
>> >> +		rpm_status = "active";
>> >> +		break;
>> >> +	default:
>> >> +		rpm_status = "unknown";
>> >> +	}
>> >> +
>> >> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
>> >> +		   dev_name(dev), rpm_status);
>> >> +
>> >> +	return 0;
>> >> +}
>> >> +#endif
>> >
>> > Maybe a nit, but perhaps defining a const array is better than having
>> > a switch statement? Similar to what is done in rtpm_status_str(). The
>> > function itself is very similar to rtpm_status_str() so can probably
>> > benefit from that similarity. Can perhaps even be nearly identical to
>> > rtpm_status_str() (since that is static in the genpd (generic power
>> > domain) code).
>> >
>> > See also 2bd5306a8764 ("PM / Domains: add debugfs listing of struct
>> > generic_pm_domain-s"), though I am not sure if genpd's are applicable
>> > in our case and certainly look way out of scope for now. Thanks.
>> 
>> See also /sys/devices/i915/power/runtime_status and
>> /sys/devices/i915/power/runtime_active_kids.
>> 
>> Kinda feels like the info should be made available there?
> runtime_active_kids we are already printing by dev_priv->drm.dev->power.child_count.
> About runtime_status , we already prints usage count and pci device power state, IMO that is sufficient for debug ?
> If it is really needed , I will add dev->power.runtime_status in next revision.

My point is, the patch at hand adds runtime pm status printing that
isn't specific to drm or i915 into i915 debugfs. Why?

What is the reason we should take on the burden of maintaining this
while the right place for it might be in runtime pm code, benefiting
other drivers in addition to ours?

BR,
Jani.


> Thanks,
> Anshuman Gupta.
>
>
>
>
>> 
>> BR,
>> Jani.
>> 
>> >
>> >> +
>> >>  static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>> >>  {
>> >>	struct drm_i915_private *dev_priv = node_to_i915(m->private);  @@
>> >>-500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file
>> >>*m, void *unused)
>> >>  #ifdef CONFIG_PM
>> >>	seq_printf(m, "Usage count: %d\n",
>> >>		   atomic_read(&dev_priv->drm.dev->power.usage_count));
>> >> +	seq_printf(m, "Runtime active children: %d\n",
>> >> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
>> >> +	device_for_each_child(&pdev->dev, m,
>> >> +i915_runtime_dump_child_status);
>> >> +
>> >>  #else
>> >>	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
>> >>  #endif
>> >> --
>> >> 2.26.2
>> >>
>> 
>> --
>> Jani Nikula, Intel Open Source Graphics Center
Gupta, Anshuman April 1, 2022, 1:07 p.m. UTC | #6
> -----Original Message-----
> From: Jani Nikula <jani.nikula@linux.intel.com>
> Sent: Friday, April 1, 2022 6:26 PM
> To: Gupta, Anshuman <anshuman.gupta@intel.com>; Dixit, Ashutosh
> <ashutosh.dixit@intel.com>
> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P <chris.p.wilson@intel.com>;
> Vivi, Rodrigo <rodrigo.vivi@intel.com>
> Subject: RE: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children runtime
> status
> 
> On Fri, 01 Apr 2022, "Gupta, Anshuman" <anshuman.gupta@intel.com> wrote:
> >> -----Original Message-----
> >> From: Jani Nikula <jani.nikula@linux.intel.com>
> >> Sent: Friday, April 1, 2022 5:31 PM
> >> To: Dixit, Ashutosh <ashutosh.dixit@intel.com>; Gupta, Anshuman
> >> <anshuman.gupta@intel.com>
> >> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P
> >> <chris.p.wilson@intel.com>; Vivi, Rodrigo <rodrigo.vivi@intel.com>
> >> Subject: Re: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children
> >> runtime status
> >>
> >> On Tue, 29 Mar 2022, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
> >> > On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
> >> >>
> >> >> +#ifdef CONFIG_PM
> >> >> +static int i915_runtime_dump_child_status(struct device *dev,
> >> >> +void
> >> >> +*data) {
> >> >> +	struct seq_file *m = data;
> >> >> +	const char *rpm_status;
> >> >> +
> >> >> +	/* Early return if runtime_pm is disabled */
> >> >> +	if (dev->power.disable_depth)
> >> >> +		return 0;
> >> >> +
> >> >> +	switch (dev->power.runtime_status) {
> >> >> +	case RPM_SUSPENDED:
> >> >> +		rpm_status = "suspended";
> >> >> +		break;
> >> >> +	case RPM_SUSPENDING:
> >> >> +		rpm_status = "suspending";
> >> >> +		break;
> >> >> +	case RPM_RESUMING:
> >> >> +		rpm_status = "resuming";
> >> >> +		break;
> >> >> +	case RPM_ACTIVE:
> >> >> +		rpm_status = "active";
> >> >> +		break;
> >> >> +	default:
> >> >> +		rpm_status = "unknown";
> >> >> +	}
> >> >> +
> >> >> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
> >> >> +		   dev_name(dev), rpm_status);
> >> >> +
> >> >> +	return 0;
> >> >> +}
> >> >> +#endif
> >> >
> >> > Maybe a nit, but perhaps defining a const array is better than
> >> > having a switch statement? Similar to what is done in
> >> > rtpm_status_str(). The function itself is very similar to
> >> > rtpm_status_str() so can probably benefit from that similarity. Can
> >> > perhaps even be nearly identical to
> >> > rtpm_status_str() (since that is static in the genpd (generic power
> >> > domain) code).
> >> >
> >> > See also 2bd5306a8764 ("PM / Domains: add debugfs listing of struct
> >> > generic_pm_domain-s"), though I am not sure if genpd's are
> >> > applicable in our case and certainly look way out of scope for now. Thanks.
> >>
> >> See also /sys/devices/i915/power/runtime_status and
> >> /sys/devices/i915/power/runtime_active_kids.
> >>
> >> Kinda feels like the info should be made available there?
> > runtime_active_kids we are already printing by dev_priv->drm.dev-
> >power.child_count.
> > About runtime_status , we already prints usage count and pci device power
> state, IMO that is sufficient for debug ?
> > If it is really needed , I will add dev->power.runtime_status in next revision.
> 
> My point is, the patch at hand adds runtime pm status printing that isn't specific
> to drm or i915 into i915 debugfs. Why?
> 
> What is the reason we should take on the burden of maintaining this while the
> right place for it might be in runtime pm code, benefiting other drivers in
> addition to ours?
Benefit is there to debug CI runtime suspend failures , we need to know the culprit child blocking i915 runtime PM.
runtime_active_kids just revels the count , it doesn't reveal the culprit children.
Thanks,
Anshuman.
> 
> BR,
> Jani.
> 
> 
> > Thanks,
> > Anshuman Gupta.
> >
> >
> >
> >
> >>
> >> BR,
> >> Jani.
> >>
> >> >
> >> >> +
> >> >>  static int i915_runtime_pm_status(struct seq_file *m, void
> >> >>*unused)
> >> >>  {
> >> >>	struct drm_i915_private *dev_priv = node_to_i915(m->private);  @@
> >> >>-500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file
> >> >>*m, void *unused)
> >> >>  #ifdef CONFIG_PM
> >> >>	seq_printf(m, "Usage count: %d\n",
> >> >>		   atomic_read(&dev_priv->drm.dev->power.usage_count));
> >> >> +	seq_printf(m, "Runtime active children: %d\n",
> >> >> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
> >> >> +	device_for_each_child(&pdev->dev, m,
> >> >> +i915_runtime_dump_child_status);
> >> >> +
> >> >>  #else
> >> >>	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
> >> >>  #endif
> >> >> --
> >> >> 2.26.2
> >> >>
> >>
> >> --
> >> Jani Nikula, Intel Open Source Graphics Center
> 
> --
> Jani Nikula, Intel Open Source Graphics Center
Jani Nikula April 1, 2022, 2:09 p.m. UTC | #7
On Fri, 01 Apr 2022, "Gupta, Anshuman" <anshuman.gupta@intel.com> wrote:
>> -----Original Message-----
>> From: Jani Nikula <jani.nikula@linux.intel.com>
>> Sent: Friday, April 1, 2022 6:26 PM
>> To: Gupta, Anshuman <anshuman.gupta@intel.com>; Dixit, Ashutosh
>> <ashutosh.dixit@intel.com>
>> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P <chris.p.wilson@intel.com>;
>> Vivi, Rodrigo <rodrigo.vivi@intel.com>
>> Subject: RE: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children runtime
>> status
>> 
>> On Fri, 01 Apr 2022, "Gupta, Anshuman" <anshuman.gupta@intel.com> wrote:
>> >> -----Original Message-----
>> >> From: Jani Nikula <jani.nikula@linux.intel.com>
>> >> Sent: Friday, April 1, 2022 5:31 PM
>> >> To: Dixit, Ashutosh <ashutosh.dixit@intel.com>; Gupta, Anshuman
>> >> <anshuman.gupta@intel.com>
>> >> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P
>> >> <chris.p.wilson@intel.com>; Vivi, Rodrigo <rodrigo.vivi@intel.com>
>> >> Subject: Re: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children
>> >> runtime status
>> >>
>> >> On Tue, 29 Mar 2022, "Dixit, Ashutosh" <ashutosh.dixit@intel.com> wrote:
>> >> > On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
>> >> >>
>> >> >> +#ifdef CONFIG_PM
>> >> >> +static int i915_runtime_dump_child_status(struct device *dev,
>> >> >> +void
>> >> >> +*data) {
>> >> >> +	struct seq_file *m = data;
>> >> >> +	const char *rpm_status;
>> >> >> +
>> >> >> +	/* Early return if runtime_pm is disabled */
>> >> >> +	if (dev->power.disable_depth)
>> >> >> +		return 0;
>> >> >> +
>> >> >> +	switch (dev->power.runtime_status) {
>> >> >> +	case RPM_SUSPENDED:
>> >> >> +		rpm_status = "suspended";
>> >> >> +		break;
>> >> >> +	case RPM_SUSPENDING:
>> >> >> +		rpm_status = "suspending";
>> >> >> +		break;
>> >> >> +	case RPM_RESUMING:
>> >> >> +		rpm_status = "resuming";
>> >> >> +		break;
>> >> >> +	case RPM_ACTIVE:
>> >> >> +		rpm_status = "active";
>> >> >> +		break;
>> >> >> +	default:
>> >> >> +		rpm_status = "unknown";
>> >> >> +	}
>> >> >> +
>> >> >> +	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
>> >> >> +		   dev_name(dev), rpm_status);
>> >> >> +
>> >> >> +	return 0;
>> >> >> +}
>> >> >> +#endif
>> >> >
>> >> > Maybe a nit, but perhaps defining a const array is better than
>> >> > having a switch statement? Similar to what is done in
>> >> > rtpm_status_str(). The function itself is very similar to
>> >> > rtpm_status_str() so can probably benefit from that similarity. Can
>> >> > perhaps even be nearly identical to
>> >> > rtpm_status_str() (since that is static in the genpd (generic power
>> >> > domain) code).
>> >> >
>> >> > See also 2bd5306a8764 ("PM / Domains: add debugfs listing of struct
>> >> > generic_pm_domain-s"), though I am not sure if genpd's are
>> >> > applicable in our case and certainly look way out of scope for now. Thanks.
>> >>
>> >> See also /sys/devices/i915/power/runtime_status and
>> >> /sys/devices/i915/power/runtime_active_kids.
>> >>
>> >> Kinda feels like the info should be made available there?
>> > runtime_active_kids we are already printing by dev_priv->drm.dev-
>> >power.child_count.
>> > About runtime_status , we already prints usage count and pci device power
>> state, IMO that is sufficient for debug ?
>> > If it is really needed , I will add dev->power.runtime_status in next revision.
>> 
>> My point is, the patch at hand adds runtime pm status printing that isn't specific
>> to drm or i915 into i915 debugfs. Why?
>> 
>> What is the reason we should take on the burden of maintaining this while the
>> right place for it might be in runtime pm code, benefiting other drivers in
>> addition to ours?
> Benefit is there to debug CI runtime suspend failures , we need to know the culprit child blocking i915 runtime PM.
> runtime_active_kids just revels the count , it doesn't reveal the culprit children.

I understand. But how is that problem or the information specific to
i915? Why should this be added to i915 instead of runtime pm infra?
Surely this is not even a new problem; how do others currently figure
this information out?

So I'm not going to block this if you all think this is a good idea. But
the point is, the first solution should not be to add some i915 specific
stuff when a more generic solution might exist or be preferred.


BR,
Jani.




> Thanks,
> Anshuman.
>> 
>> BR,
>> Jani.
>> 
>> 
>> > Thanks,
>> > Anshuman Gupta.
>> >
>> >
>> >
>> >
>> >>
>> >> BR,
>> >> Jani.
>> >>
>> >> >
>> >> >> +
>> >> >>  static int i915_runtime_pm_status(struct seq_file *m, void
>> >> >>*unused)
>> >> >>  {
>> >> >>	struct drm_i915_private *dev_priv = node_to_i915(m->private);  @@
>> >> >>-500,6 +534,10 @@ static int i915_runtime_pm_status(struct seq_file
>> >> >>*m, void *unused)
>> >> >>  #ifdef CONFIG_PM
>> >> >>	seq_printf(m, "Usage count: %d\n",
>> >> >>		   atomic_read(&dev_priv->drm.dev->power.usage_count));
>> >> >> +	seq_printf(m, "Runtime active children: %d\n",
>> >> >> +		   atomic_read(&dev_priv->drm.dev->power.child_count));
>> >> >> +	device_for_each_child(&pdev->dev, m,
>> >> >> +i915_runtime_dump_child_status);
>> >> >> +
>> >> >>  #else
>> >> >>	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
>> >> >>  #endif
>> >> >> --
>> >> >> 2.26.2
>> >> >>
>> >>
>> >> --
>> >> Jani Nikula, Intel Open Source Graphics Center
>> 
>> --
>> Jani Nikula, Intel Open Source Graphics Center
Gupta, Anshuman April 1, 2022, 3:42 p.m. UTC | #8
> -----Original Message-----
> From: Jani Nikula <jani.nikula@linux.intel.com>
> Sent: Friday, April 1, 2022 7:40 PM
> To: Gupta, Anshuman <anshuman.gupta@intel.com>; Dixit, Ashutosh
> <ashutosh.dixit@intel.com>
> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P <chris.p.wilson@intel.com>;
> Vivi, Rodrigo <rodrigo.vivi@intel.com>
> Subject: RE: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children runtime
> status
> 
> On Fri, 01 Apr 2022, "Gupta, Anshuman" <anshuman.gupta@intel.com> wrote:
> >> -----Original Message-----
> >> From: Jani Nikula <jani.nikula@linux.intel.com>
> >> Sent: Friday, April 1, 2022 6:26 PM
> >> To: Gupta, Anshuman <anshuman.gupta@intel.com>; Dixit, Ashutosh
> >> <ashutosh.dixit@intel.com>
> >> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P
> >> <chris.p.wilson@intel.com>; Vivi, Rodrigo <rodrigo.vivi@intel.com>
> >> Subject: RE: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915 children
> >> runtime status
> >>
> >> On Fri, 01 Apr 2022, "Gupta, Anshuman" <anshuman.gupta@intel.com>
> wrote:
> >> >> -----Original Message-----
> >> >> From: Jani Nikula <jani.nikula@linux.intel.com>
> >> >> Sent: Friday, April 1, 2022 5:31 PM
> >> >> To: Dixit, Ashutosh <ashutosh.dixit@intel.com>; Gupta, Anshuman
> >> >> <anshuman.gupta@intel.com>
> >> >> Cc: intel-gfx@lists.freedesktop.org; Wilson, Chris P
> >> >> <chris.p.wilson@intel.com>; Vivi, Rodrigo <rodrigo.vivi@intel.com>
> >> >> Subject: Re: [Intel-gfx] [PATCH] drm/i915/debugfs: Dump i915
> >> >> children runtime status
> >> >>
> >> >> On Tue, 29 Mar 2022, "Dixit, Ashutosh" <ashutosh.dixit@intel.com>
> wrote:
> >> >> > On Mon, 28 Mar 2022 03:22:27 -0700, Anshuman Gupta wrote:
> >> >> >>
> >> >> >> +#ifdef CONFIG_PM
> >> >> >> +static int i915_runtime_dump_child_status(struct device *dev,
> >> >> >> +void
> >> >> >> +*data) {
> >> >> >> +	struct seq_file *m = data;
> >> >> >> +	const char *rpm_status;
> >> >> >> +
> >> >> >> +	/* Early return if runtime_pm is disabled */
> >> >> >> +	if (dev->power.disable_depth)
> >> >> >> +		return 0;
> >> >> >> +
> >> >> >> +	switch (dev->power.runtime_status) {
> >> >> >> +	case RPM_SUSPENDED:
> >> >> >> +		rpm_status = "suspended";
> >> >> >> +		break;
> >> >> >> +	case RPM_SUSPENDING:
> >> >> >> +		rpm_status = "suspending";
> >> >> >> +		break;
> >> >> >> +	case RPM_RESUMING:
> >> >> >> +		rpm_status = "resuming";
> >> >> >> +		break;
> >> >> >> +	case RPM_ACTIVE:
> >> >> >> +		rpm_status = "active";
> >> >> >> +		break;
> >> >> >> +	default:
> >> >> >> +		rpm_status = "unknown";
> >> >> >> +	}
> >> >> >> +
> >> >> >> +	seq_printf(m, "\t%s %s: Runtime status: %s\n",
> dev_driver_string(dev),
> >> >> >> +		   dev_name(dev), rpm_status);
> >> >> >> +
> >> >> >> +	return 0;
> >> >> >> +}
> >> >> >> +#endif
> >> >> >
> >> >> > Maybe a nit, but perhaps defining a const array is better than
> >> >> > having a switch statement? Similar to what is done in
> >> >> > rtpm_status_str(). The function itself is very similar to
> >> >> > rtpm_status_str() so can probably benefit from that similarity.
> >> >> > Can perhaps even be nearly identical to
> >> >> > rtpm_status_str() (since that is static in the genpd (generic
> >> >> > power
> >> >> > domain) code).
> >> >> >
> >> >> > See also 2bd5306a8764 ("PM / Domains: add debugfs listing of
> >> >> > struct generic_pm_domain-s"), though I am not sure if genpd's
> >> >> > are applicable in our case and certainly look way out of scope for now.
> Thanks.
> >> >>
> >> >> See also /sys/devices/i915/power/runtime_status and
> >> >> /sys/devices/i915/power/runtime_active_kids.
> >> >>
> >> >> Kinda feels like the info should be made available there?
> >> > runtime_active_kids we are already printing by dev_priv->drm.dev-
> >> >power.child_count.
> >> > About runtime_status , we already prints usage count and pci device
> >> >power
> >> state, IMO that is sufficient for debug ?
> >> > If it is really needed , I will add dev->power.runtime_status in next revision.
> >>
> >> My point is, the patch at hand adds runtime pm status printing that
> >> isn't specific to drm or i915 into i915 debugfs. Why?
> >>
> >> What is the reason we should take on the burden of maintaining this
> >> while the right place for it might be in runtime pm code, benefiting
> >> other drivers in addition to ours?
> > Benefit is there to debug CI runtime suspend failures , we need to know the
> culprit child blocking i915 runtime PM.
> > runtime_active_kids just revels the count , it doesn't reveal the culprit children.
> 
> I understand. But how is that problem or the information specific to i915? Why
> should this be added to i915 instead of runtime pm infra?
> Surely this is not even a new problem; how do others currently figure this
> information out?
> 
> So I'm not going to block this if you all think this is a good idea. But the point is,
> the first solution should not be to add some i915 specific stuff when a more
> generic solution might exist or be preferred.
Hi Rafael,
Could you please provide your input,  about generic interface to dump the active children of a device.
Thanks,
Anshuman Gupta.
> 
> 
> BR,
> Jani.
> 
> 
> 
> 
> > Thanks,
> > Anshuman.
> >>
> >> BR,
> >> Jani.
> >>
> >>
> >> > Thanks,
> >> > Anshuman Gupta.
> >> >
> >> >
> >> >
> >> >
> >> >>
> >> >> BR,
> >> >> Jani.
> >> >>
> >> >> >
> >> >> >> +
> >> >> >>  static int i915_runtime_pm_status(struct seq_file *m, void
> >> >> >>*unused)
> >> >> >>  {
> >> >> >>	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> >> >> >>@@
> >> >> >>-500,6 +534,10 @@ static int i915_runtime_pm_status(struct
> >> >> >>seq_file *m, void *unused)
> >> >> >>  #ifdef CONFIG_PM
> >> >> >>	seq_printf(m, "Usage count: %d\n",
> >> >> >>		   atomic_read(&dev_priv->drm.dev-
> >power.usage_count));
> >> >> >> +	seq_printf(m, "Runtime active children: %d\n",
> >> >> >> +		   atomic_read(&dev_priv->drm.dev-
> >power.child_count));
> >> >> >> +	device_for_each_child(&pdev->dev, m,
> >> >> >> +i915_runtime_dump_child_status);
> >> >> >> +
> >> >> >>  #else
> >> >> >>	seq_printf(m, "Device Power Management (CONFIG_PM)
> >> >> >>disabled\n");
> >> >> >>  #endif
> >> >> >> --
> >> >> >> 2.26.2
> >> >> >>
> >> >>
> >> >> --
> >> >> Jani Nikula, Intel Open Source Graphics Center
> >>
> >> --
> >> Jani Nikula, Intel Open Source Graphics Center
> 
> --
> Jani Nikula, Intel Open Source Graphics Center
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 445b4da23950..ea1730419f8d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -483,6 +483,40 @@  static int i915_rps_boost_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int i915_runtime_dump_child_status(struct device *dev, void *data)
+{
+	struct seq_file *m = data;
+	const char *rpm_status;
+
+	/* Early return if runtime_pm is disabled */
+	if (dev->power.disable_depth)
+		return 0;
+
+	switch (dev->power.runtime_status) {
+	case RPM_SUSPENDED:
+		rpm_status = "suspended";
+		break;
+	case RPM_SUSPENDING:
+		rpm_status = "suspending";
+		break;
+	case RPM_RESUMING:
+		rpm_status = "resuming";
+		break;
+	case RPM_ACTIVE:
+		rpm_status = "active";
+		break;
+	default:
+		rpm_status = "unknown";
+	}
+
+	seq_printf(m, "\t%s %s: Runtime status: %s\n", dev_driver_string(dev),
+		   dev_name(dev), rpm_status);
+
+	return 0;
+}
+#endif
+
 static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -500,6 +534,10 @@  static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 #ifdef CONFIG_PM
 	seq_printf(m, "Usage count: %d\n",
 		   atomic_read(&dev_priv->drm.dev->power.usage_count));
+	seq_printf(m, "Runtime active children: %d\n",
+		   atomic_read(&dev_priv->drm.dev->power.child_count));
+	device_for_each_child(&pdev->dev, m, i915_runtime_dump_child_status);
+
 #else
 	seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
 #endif