diff mbox series

[3/3] drm/i915/hwmon: Block waiting for GuC reset to complete

Message ID 20230410223509.3593109-4-ashutosh.dixit@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/guc: Disable PL1 power limit when loading GuC firmware | expand

Commit Message

Dixit, Ashutosh April 10, 2023, 10:35 p.m. UTC
Instead of erroring out when GuC reset is in progress, block waiting for
GuC reset to complete which is a more reasonable uapi behavior.

v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 5 deletions(-)

Comments

Rodrigo Vivi April 18, 2023, 5:35 a.m. UTC | #1
On Mon, Apr 10, 2023 at 03:35:09PM -0700, Ashutosh Dixit wrote:
> Instead of erroring out when GuC reset is in progress, block waiting for
> GuC reset to complete which is a more reasonable uapi behavior.
> 
> v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> 
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
>  1 file changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> index 9ab8971679fe3..8471a667dfc71 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -51,6 +51,7 @@ struct hwm_drvdata {
>  	char name[12];
>  	int gt_n;
>  	bool reset_in_progress;
> +	wait_queue_head_t waitq;
>  };
>  
>  struct i915_hwmon {
> @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
>  static int
>  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
>  {
> +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> +
> +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
>  	struct i915_hwmon *hwmon = ddat->hwmon;
>  	intel_wakeref_t wakeref;
> -	int ret = 0;
> +	DEFINE_WAIT(wait);
>  	u32 nval;
>  
> -	mutex_lock(&hwmon->hwmon_lock);
> -	if (hwmon->ddat.reset_in_progress) {
> -		ret = -EAGAIN;
> -		goto unlock;
> +	/* Block waiting for GuC reset to complete when needed */
> +	for (;;) {
> +		mutex_lock(&hwmon->hwmon_lock);

I'm really afraid of how this mutex is handled with the wait queue.
some initial thought it looks like it is trying to reimplement ww_mutex?

all other examples of the wait_queue usages like this or didn't use
locks or had it in a total different flow that I could not correlate.

> +
> +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> +
> +		if (!hwmon->ddat.reset_in_progress)
> +			break;

If this breaks we never unlock it?

> +
> +		if (signal_pending(current)) {
> +			ret = -EINTR;
> +			break;
> +		}
> +
> +		if (!timeout) {
> +			ret = -ETIME;
> +			break;
> +		}
> +
> +		mutex_unlock(&hwmon->hwmon_lock);

do we need to lock the signal pending and timeout as well?
or only wrapping it around the hwmon->ddat access would be
enough?

> +
> +		timeout = schedule_timeout(timeout);
>  	}
> +	finish_wait(&ddat->waitq, &wait);
> +	if (ret)
> +		goto unlock;
> +
>  	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
>  
>  	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
> @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
>  	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
>  			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
>  	hwmon->ddat.reset_in_progress = false;
> +	wake_up_all(&hwmon->ddat.waitq);
>  
>  	mutex_unlock(&hwmon->hwmon_lock);
>  }
> @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
>  	ddat->uncore = &i915->uncore;
>  	snprintf(ddat->name, sizeof(ddat->name), "i915");
>  	ddat->gt_n = -1;
> +	init_waitqueue_head(&ddat->waitq);
>  
>  	for_each_gt(gt, i915, i) {
>  		ddat_gt = hwmon->ddat_gt + i;
> -- 
> 2.38.0
>
Dixit, Ashutosh April 18, 2023, 5:23 p.m. UTC | #2
On Mon, 17 Apr 2023 22:35:58 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

> On Mon, Apr 10, 2023 at 03:35:09PM -0700, Ashutosh Dixit wrote:
> > Instead of erroring out when GuC reset is in progress, block waiting for
> > GuC reset to complete which is a more reasonable uapi behavior.
> >
> > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> >  1 file changed, 33 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > index 9ab8971679fe3..8471a667dfc71 100644
> > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> >	char name[12];
> >	int gt_n;
> >	bool reset_in_progress;
> > +	wait_queue_head_t waitq;
> >  };
> >
> >  struct i915_hwmon {
> > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> >  static int
> >  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> >  {
> > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > +
> > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
> >	struct i915_hwmon *hwmon = ddat->hwmon;
> >	intel_wakeref_t wakeref;
> > -	int ret = 0;
> > +	DEFINE_WAIT(wait);
> >	u32 nval;
> >
> > -	mutex_lock(&hwmon->hwmon_lock);
> > -	if (hwmon->ddat.reset_in_progress) {
> > -		ret = -EAGAIN;
> > -		goto unlock;
> > +	/* Block waiting for GuC reset to complete when needed */
> > +	for (;;) {
> > +		mutex_lock(&hwmon->hwmon_lock);
>
> I'm really afraid of how this mutex is handled with the wait queue.
> some initial thought it looks like it is trying to reimplement ww_mutex?

Sorry, but I am missing the relation with ww_mutex. No such relation is
intended.

> all other examples of the wait_queue usages like this or didn't use
> locks or had it in a total different flow that I could not correlate.

Actually there are several examples of prepare_to_wait/finish_wait
sequences with both spinlock and mutex in the kernel. See
e.g. rpm_suspend(), wait_for_rtrs_disconnection(), softsynthx_read().

Also, as I mentioned, except for the lock, the sequence here is identical
to intel_guc_wait_for_pending_msg().

>
> > +
> > +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> > +
> > +		if (!hwmon->ddat.reset_in_progress)
> > +			break;
>
> If this breaks we never unlock it?

Correct, this is the original case in Patch 2 where the mutex is acquired
in the beginning of the function and released just before the final exit
from the function (so the mutex is held for the entire duration of the
function).

>
> > +
> > +		if (signal_pending(current)) {
> > +			ret = -EINTR;
> > +			break;
> > +		}
> > +
> > +		if (!timeout) {
> > +			ret = -ETIME;
> > +			break;
> > +		}
> > +
> > +		mutex_unlock(&hwmon->hwmon_lock);
>
> do we need to lock the signal pending and timeout as well?
> or only wrapping it around the hwmon->ddat access would be
> enough?

Strictly, the mutex is only needed for the hwmon->ddat.reset_in_progress
flag. But because this is not a performance path, implementing it as done
in the patch simplifies the code flow (since there are several if/else,
goto's, mutex lock/unlock and prepare_to_wait/finish_wait to consider).

So if possible I *really* want to not try to over-optimize here (I did try
a few other things when writing the patch but it was getting ugly). The
only real requirement is to drop the lock before calling schedule_timeout()
below (and we are reacquiring the lock as soon as we are scheduled back in,
as you can see in the loop above).

>
> > +
> > +		timeout = schedule_timeout(timeout);
> >	}
> > +	finish_wait(&ddat->waitq, &wait);
> > +	if (ret)
> > +		goto unlock;
> > +
> >	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
> >
> >	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
> > @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> >	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
> >			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
> >	hwmon->ddat.reset_in_progress = false;
> > +	wake_up_all(&hwmon->ddat.waitq);
> >
> >	mutex_unlock(&hwmon->hwmon_lock);
> >  }
> > @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
> >	ddat->uncore = &i915->uncore;
> >	snprintf(ddat->name, sizeof(ddat->name), "i915");
> >	ddat->gt_n = -1;
> > +	init_waitqueue_head(&ddat->waitq);
> >
> >	for_each_gt(gt, i915, i) {
> >		ddat_gt = hwmon->ddat_gt + i;
> > --
> > 2.38.0
> >

From what I understand is the locking above is fine and is not the
point. The real race is between schedule_timeout() (which suspends the
thread) and wake_up_all() (which schedules it back in). But this
prepare_to_wait/finish_wait pattern is so widespread that the kernel
guarantees that this works correctly as long as you do things in the
correct order (otherwise we'd see a lot more kernel hangs/deadlocks).

Thanks,
Ashutosh
Tvrtko Ursulin April 19, 2023, 1:21 p.m. UTC | #3
On 10/04/2023 23:35, Ashutosh Dixit wrote:
> Instead of erroring out when GuC reset is in progress, block waiting for
> GuC reset to complete which is a more reasonable uapi behavior.
> 
> v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> 
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
>   1 file changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> index 9ab8971679fe3..8471a667dfc71 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -51,6 +51,7 @@ struct hwm_drvdata {
>   	char name[12];
>   	int gt_n;
>   	bool reset_in_progress;
> +	wait_queue_head_t waitq;
>   };
>   
>   struct i915_hwmon {
> @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
>   static int
>   hwm_power_max_write(struct hwm_drvdata *ddat, long val)
>   {
> +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> +
> +	int ret = 0, timeout = GUC_RESET_TIMEOUT;

Patch looks good to me apart that I am not sure what is the purpose of 
the timeout? This is just the sysfs write path or has more callers? If 
the former perhaps it would be better to just use interruptible 
everything (mutex and sleep) and wait for as long as it takes or until 
user presses Ctrl-C?

Regards,

Tvrtko

>   	struct i915_hwmon *hwmon = ddat->hwmon;
>   	intel_wakeref_t wakeref;
> -	int ret = 0;
> +	DEFINE_WAIT(wait);
>   	u32 nval;
>   
> -	mutex_lock(&hwmon->hwmon_lock);
> -	if (hwmon->ddat.reset_in_progress) {
> -		ret = -EAGAIN;
> -		goto unlock;
> +	/* Block waiting for GuC reset to complete when needed */
> +	for (;;) {
> +		mutex_lock(&hwmon->hwmon_lock);
> +
> +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> +
> +		if (!hwmon->ddat.reset_in_progress)
> +			break;
> +
> +		if (signal_pending(current)) {
> +			ret = -EINTR;
> +			break;
> +		}
> +
> +		if (!timeout) {
> +			ret = -ETIME;
> +			break;
> +		}
> +
> +		mutex_unlock(&hwmon->hwmon_lock);
> +
> +		timeout = schedule_timeout(timeout);
>   	}
> +	finish_wait(&ddat->waitq, &wait);
> +	if (ret)
> +		goto unlock;
> +
>   	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
>   
>   	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
> @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
>   	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
>   			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
>   	hwmon->ddat.reset_in_progress = false;
> +	wake_up_all(&hwmon->ddat.waitq);
>   
>   	mutex_unlock(&hwmon->hwmon_lock);
>   }
> @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
>   	ddat->uncore = &i915->uncore;
>   	snprintf(ddat->name, sizeof(ddat->name), "i915");
>   	ddat->gt_n = -1;
> +	init_waitqueue_head(&ddat->waitq);
>   
>   	for_each_gt(gt, i915, i) {
>   		ddat_gt = hwmon->ddat_gt + i;
Rodrigo Vivi April 19, 2023, 7:40 p.m. UTC | #4
On Tue, Apr 18, 2023 at 10:23:50AM -0700, Dixit, Ashutosh wrote:
> On Mon, 17 Apr 2023 22:35:58 -0700, Rodrigo Vivi wrote:
> >
> 
> Hi Rodrigo,
> 
> > On Mon, Apr 10, 2023 at 03:35:09PM -0700, Ashutosh Dixit wrote:
> > > Instead of erroring out when GuC reset is in progress, block waiting for
> > > GuC reset to complete which is a more reasonable uapi behavior.
> > >
> > > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> > >
> > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> > >  1 file changed, 33 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > > index 9ab8971679fe3..8471a667dfc71 100644
> > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> > >	char name[12];
> > >	int gt_n;
> > >	bool reset_in_progress;
> > > +	wait_queue_head_t waitq;
> > >  };
> > >
> > >  struct i915_hwmon {
> > > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> > >  static int
> > >  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> > >  {
> > > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > > +
> > > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
> > >	struct i915_hwmon *hwmon = ddat->hwmon;
> > >	intel_wakeref_t wakeref;
> > > -	int ret = 0;
> > > +	DEFINE_WAIT(wait);
> > >	u32 nval;
> > >
> > > -	mutex_lock(&hwmon->hwmon_lock);
> > > -	if (hwmon->ddat.reset_in_progress) {
> > > -		ret = -EAGAIN;
> > > -		goto unlock;
> > > +	/* Block waiting for GuC reset to complete when needed */
> > > +	for (;;) {
> > > +		mutex_lock(&hwmon->hwmon_lock);
> >
> > I'm really afraid of how this mutex is handled with the wait queue.
> > some initial thought it looks like it is trying to reimplement ww_mutex?
> 
> Sorry, but I am missing the relation with ww_mutex. No such relation is
> intended.
> 
> > all other examples of the wait_queue usages like this or didn't use
> > locks or had it in a total different flow that I could not correlate.
> 
> Actually there are several examples of prepare_to_wait/finish_wait
> sequences with both spinlock and mutex in the kernel. See
> e.g. rpm_suspend(), wait_for_rtrs_disconnection(), softsynthx_read().
> 
> Also, as I mentioned, except for the lock, the sequence here is identical
> to intel_guc_wait_for_pending_msg().
> 
> >
> > > +
> > > +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> > > +
> > > +		if (!hwmon->ddat.reset_in_progress)
> > > +			break;
> >
> > If this breaks we never unlock it?
> 
> Correct, this is the original case in Patch 2 where the mutex is acquired
> in the beginning of the function and released just before the final exit
> from the function (so the mutex is held for the entire duration of the
> function).

I got really confused here... I looked at the patch 2 again and I don't
see any place where the lock remains outside of the function. What was
what I asked to remove on the initial versions.

But now with this one I'm even more confused because I couldn't follow
to understand who will remove the lock and when.

> 
> >
> > > +
> > > +		if (signal_pending(current)) {
> > > +			ret = -EINTR;
> > > +			break;
> > > +		}
> > > +
> > > +		if (!timeout) {
> > > +			ret = -ETIME;
> > > +			break;
> > > +		}
> > > +
> > > +		mutex_unlock(&hwmon->hwmon_lock);
> >
> > do we need to lock the signal pending and timeout as well?
> > or only wrapping it around the hwmon->ddat access would be
> > enough?
> 
> Strictly, the mutex is only needed for the hwmon->ddat.reset_in_progress
> flag. But because this is not a performance path, implementing it as done
> in the patch simplifies the code flow (since there are several if/else,
> goto's, mutex lock/unlock and prepare_to_wait/finish_wait to consider).
> 
> So if possible I *really* want to not try to over-optimize here (I did try
> a few other things when writing the patch but it was getting ugly). The
> only real requirement is to drop the lock before calling schedule_timeout()
> below (and we are reacquiring the lock as soon as we are scheduled back in,
> as you can see in the loop above).
> 
> >
> > > +
> > > +		timeout = schedule_timeout(timeout);
> > >	}
> > > +	finish_wait(&ddat->waitq, &wait);
> > > +	if (ret)
> > > +		goto unlock;
> > > +
> > >	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
> > >
> > >	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
> > > @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> > >	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
> > >			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
> > >	hwmon->ddat.reset_in_progress = false;
> > > +	wake_up_all(&hwmon->ddat.waitq);
> > >
> > >	mutex_unlock(&hwmon->hwmon_lock);
> > >  }
> > > @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
> > >	ddat->uncore = &i915->uncore;
> > >	snprintf(ddat->name, sizeof(ddat->name), "i915");
> > >	ddat->gt_n = -1;
> > > +	init_waitqueue_head(&ddat->waitq);
> > >
> > >	for_each_gt(gt, i915, i) {
> > >		ddat_gt = hwmon->ddat_gt + i;
> > > --
> > > 2.38.0
> > >
> 
> From what I understand is the locking above is fine and is not the
> point. The real race is between schedule_timeout() (which suspends the
> thread) and wake_up_all() (which schedules it back in). But this
> prepare_to_wait/finish_wait pattern is so widespread that the kernel
> guarantees that this works correctly as long as you do things in the
> correct order (otherwise we'd see a lot more kernel hangs/deadlocks).
> 
> Thanks,
> Ashutosh
Dixit, Ashutosh April 19, 2023, 10:10 p.m. UTC | #5
On Wed, 19 Apr 2023 06:21:27 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 10/04/2023 23:35, Ashutosh Dixit wrote:
> > Instead of erroring out when GuC reset is in progress, block waiting for
> > GuC reset to complete which is a more reasonable uapi behavior.
> >
> > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> >   1 file changed, 33 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > index 9ab8971679fe3..8471a667dfc71 100644
> > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> >	char name[12];
> >	int gt_n;
> >	bool reset_in_progress;
> > +	wait_queue_head_t waitq;
> >   };
> >     struct i915_hwmon {
> > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> >   static int
> >   hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> >   {
> > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > +
> > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
>
> Patch looks good to me

Great, thanks :)

> apart that I am not sure what is the purpose of the timeout? This is just
> the sysfs write path or has more callers?

It is just the sysfs path, but the sysfs is accessed also by the oneAPI
stack (Level 0). In the initial version I also didn't have the timeout
thinking that the app can send a signal to the blocked thread to unblock
it. I introduced the timeout after Rodrigo brought it up and I am now
thinking maybe it's better to have the timeout in the driver since the app
has no knowledge of how long GuC resets can take. But I can remove it if
you think it's not needed.

> If the
> former perhaps it would be better to just use interruptible everything
> (mutex and sleep) and wait for as long as it takes or until user presses
> Ctrl-C?

Now we are not holding the mutexes for long, just long enough do register
rmw's. So not holding the mutex across GuC reset as we were
originally. Therefore I am thinking mutex_lock_interruptible is not needed?
The sleep is already interruptible (TASK_INTERRUPTIBLE).

Anyway please let me know if you think we need to change anything.

Thanks.
--
Ashutosh

> >	struct i915_hwmon *hwmon = ddat->hwmon;
> >	intel_wakeref_t wakeref;
> > -	int ret = 0;
> > +	DEFINE_WAIT(wait);
> >	u32 nval;
> >   -	mutex_lock(&hwmon->hwmon_lock);
> > -	if (hwmon->ddat.reset_in_progress) {
> > -		ret = -EAGAIN;
> > -		goto unlock;
> > +	/* Block waiting for GuC reset to complete when needed */
> > +	for (;;) {
> > +		mutex_lock(&hwmon->hwmon_lock);
> > +
> > +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> > +
> > +		if (!hwmon->ddat.reset_in_progress)
> > +			break;
> > +
> > +		if (signal_pending(current)) {
> > +			ret = -EINTR;
> > +			break;
> > +		}
> > +
> > +		if (!timeout) {
> > +			ret = -ETIME;
> > +			break;
> > +		}
> > +
> > +		mutex_unlock(&hwmon->hwmon_lock);
> > +
> > +		timeout = schedule_timeout(timeout);
> >	}
> > +	finish_wait(&ddat->waitq, &wait);
> > +	if (ret)
> > +		goto unlock;
> > +
> >	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
> >		/* Disable PL1 limit and verify, because the limit cannot be
> > disabled on all platforms */
> > @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> >	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
> >			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
> >	hwmon->ddat.reset_in_progress = false;
> > +	wake_up_all(&hwmon->ddat.waitq);
> >		mutex_unlock(&hwmon->hwmon_lock);
> >   }
> > @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
> >	ddat->uncore = &i915->uncore;
> >	snprintf(ddat->name, sizeof(ddat->name), "i915");
> >	ddat->gt_n = -1;
> > +	init_waitqueue_head(&ddat->waitq);
> >		for_each_gt(gt, i915, i) {
> >		ddat_gt = hwmon->ddat_gt + i;
Dixit, Ashutosh April 19, 2023, 10:13 p.m. UTC | #6
On Wed, 19 Apr 2023 12:40:44 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

> On Tue, Apr 18, 2023 at 10:23:50AM -0700, Dixit, Ashutosh wrote:
> > On Mon, 17 Apr 2023 22:35:58 -0700, Rodrigo Vivi wrote:
> > >
> >
> > Hi Rodrigo,
> >
> > > On Mon, Apr 10, 2023 at 03:35:09PM -0700, Ashutosh Dixit wrote:
> > > > Instead of erroring out when GuC reset is in progress, block waiting for
> > > > GuC reset to complete which is a more reasonable uapi behavior.
> > > >
> > > > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> > > >
> > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> > > >  1 file changed, 33 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > index 9ab8971679fe3..8471a667dfc71 100644
> > > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> > > >	char name[12];
> > > >	int gt_n;
> > > >	bool reset_in_progress;
> > > > +	wait_queue_head_t waitq;
> > > >  };
> > > >
> > > >  struct i915_hwmon {
> > > > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> > > >  static int
> > > >  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> > > >  {
> > > > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > > > +
> > > > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
> > > >	struct i915_hwmon *hwmon = ddat->hwmon;
> > > >	intel_wakeref_t wakeref;
> > > > -	int ret = 0;
> > > > +	DEFINE_WAIT(wait);
> > > >	u32 nval;
> > > >
> > > > -	mutex_lock(&hwmon->hwmon_lock);
> > > > -	if (hwmon->ddat.reset_in_progress) {
> > > > -		ret = -EAGAIN;
> > > > -		goto unlock;
> > > > +	/* Block waiting for GuC reset to complete when needed */
> > > > +	for (;;) {
> > > > +		mutex_lock(&hwmon->hwmon_lock);
> > >
> > > I'm really afraid of how this mutex is handled with the wait queue.
> > > some initial thought it looks like it is trying to reimplement ww_mutex?
> >
> > Sorry, but I am missing the relation with ww_mutex. No such relation is
> > intended.
> >
> > > all other examples of the wait_queue usages like this or didn't use
> > > locks or had it in a total different flow that I could not correlate.
> >
> > Actually there are several examples of prepare_to_wait/finish_wait
> > sequences with both spinlock and mutex in the kernel. See
> > e.g. rpm_suspend(), wait_for_rtrs_disconnection(), softsynthx_read().
> >
> > Also, as I mentioned, except for the lock, the sequence here is identical
> > to intel_guc_wait_for_pending_msg().
> >
> > >
> > > > +
> > > > +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> > > > +
> > > > +		if (!hwmon->ddat.reset_in_progress)
> > > > +			break;
> > >
> > > If this breaks we never unlock it?
> >
> > Correct, this is the original case in Patch 2 where the mutex is acquired
> > in the beginning of the function and released just before the final exit
> > from the function (so the mutex is held for the entire duration of the
> > function).
>
> I got really confused here...

Sorry, the patch is a little confusing/tricky but I thought I'd better
stick to the standard 'for (;;)' loop pattern otherwise it will also be
hard to review.

> I looked at the patch 2 again and I don't see any place where the lock
> remains outside of the function. What was what I asked to remove on the
> initial versions.

So it was in Patch 1 where we changed the code to take the lock in the
beginning of the function and release it at the end of the function (you
can see it Patch 1).

In Patch 2 the 'unlock' label and 'goto unlock' is introduced and the lock
is released at the 'unlock' label (it is visible in Patch 2).

> But now with this one I'm even more confused because I couldn't follow
> to understand who will remove the lock and when.

In Patch 3 again the lock is released at the the 'unlock' label (i.e. the
destination of 'goto unlock', not visible in Patch 3). But we execute 'goto
unlock' only when 'ret != 0' in the 'for (;;)' loop. But when 'ret == 0'
(when 'ddat.reset_in_progress' flag is clear) we hold the mutex, execute
the entire function and finally release the lock at the end of the
function.

Hopefully this helps.

Thanks.
--
Ashutosh

>
> >
> > >
> > > > +
> > > > +		if (signal_pending(current)) {
> > > > +			ret = -EINTR;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		if (!timeout) {
> > > > +			ret = -ETIME;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		mutex_unlock(&hwmon->hwmon_lock);
> > >
> > > do we need to lock the signal pending and timeout as well?
> > > or only wrapping it around the hwmon->ddat access would be
> > > enough?
> >
> > Strictly, the mutex is only needed for the hwmon->ddat.reset_in_progress
> > flag. But because this is not a performance path, implementing it as done
> > in the patch simplifies the code flow (since there are several if/else,
> > goto's, mutex lock/unlock and prepare_to_wait/finish_wait to consider).
> >
> > So if possible I *really* want to not try to over-optimize here (I did try
> > a few other things when writing the patch but it was getting ugly). The
> > only real requirement is to drop the lock before calling schedule_timeout()
> > below (and we are reacquiring the lock as soon as we are scheduled back in,
> > as you can see in the loop above).
> >
> > >
> > > > +
> > > > +		timeout = schedule_timeout(timeout);
> > > >	}
> > > > +	finish_wait(&ddat->waitq, &wait);
> > > > +	if (ret)
> > > > +		goto unlock;
> > > > +
> > > >	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
> > > >
> > > >	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
> > > > @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> > > >	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
> > > >			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
> > > >	hwmon->ddat.reset_in_progress = false;
> > > > +	wake_up_all(&hwmon->ddat.waitq);
> > > >
> > > >	mutex_unlock(&hwmon->hwmon_lock);
> > > >  }
> > > > @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
> > > >	ddat->uncore = &i915->uncore;
> > > >	snprintf(ddat->name, sizeof(ddat->name), "i915");
> > > >	ddat->gt_n = -1;
> > > > +	init_waitqueue_head(&ddat->waitq);
> > > >
> > > >	for_each_gt(gt, i915, i) {
> > > >		ddat_gt = hwmon->ddat_gt + i;
> > > > --
> > > > 2.38.0
> > > >
> >
> > From what I understand is the locking above is fine and is not the
> > point. The real race is between schedule_timeout() (which suspends the
> > thread) and wake_up_all() (which schedules it back in). But this
> > prepare_to_wait/finish_wait pattern is so widespread that the kernel
> > guarantees that this works correctly as long as you do things in the
> > correct order (otherwise we'd see a lot more kernel hangs/deadlocks).
> >
> > Thanks,
> > Ashutosh
Tvrtko Ursulin April 20, 2023, 7:57 a.m. UTC | #7
On 19/04/2023 23:10, Dixit, Ashutosh wrote:
> On Wed, 19 Apr 2023 06:21:27 -0700, Tvrtko Ursulin wrote:
>>
> 
> Hi Tvrtko,
> 
>> On 10/04/2023 23:35, Ashutosh Dixit wrote:
>>> Instead of erroring out when GuC reset is in progress, block waiting for
>>> GuC reset to complete which is a more reasonable uapi behavior.
>>>
>>> v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
>>>
>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
>>>    1 file changed, 33 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
>>> index 9ab8971679fe3..8471a667dfc71 100644
>>> --- a/drivers/gpu/drm/i915/i915_hwmon.c
>>> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
>>> @@ -51,6 +51,7 @@ struct hwm_drvdata {
>>> 	char name[12];
>>> 	int gt_n;
>>> 	bool reset_in_progress;
>>> +	wait_queue_head_t waitq;
>>>    };
>>>      struct i915_hwmon {
>>> @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
>>>    static int
>>>    hwm_power_max_write(struct hwm_drvdata *ddat, long val)
>>>    {
>>> +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
>>> +
>>> +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
>>
>> Patch looks good to me
> 
> Great, thanks :)
> 
>> apart that I am not sure what is the purpose of the timeout? This is just
>> the sysfs write path or has more callers?
> 
> It is just the sysfs path, but the sysfs is accessed also by the oneAPI
> stack (Level 0). In the initial version I also didn't have the timeout
> thinking that the app can send a signal to the blocked thread to unblock
> it. I introduced the timeout after Rodrigo brought it up and I am now
> thinking maybe it's better to have the timeout in the driver since the app
> has no knowledge of how long GuC resets can take. But I can remove it if
> you think it's not needed.

Maybe I am missing something but I don't get why we would need to 
provide a timeout facility in sysfs? If the library writes here to 
configure something it already has to expect a blocking write by the 
nature of a a write(2) and sysfs contract. It can take long for any 
reason so I hope we are not guaranteeing some latency number to someone? 
Or the concern is just about things getting stuck? In which case I think 
Ctrl-C is the answer because ETIME is not even listed as an errno for 
write(2).

>> If the
>> former perhaps it would be better to just use interruptible everything
>> (mutex and sleep) and wait for as long as it takes or until user presses
>> Ctrl-C?
> 
> Now we are not holding the mutexes for long, just long enough do register
> rmw's. So not holding the mutex across GuC reset as we were
> originally. Therefore I am thinking mutex_lock_interruptible is not needed?
> The sleep is already interruptible (TASK_INTERRUPTIBLE).

Ah yes, you are right.

Regards,

Tvrtko

> Anyway please let me know if you think we need to change anything.
> 
> Thanks.
> --
> Ashutosh
> 
>>> 	struct i915_hwmon *hwmon = ddat->hwmon;
>>> 	intel_wakeref_t wakeref;
>>> -	int ret = 0;
>>> +	DEFINE_WAIT(wait);
>>> 	u32 nval;
>>>    -	mutex_lock(&hwmon->hwmon_lock);
>>> -	if (hwmon->ddat.reset_in_progress) {
>>> -		ret = -EAGAIN;
>>> -		goto unlock;
>>> +	/* Block waiting for GuC reset to complete when needed */
>>> +	for (;;) {
>>> +		mutex_lock(&hwmon->hwmon_lock);
>>> +
>>> +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
>>> +
>>> +		if (!hwmon->ddat.reset_in_progress)
>>> +			break;
>>> +
>>> +		if (signal_pending(current)) {
>>> +			ret = -EINTR;
>>> +			break;
>>> +		}
>>> +
>>> +		if (!timeout) {
>>> +			ret = -ETIME;
>>> +			break;
>>> +		}
>>> +
>>> +		mutex_unlock(&hwmon->hwmon_lock);
>>> +
>>> +		timeout = schedule_timeout(timeout);
>>> 	}
>>> +	finish_wait(&ddat->waitq, &wait);
>>> +	if (ret)
>>> +		goto unlock;
>>> +
>>> 	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
>>> 		/* Disable PL1 limit and verify, because the limit cannot be
>>> disabled on all platforms */
>>> @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
>>> 	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
>>> 			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
>>> 	hwmon->ddat.reset_in_progress = false;
>>> +	wake_up_all(&hwmon->ddat.waitq);
>>> 		mutex_unlock(&hwmon->hwmon_lock);
>>>    }
>>> @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
>>> 	ddat->uncore = &i915->uncore;
>>> 	snprintf(ddat->name, sizeof(ddat->name), "i915");
>>> 	ddat->gt_n = -1;
>>> +	init_waitqueue_head(&ddat->waitq);
>>> 		for_each_gt(gt, i915, i) {
>>> 		ddat_gt = hwmon->ddat_gt + i;
Rodrigo Vivi April 20, 2023, 3:43 p.m. UTC | #8
On Thu, Apr 20, 2023 at 08:57:24AM +0100, Tvrtko Ursulin wrote:
> 
> On 19/04/2023 23:10, Dixit, Ashutosh wrote:
> > On Wed, 19 Apr 2023 06:21:27 -0700, Tvrtko Ursulin wrote:
> > > 
> > 
> > Hi Tvrtko,
> > 
> > > On 10/04/2023 23:35, Ashutosh Dixit wrote:
> > > > Instead of erroring out when GuC reset is in progress, block waiting for
> > > > GuC reset to complete which is a more reasonable uapi behavior.
> > > > 
> > > > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> > > > 
> > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > > ---
> > > >    drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> > > >    1 file changed, 33 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > index 9ab8971679fe3..8471a667dfc71 100644
> > > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> > > > 	char name[12];
> > > > 	int gt_n;
> > > > 	bool reset_in_progress;
> > > > +	wait_queue_head_t waitq;
> > > >    };
> > > >      struct i915_hwmon {
> > > > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> > > >    static int
> > > >    hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> > > >    {
> > > > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > > > +
> > > > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
> > > 
> > > Patch looks good to me
> > 
> > Great, thanks :)
> > 
> > > apart that I am not sure what is the purpose of the timeout? This is just
> > > the sysfs write path or has more callers?
> > 
> > It is just the sysfs path, but the sysfs is accessed also by the oneAPI
> > stack (Level 0). In the initial version I also didn't have the timeout
> > thinking that the app can send a signal to the blocked thread to unblock
> > it. I introduced the timeout after Rodrigo brought it up and I am now
> > thinking maybe it's better to have the timeout in the driver since the app
> > has no knowledge of how long GuC resets can take. But I can remove it if
> > you think it's not needed.
> 
> Maybe I am missing something but I don't get why we would need to provide a
> timeout facility in sysfs? If the library writes here to configure something
> it already has to expect a blocking write by the nature of a a write(2) and
> sysfs contract. It can take long for any reason so I hope we are not
> guaranteeing some latency number to someone? Or the concern is just about
> things getting stuck? In which case I think Ctrl-C is the answer because
> ETIME is not even listed as an errno for write(2).

I suggested the timeout on the other version because of that race,
which is fixed now with this approach. It is probably better to remove
it now to avoid confusions. I'm sorry about that.

> 
> > > If the
> > > former perhaps it would be better to just use interruptible everything
> > > (mutex and sleep) and wait for as long as it takes or until user presses
> > > Ctrl-C?
> > 
> > Now we are not holding the mutexes for long, just long enough do register
> > rmw's. So not holding the mutex across GuC reset as we were
> > originally. Therefore I am thinking mutex_lock_interruptible is not needed?
> > The sleep is already interruptible (TASK_INTERRUPTIBLE).
> 
> Ah yes, you are right.
> 
> Regards,
> 
> Tvrtko
> 
> > Anyway please let me know if you think we need to change anything.
> > 
> > Thanks.
> > --
> > Ashutosh
> > 
> > > > 	struct i915_hwmon *hwmon = ddat->hwmon;
> > > > 	intel_wakeref_t wakeref;
> > > > -	int ret = 0;
> > > > +	DEFINE_WAIT(wait);
> > > > 	u32 nval;
> > > >    -	mutex_lock(&hwmon->hwmon_lock);
> > > > -	if (hwmon->ddat.reset_in_progress) {
> > > > -		ret = -EAGAIN;
> > > > -		goto unlock;
> > > > +	/* Block waiting for GuC reset to complete when needed */
> > > > +	for (;;) {
> > > > +		mutex_lock(&hwmon->hwmon_lock);
> > > > +
> > > > +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> > > > +
> > > > +		if (!hwmon->ddat.reset_in_progress)
> > > > +			break;
> > > > +
> > > > +		if (signal_pending(current)) {
> > > > +			ret = -EINTR;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		if (!timeout) {
> > > > +			ret = -ETIME;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		mutex_unlock(&hwmon->hwmon_lock);
> > > > +
> > > > +		timeout = schedule_timeout(timeout);
> > > > 	}
> > > > +	finish_wait(&ddat->waitq, &wait);
> > > > +	if (ret)
> > > > +		goto unlock;
> > > > +
> > > > 	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
> > > > 		/* Disable PL1 limit and verify, because the limit cannot be
> > > > disabled on all platforms */
> > > > @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> > > > 	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
> > > > 			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
> > > > 	hwmon->ddat.reset_in_progress = false;
> > > > +	wake_up_all(&hwmon->ddat.waitq);
> > > > 		mutex_unlock(&hwmon->hwmon_lock);
> > > >    }
> > > > @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
> > > > 	ddat->uncore = &i915->uncore;
> > > > 	snprintf(ddat->name, sizeof(ddat->name), "i915");
> > > > 	ddat->gt_n = -1;
> > > > +	init_waitqueue_head(&ddat->waitq);
> > > > 		for_each_gt(gt, i915, i) {
> > > > 		ddat_gt = hwmon->ddat_gt + i;
Rodrigo Vivi April 20, 2023, 3:45 p.m. UTC | #9
On Wed, Apr 19, 2023 at 03:13:08PM -0700, Dixit, Ashutosh wrote:
> On Wed, 19 Apr 2023 12:40:44 -0700, Rodrigo Vivi wrote:
> >
> 
> Hi Rodrigo,
> 
> > On Tue, Apr 18, 2023 at 10:23:50AM -0700, Dixit, Ashutosh wrote:
> > > On Mon, 17 Apr 2023 22:35:58 -0700, Rodrigo Vivi wrote:
> > > >
> > >
> > > Hi Rodrigo,
> > >
> > > > On Mon, Apr 10, 2023 at 03:35:09PM -0700, Ashutosh Dixit wrote:
> > > > > Instead of erroring out when GuC reset is in progress, block waiting for
> > > > > GuC reset to complete which is a more reasonable uapi behavior.
> > > > >
> > > > > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> > > > >
> > > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> > > > >  1 file changed, 33 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > > index 9ab8971679fe3..8471a667dfc71 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> > > > >	char name[12];
> > > > >	int gt_n;
> > > > >	bool reset_in_progress;
> > > > > +	wait_queue_head_t waitq;
> > > > >  };
> > > > >
> > > > >  struct i915_hwmon {
> > > > > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> > > > >  static int
> > > > >  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> > > > >  {
> > > > > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > > > > +
> > > > > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
> > > > >	struct i915_hwmon *hwmon = ddat->hwmon;
> > > > >	intel_wakeref_t wakeref;
> > > > > -	int ret = 0;
> > > > > +	DEFINE_WAIT(wait);
> > > > >	u32 nval;
> > > > >
> > > > > -	mutex_lock(&hwmon->hwmon_lock);
> > > > > -	if (hwmon->ddat.reset_in_progress) {
> > > > > -		ret = -EAGAIN;
> > > > > -		goto unlock;
> > > > > +	/* Block waiting for GuC reset to complete when needed */
> > > > > +	for (;;) {
> > > > > +		mutex_lock(&hwmon->hwmon_lock);
> > > >
> > > > I'm really afraid of how this mutex is handled with the wait queue.
> > > > some initial thought it looks like it is trying to reimplement ww_mutex?
> > >
> > > Sorry, but I am missing the relation with ww_mutex. No such relation is
> > > intended.
> > >
> > > > all other examples of the wait_queue usages like this or didn't use
> > > > locks or had it in a total different flow that I could not correlate.
> > >
> > > Actually there are several examples of prepare_to_wait/finish_wait
> > > sequences with both spinlock and mutex in the kernel. See
> > > e.g. rpm_suspend(), wait_for_rtrs_disconnection(), softsynthx_read().
> > >
> > > Also, as I mentioned, except for the lock, the sequence here is identical
> > > to intel_guc_wait_for_pending_msg().
> > >
> > > >
> > > > > +
> > > > > +		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> > > > > +
> > > > > +		if (!hwmon->ddat.reset_in_progress)
> > > > > +			break;
> > > >
> > > > If this breaks we never unlock it?
> > >
> > > Correct, this is the original case in Patch 2 where the mutex is acquired
> > > in the beginning of the function and released just before the final exit
> > > from the function (so the mutex is held for the entire duration of the
> > > function).
> >
> > I got really confused here...
> 
> Sorry, the patch is a little confusing/tricky but I thought I'd better
> stick to the standard 'for (;;)' loop pattern otherwise it will also be
> hard to review.
> 
> > I looked at the patch 2 again and I don't see any place where the lock
> > remains outside of the function. What was what I asked to remove on the
> > initial versions.
> 
> So it was in Patch 1 where we changed the code to take the lock in the
> beginning of the function and release it at the end of the function (you
> can see it Patch 1).
> 
> In Patch 2 the 'unlock' label and 'goto unlock' is introduced and the lock
> is released at the 'unlock' label (it is visible in Patch 2).
> 
> > But now with this one I'm even more confused because I couldn't follow
> > to understand who will remove the lock and when.
> 
> In Patch 3 again the lock is released at the the 'unlock' label (i.e. the
> destination of 'goto unlock', not visible in Patch 3). But we execute 'goto
> unlock' only when 'ret != 0' in the 'for (;;)' loop. But when 'ret == 0'
> (when 'ddat.reset_in_progress' flag is clear) we hold the mutex, execute
> the entire function and finally release the lock at the end of the
> function.
> 
> Hopefully this helps.

more coffee also helped! I'm sorry for the noise.

with the timeout thing sorted out:

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>


> 
> Thanks.
> --
> Ashutosh
> 
> >
> > >
> > > >
> > > > > +
> > > > > +		if (signal_pending(current)) {
> > > > > +			ret = -EINTR;
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		if (!timeout) {
> > > > > +			ret = -ETIME;
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		mutex_unlock(&hwmon->hwmon_lock);
> > > >
> > > > do we need to lock the signal pending and timeout as well?
> > > > or only wrapping it around the hwmon->ddat access would be
> > > > enough?
> > >
> > > Strictly, the mutex is only needed for the hwmon->ddat.reset_in_progress
> > > flag. But because this is not a performance path, implementing it as done
> > > in the patch simplifies the code flow (since there are several if/else,
> > > goto's, mutex lock/unlock and prepare_to_wait/finish_wait to consider).
> > >
> > > So if possible I *really* want to not try to over-optimize here (I did try
> > > a few other things when writing the patch but it was getting ugly). The
> > > only real requirement is to drop the lock before calling schedule_timeout()
> > > below (and we are reacquiring the lock as soon as we are scheduled back in,
> > > as you can see in the loop above).
> > >
> > > >
> > > > > +
> > > > > +		timeout = schedule_timeout(timeout);
> > > > >	}
> > > > > +	finish_wait(&ddat->waitq, &wait);
> > > > > +	if (ret)
> > > > > +		goto unlock;
> > > > > +
> > > > >	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
> > > > >
> > > > >	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
> > > > > @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> > > > >	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
> > > > >			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
> > > > >	hwmon->ddat.reset_in_progress = false;
> > > > > +	wake_up_all(&hwmon->ddat.waitq);
> > > > >
> > > > >	mutex_unlock(&hwmon->hwmon_lock);
> > > > >  }
> > > > > @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
> > > > >	ddat->uncore = &i915->uncore;
> > > > >	snprintf(ddat->name, sizeof(ddat->name), "i915");
> > > > >	ddat->gt_n = -1;
> > > > > +	init_waitqueue_head(&ddat->waitq);
> > > > >
> > > > >	for_each_gt(gt, i915, i) {
> > > > >		ddat_gt = hwmon->ddat_gt + i;
> > > > > --
> > > > > 2.38.0
> > > > >
> > >
> > > From what I understand is the locking above is fine and is not the
> > > point. The real race is between schedule_timeout() (which suspends the
> > > thread) and wake_up_all() (which schedules it back in). But this
> > > prepare_to_wait/finish_wait pattern is so widespread that the kernel
> > > guarantees that this works correctly as long as you do things in the
> > > correct order (otherwise we'd see a lot more kernel hangs/deadlocks).
> > >
> > > Thanks,
> > > Ashutosh
Dixit, Ashutosh April 20, 2023, 4:26 p.m. UTC | #10
On Thu, 20 Apr 2023 08:43:52 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

> On Thu, Apr 20, 2023 at 08:57:24AM +0100, Tvrtko Ursulin wrote:
> >
> > On 19/04/2023 23:10, Dixit, Ashutosh wrote:
> > > On Wed, 19 Apr 2023 06:21:27 -0700, Tvrtko Ursulin wrote:
> > > >
> > >
> > > Hi Tvrtko,
> > >
> > > > On 10/04/2023 23:35, Ashutosh Dixit wrote:
> > > > > Instead of erroring out when GuC reset is in progress, block waiting for
> > > > > GuC reset to complete which is a more reasonable uapi behavior.
> > > > >
> > > > > v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> > > > >
> > > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > > > ---
> > > > >    drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
> > > > >    1 file changed, 33 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > > index 9ab8971679fe3..8471a667dfc71 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > > > @@ -51,6 +51,7 @@ struct hwm_drvdata {
> > > > >	char name[12];
> > > > >	int gt_n;
> > > > >	bool reset_in_progress;
> > > > > +	wait_queue_head_t waitq;
> > > > >    };
> > > > >      struct i915_hwmon {
> > > > > @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
> > > > >    static int
> > > > >    hwm_power_max_write(struct hwm_drvdata *ddat, long val)
> > > > >    {
> > > > > +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> > > > > +
> > > > > +	int ret = 0, timeout = GUC_RESET_TIMEOUT;
> > > >
> > > > Patch looks good to me
> > >
> > > Great, thanks :)
> > >
> > > > apart that I am not sure what is the purpose of the timeout? This is just
> > > > the sysfs write path or has more callers?
> > >
> > > It is just the sysfs path, but the sysfs is accessed also by the oneAPI
> > > stack (Level 0). In the initial version I also didn't have the timeout
> > > thinking that the app can send a signal to the blocked thread to unblock
> > > it. I introduced the timeout after Rodrigo brought it up and I am now
> > > thinking maybe it's better to have the timeout in the driver since the app
> > > has no knowledge of how long GuC resets can take. But I can remove it if
> > > you think it's not needed.
> >
> > Maybe I am missing something but I don't get why we would need to provide a
> > timeout facility in sysfs? If the library writes here to configure something
> > it already has to expect a blocking write by the nature of a a write(2) and
> > sysfs contract. It can take long for any reason so I hope we are not
> > guaranteeing some latency number to someone? Or the concern is just about
> > things getting stuck? In which case I think Ctrl-C is the answer because
> > ETIME is not even listed as an errno for write(2).

Hmm, good point.

> I suggested the timeout on the other version because of that race,
> which is fixed now with this approach. It is probably better to remove
> it now to avoid confusions. I'm sorry about that.

No problem, I've removed the timeout in the latest version.

Thanks for the R-b.

Ashutosh
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 9ab8971679fe3..8471a667dfc71 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -51,6 +51,7 @@  struct hwm_drvdata {
 	char name[12];
 	int gt_n;
 	bool reset_in_progress;
+	wait_queue_head_t waitq;
 };
 
 struct i915_hwmon {
@@ -395,16 +396,41 @@  hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
 static int
 hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
+#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
+
+	int ret = 0, timeout = GUC_RESET_TIMEOUT;
 	struct i915_hwmon *hwmon = ddat->hwmon;
 	intel_wakeref_t wakeref;
-	int ret = 0;
+	DEFINE_WAIT(wait);
 	u32 nval;
 
-	mutex_lock(&hwmon->hwmon_lock);
-	if (hwmon->ddat.reset_in_progress) {
-		ret = -EAGAIN;
-		goto unlock;
+	/* Block waiting for GuC reset to complete when needed */
+	for (;;) {
+		mutex_lock(&hwmon->hwmon_lock);
+
+		prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
+
+		if (!hwmon->ddat.reset_in_progress)
+			break;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (!timeout) {
+			ret = -ETIME;
+			break;
+		}
+
+		mutex_unlock(&hwmon->hwmon_lock);
+
+		timeout = schedule_timeout(timeout);
 	}
+	finish_wait(&ddat->waitq, &wait);
+	if (ret)
+		goto unlock;
+
 	wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
 	/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
@@ -508,6 +534,7 @@  void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
 	intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
 			 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
 	hwmon->ddat.reset_in_progress = false;
+	wake_up_all(&hwmon->ddat.waitq);
 
 	mutex_unlock(&hwmon->hwmon_lock);
 }
@@ -784,6 +811,7 @@  void i915_hwmon_register(struct drm_i915_private *i915)
 	ddat->uncore = &i915->uncore;
 	snprintf(ddat->name, sizeof(ddat->name), "i915");
 	ddat->gt_n = -1;
+	init_waitqueue_head(&ddat->waitq);
 
 	for_each_gt(gt, i915, i) {
 		ddat_gt = hwmon->ddat_gt + i;