
drm/i915: don't use uncore spinlock to protect critical section in vblank

Message ID 20231116112700.648963-1-luciano.coelho@intel.com (mailing list archive)
State New, archived
Series drm/i915: don't use uncore spinlock to protect critical section in vblank

Commit Message

Luca Coelho Nov. 16, 2023, 11:27 a.m. UTC
Since we're abstracting the display code from the underlying driver
(i.e. i915 vs xe), we can't use the uncore's spinlock to protect
critical sections of our code.

After further inspection, it seems that the spinlock is not needed at
all and this can be handled by disabling preemption and interrupts
instead.

Change the vblank code so that we don't use uncore's spinlock anymore
and update the comments accordingly.  While at it, also remove
comments pointing out where to insert RT-specific calls, since we're
now explicitly calling preempt_disable/enable() anyway.

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---

Note: this replaces my previous patch discussed here:
https://patchwork.freedesktop.org/patch/563857/


 drivers/gpu/drm/i915/display/intel_vblank.c | 32 ++++++++++-----------
 1 file changed, 15 insertions(+), 17 deletions(-)

Comments

Ville Syrjälä Nov. 17, 2023, 7:19 a.m. UTC | #1
On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> Since we're abstracting the display code from the underlying driver
> (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> critical sections of our code.
> 
> After further inspection, it seems that the spinlock is not needed at
> all and this can be handled by disabling preemption and interrupts
> instead.

uncore.lock has multiple purposes:
1. serialize all register accesses to the same cacheline as on
   certain platforms that can hang the machine
2. protect the forcewake/etc. state

1 is relevant here, 2 is not.
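
As a concrete example of 1, this is roughly what the current
intel_get_crtc_scanline() relies on (simplified sketch with a made-up
function name, not the literal code):

static int scanline_read_serialized(struct intel_crtc *crtc)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	unsigned long irqflags;
	int position;

	/*
	 * uncore.lock serializes this raw read against every other
	 * register access going through uncore, which is what keeps
	 * same-cacheline accesses from racing on the affected platforms.
	 */
	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
	position = __intel_get_crtc_scanline(crtc);
	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);

	return position;
}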

> 
> Change the vblank code so that we don't use uncore's spinlock anymore
> and update the comments accordingly.  While at it, also remove
> comments pointing out where to insert RT-specific calls, since we're
> now explicitly calling preempt_disable/enable() anyway.
> 
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
> ---
> 
> Note: this replaces my previous patch discussed here:
> https://patchwork.freedesktop.org/patch/563857/
> 
> 
>  drivers/gpu/drm/i915/display/intel_vblank.c | 32 ++++++++++-----------
>  1 file changed, 15 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
> index 2cec2abf9746..28e38960806e 100644
> --- a/drivers/gpu/drm/i915/display/intel_vblank.c
> +++ b/drivers/gpu/drm/i915/display/intel_vblank.c
> @@ -302,13 +302,12 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
>  	}
>  
>  	/*
> -	 * Lock uncore.lock, as we will do multiple timing critical raw
> -	 * register reads, potentially with preemption disabled, so the
> -	 * following code must not block on uncore.lock.
> +	 * We will do multiple timing critical raw register reads, so
> +	 * disable interrupts and preemption to make sure this code
> +	 * doesn't get blocked.
>  	 */
> -	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
> -
> -	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
> +	local_irq_save(irqflags);
> +	preempt_disable();
>  
>  	/* Get optional system timestamp before query. */
>  	if (stime)
> @@ -372,9 +371,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
>  	if (etime)
>  		*etime = ktime_get();
>  
> -	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
> -
> -	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
> +	preempt_enable();
> +	local_irq_restore(irqflags);
>  
>  	/*
>  	 * While in vblank, position will be negative
> @@ -408,13 +406,14 @@ bool intel_crtc_get_vblank_timestamp(struct drm_crtc *crtc, int *max_error,
>  
>  int intel_get_crtc_scanline(struct intel_crtc *crtc)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>  	unsigned long irqflags;
>  	int position;
>  
> -	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
> +	local_irq_save(irqflags);
> +	preempt_disable();
>  	position = __intel_get_crtc_scanline(crtc);
> -	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
> +	preempt_enable();
> +	local_irq_restore(irqflags);
>  
>  	return position;
>  }
> @@ -528,16 +527,16 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
>  	 * Belts and suspenders locking to guarantee everyone sees 100%
>  	 * consistent state during fastset seamless refresh rate changes.
>  	 *
> -	 * vblank_time_lock takes care of all drm_vblank.c stuff, and
> -	 * uncore.lock takes care of __intel_get_crtc_scanline() which
> -	 * may get called elsewhere as well.
> +	 * vblank_time_lock takes care of all drm_vblank.c stuff.  For
> +	 * __intel_get_crtc_scanline() we don't need locking or
> +	 * disabling preemption and irqs, since this is already done
> +	 * by the vblank_time_lock spinlock calls.
>  	 *
>  	 * TODO maybe just protect everything (including
>  	 * __intel_get_crtc_scanline()) with vblank_time_lock?
>  	 * Need to audit everything to make sure it's safe.
>  	 */
>  	spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags);
> -	spin_lock(&i915->uncore.lock);
>  
>  	drm_calc_timestamping_constants(&crtc->base, &adjusted_mode);
>  
> @@ -547,6 +546,5 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
>  
>  	crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state);
>  
> -	spin_unlock(&i915->uncore.lock);
>  	spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags);
>  }
> -- 
> 2.39.2
Luca Coelho Nov. 17, 2023, 8:05 a.m. UTC | #2
Thanks for your comments, Ville!

On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > Since we're abstracting the display code from the underlying driver
> > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > critical sections of our code.
> > 
> > After further inspection, it seems that the spinlock is not needed at
> > all and this can be handled by disabling preemption and interrupts
> > instead.
> 
> uncore.lock has multiple purposes:
> 1. serialize all register accesses to the same cacheline as on
>    certain platforms that can hang the machine

Okay, do you remember which platforms? I couldn't find any reference to
this reason.  Also, the only place where we take the uncore.lock
is in this vblank code I changed, where the only explanation I found
was about timing, specifically when using RT-kernels and in very old
and slow platforms... (this was added 10 years ago).


> 2. protect the forcewake/etc. state
> 
> 1 is relevant here, 2 is not.

Okay, good that we have only one known problem. :)

--
Cheers,
Luca.
Ville Syrjälä Nov. 17, 2023, 8:41 a.m. UTC | #3
On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> Thanks for your comments, Ville!
> 
> On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > Since we're abstracting the display code from the underlying driver
> > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > critical sections of our code.
> > > 
> > > After further inspection, it seems that the spinlock is not needed at
> > > all and this can be handled by disabling preemption and interrupts
> > > instead.
> > 
> > uncore.lock has multiple purposes:
> > 1. serialize all register accesses to the same cacheline as on
> >    certain platforms that can hang the machine
> 
> Okay, do you remember which platforms?

HSW is the one I remember for sure being affected.
Although I don't recall if I ever managed to hang it
using display registers specifically. intel_gpu_top
certainly was very good at reproducing the problem.

> I couldn't find any reference to
> this reason. 

If all else fails git log is your friend.

> Also, the only place where we take the uncore.lock
> is in this vblank code I changed, where the only explanation I found
> was about timing, specifically when using RT-kernels and in very old
> and slow platforms... (this was added 10 years ago).
> 
> 
> > 2. protect the forcewake/etc. state
> > 
> > 1 is relevant here, 2 is not.
> 
> Okay, good that we have only one known problem. :)
> 
> --
> Cheers,
> Luca.
Luca Coelho Nov. 17, 2023, 8:46 a.m. UTC | #4
On Fri, 2023-11-17 at 10:41 +0200, Ville Syrjälä wrote:
> On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > Thanks for your comments, Ville!
> > 
> > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > Since we're abstracting the display code from the underlying driver
> > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > critical sections of our code.
> > > > 
> > > > After further inspection, it seems that the spinlock is not needed at
> > > > all and this can be handled by disabling preemption and interrupts
> > > > instead.
> > > 
> > > uncore.lock has multiple purposes:
> > > 1. serialize all register accesses to the same cacheline as on
> > >    certain platforms that can hang the machine
> > 
> > Okay, do you remember which platforms?
> 
> HSW is the one I remember for sure being affected.
> Although I don't recall if I ever managed to hang it
> using display registers specifically. intel_gpu_top
> certainly was very good at reproducing the problem.

So do we know if the display registers are affected at all?


> > I couldn't find any reference to
> > this reason. 
> 
> If all else fails git log is your friend.

Of course! That's where I found the info about the RT stuff.  But I
didn't find anything else regarding this.  Maybe I should just look
harder, if you don't have a reference at hand. ;)

--
Cheers,
Luca.
Ville Syrjälä Nov. 17, 2023, 9:26 a.m. UTC | #5
On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
> On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > Thanks for your comments, Ville!
> > 
> > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > Since we're abstracting the display code from the underlying driver
> > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > critical sections of our code.
> > > > 
> > > > After further inspection, it seems that the spinlock is not needed at
> > > > all and this can be handled by disabling preemption and interrupts
> > > > instead.
> > > 
> > > uncore.lock has multiple purposes:
> > > 1. serialize all register accesses to the same cacheline as on
> > >    certain platforms that can hang the machine
> > 
> > Okay, do you remember which platforms?
> 
> HSW is the one I remember for sure being affected.
> Although I don't recall if I ever managed to hang it
> using display registers specifically. intel_gpu_top
> certainly was very good at reproducing the problem.
> 
> > I couldn't find any reference to
> > this reason. 
> 
> If all else fails git log is your friend.

It seems to be documented in intel_uncore.h. Though that one
mentions IVB instead of HSW for some reason. I don't recall
seeing it on IVB myself, but I suppose it might have been an
issue there as well. How long the problem remained after HSW
I have no idea.

> 
> > Also, the only place where we take the uncore.lock
> > is in this vblank code I changed, where the only explanation I found
> > was about timing, specifically when using RT-kernels and in very old
> > and slow platforms... (this was added 10 years ago).
> > 
> > 
> > > 2. protect the forcewake/etc. state
> > > 
> > > 1 is relevant here, 2 is not.
> > 
> > Okay, good that we have only one known problem. :)
> > 
> > --
> > Cheers,
> > Luca.
> 
> -- 
> Ville Syrjälä
> Intel
Luca Coelho Nov. 17, 2023, 12:21 p.m. UTC | #6
Adding Tvrtko, for some reason he didn't get CCed before.


On Fri, 2023-11-17 at 11:26 +0200, Ville Syrjälä wrote:
> On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
> > On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > > Thanks for your comments, Ville!
> > > 
> > > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > > Since we're abstracting the display code from the underlying driver
> > > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > > critical sections of our code.
> > > > > 
> > > > > After further inspection, it seems that the spinlock is not needed at
> > > > > all and this can be handled by disabling preemption and interrupts
> > > > > instead.
> > > > 
> > > > uncore.lock has multiple purposes:
> > > > 1. serialize all register accesses to the same cacheline as on
> > > >    certain platforms that can hang the machine
> > > 
> > > Okay, do you remember which platforms?
> > 
> > HSW is the one I remember for sure being affected.
> > Although I don't recall if I ever managed to hang it
> > using display registers specifically. intel_gpu_top
> > certainly was very good at reproducing the problem.
> > 
> > > I couldn't find any reference to
> > > this reason. 
> > 
> > If all else fails git log is your friend.
> 
> It seems to be documented in intel_uncore.h. Though that one
> mentions IVB instead of HSW for some reason. I don't recall
> seeing it on IVB myself, but I suppose it might have been an
> issue there as well. How long the problem remained after HSW
> I have no idea.

Oh, somehow I missed that.  Thanks.

So, it seems that the locking is indeed needed.  I think there are two
options, then:

1. Go back to my previous version of the patch, where I had the wrapper
that didn't lock anything on Xe and implement the display part when we
get a similar implementation of the uncore.lock on Xe (if ever needed).

2. Implement a display-local lock for this, as suggested at some point,
including the other intel_de*() accesses.  But can we be sure that it's
enough to protect only the registers accessed by the display? I.e.
won't accessing display registers non-serially in relation to non-
display registers still be a problem?


Preferences?

--
Cheers,
Luca.
Tvrtko Ursulin Nov. 17, 2023, 12:46 p.m. UTC | #7
On 17/11/2023 12:21, Coelho, Luciano wrote:
> Adding Tvrtko, for some reason he didn't get CCed before.
> 
> 
> On Fri, 2023-11-17 at 11:26 +0200, Ville Syrjälä wrote:
>> On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
>>> On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
>>>> Thanks for your comments, Ville!
>>>>
>>>> On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
>>>>> On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
>>>>>> Since we're abstracting the display code from the underlying driver
>>>>>> (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
>>>>>> critical sections of our code.
>>>>>>
>>>>>> After further inspection, it seems that the spinlock is not needed at
>>>>>> all and this can be handled by disabling preemption and interrupts
>>>>>> instead.
>>>>>
>>>>> uncore.lock has multiple purposes:
>>>>> 1. serialize all register accesses to the same cacheline as on
>>>>>     certain platforms that can hang the machine
>>>>
>>>> Okay, do you remember which platforms?
>>>
>>> HSW is the one I remember for sure being affected.
>>> Although I don't recall if I ever managed to hang it
>>> using display registers specifically. intel_gpu_top
>>> certainly was very good at reproducing the problem.
>>>
>>>> I couldn't find any reference to
>>>> this reason.
>>>
>>> If all else fails git log is your friend.
>>
>> It seems to be documented in intel_uncore.h. Though that one
>> mentions IVB instead of HSW for some reason. I don't recall
>> seeing it on IVB myself, but I suppose it might have been an
>> issue there as well. How long the problem remained after HSW
>> I have no idea.
> 
> Oh, somehow I missed that.  Thanks.
> 
> So, it seems that the locking is indeed needed.  I think there are two
> options, then:
> 
> 1. Go back to my previous version of the patch, where I had the wrapper
> that didn't lock anything on Xe and implement the display part when we
> get a similar implementation of the uncore.lock on Xe (if ever needed).
> 
> 2. Implement a display-local lock for this, as suggested at some point,
> including the other intel_de*() accesses.  But can we be sure that it's
> enough to protect only the registers accessed by the display? I.e.
> won't accessing display registers non-serially in relation to non-
> display registers still be a problem?
> 
> 
> Preferences?

AFAIR my initial complaint was the naming which was along the lines of 
intel_spin_lock(uncore). How to come up with a clean and logical name is 
the question...

On Xe you don't need a lock, since the HSW/IVB/cacheline angle does not 
exist, but you need a name which does not clash with either.

Assuming you still need the preempt off (or irq off) on Xe for better 
timings, maybe along the lines of intel_vblank_section_enter/exit(i915)? 
Although I am not up to speed with what object you would be passing in 
from Xe instead of i915, i.e. how exactly polymorphism is implemented in 
shared code.
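
Purely as a sketch of what I mean (the bodies below are just my guess at
what each driver would want, not a worked-out implementation):

static inline void intel_vblank_section_enter(struct drm_i915_private *i915)
{
	/* i915 flavour: keep the same-cacheline serialization */
	spin_lock(&i915->uncore.lock);
}

static inline void intel_vblank_section_exit(struct drm_i915_private *i915)
{
	spin_unlock(&i915->uncore.lock);
}

/*
 * The Xe flavour of the same two helpers would reduce to
 * preempt_disable()/preempt_enable(), since the cacheline hazard does
 * not exist there and only the timing matters.
 */

The callers would keep handling irq off themselves, as they do today.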

Regards,

Tvrtko
Rodrigo Vivi Nov. 17, 2023, 4:50 p.m. UTC | #8
On Fri, Nov 17, 2023 at 11:26:44AM +0200, Ville Syrjälä wrote:
> On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
> > On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > > Thanks for your comments, Ville!
> > > 
> > > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > > Since we're abstracting the display code from the underlying driver
> > > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > > critical sections of our code.
> > > > > 
> > > > > After further inspection, it seems that the spinlock is not needed at
> > > > > all and this can be handled by disabling preemption and interrupts
> > > > > instead.
> > > > 
> > > > uncore.lock has multiple purposes:
> > > > 1. serialize all register accesses to the same cacheline as on
> > > >    certain platforms that can hang the machine
> > > 
> > > Okay, do you remember which platforms?
> > 
> > HSW is the one I remember for sure being affected.
> > Althoguh I don't recall if I ever managed to hang it
> > using display registers specifically. intel_gpu_top
> > certainly was very good at reproducing the problem.
> > 
> > > I couldn't find any reference to
> > > this reason. 
> > 
> > If all else fails git log is your friend.
> 
> It seems to be documented in intel_uncore.h. Though that one
> mentions IVB instead of HSW for some reason. I don't recall
> seeing it on IVB myself, but I suppose it might have been an
> issue there as well. How long the problem remained after HSW
> I have no idea.

Paulo very recently told me that he could easily reproduce the issue
on IVB, simply by running 2 glxgears at the same time.

> 
> > 
> > > Also, the only place where we take the uncore.lock
> > > is in this vblank code I changed, where the only explanation I found
> > > was about timing, specifically when using RT-kernels and in very old
> > > and slow platforms... (this was added 10 years ago).
> > > 
> > > 
> > > > 2. protect the forcewake/etc. state
> > > > 
> > > > 1 is relevant here, 2 is not.
> > > 
> > > Okay, good that we have only one known problem. :)

and good it is an old one! :)

> > > 
> > > --
> > > Cheers,
> > > Luca.
> > 
> > -- 
> > Ville Syrjälä
> > Intel
> 
> -- 
> Ville Syrjälä
> Intel
Zanoni, Paulo R Nov. 17, 2023, 5:15 p.m. UTC | #9
On Fri, 2023-11-17 at 11:50 -0500, Rodrigo Vivi wrote:
> On Fri, Nov 17, 2023 at 11:26:44AM +0200, Ville Syrjälä wrote:
> > On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
> > > On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > > > Thanks for your comments, Ville!
> > > > 
> > > > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > > > Since we're abstracting the display code from the underlying driver
> > > > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > > > critical sections of our code.
> > > > > > 
> > > > > > After further inspection, it seems that the spinlock is not needed at
> > > > > > all and this can be handled by disabling preemption and interrupts
> > > > > > instead.
> > > > > 
> > > > > uncore.lock has multiple purposes:
> > > > > 1. serialize all register accesses to the same cacheline as on
> > > > >    certain platforms that can hang the machine
> > > > 
> > > > Okay, do you remember which platforms?
> > > 
> > > HSW is the one I remember for sure being affected.
> > > Although I don't recall if I ever managed to hang it
> > > using display registers specifically. intel_gpu_top
> > > certainly was very good at reproducing the problem.
> > > 
> > > > I couldn't find any reference to
> > > > this reason. 
> > > 
> > > If all else fails git log is your friend.
> > 
> > It seems to be documented in intel_uncore.h. Though that one
> > mentions IVB instead of HSW for some reason. I don't recall
> > seeing it on IVB myself, but I suppose it might have been an
> > issue there as well. How long the problem remained after HSW
> > I have no idea.
> 
> Paulo very recently told me that he could easily reproduce the issue
> on IVB, simply by running 2 glxgears at the same time.

Just a minor correction: I didn't give the degree of confidence in my
answer that the sentence above suggests :). It's all "as far as I
remember". This is all from like 10 years ago and I can't remember what
I had for lunch yesterday. Maybe it was some other similar bug that I
could reproduce with glxgears. Also, the way we used registers was
different back then, maybe today glxgears is not enough to do it
anymore. And I think it required vblank_mode=0.

> 
> > 
> > > 
> > > > Also, the only place where we take the uncore.lock
> > > > is in this vblank code I changed, where the only explanation I found
> > > > was about timing, specifically when using RT-kernels and in very old
> > > > and slow platforms... (this was added 10 years ago).
> > > > 
> > > > 
> > > > > 2. protect the forcewake/etc. state
> > > > > 
> > > > > 1 is relevant here, 2 is not.
> > > > 
> > > > Okay, good that we have only one known problem. :)
> 
> and good it is an old one! :)
> 
> > > > 
> > > > --
> > > > Cheers,
> > > > Luca.
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel
> > 
> > -- 
> > Ville Syrjälä
> > Intel
Luca Coelho Nov. 29, 2023, 8:20 a.m. UTC | #10
On Fri, 2023-11-17 at 12:46 +0000, Tvrtko Ursulin wrote:
> On 17/11/2023 12:21, Coelho, Luciano wrote:
> > Adding Tvrtko, for some reason he didn't get CCed before.
> > 
> > 
> > On Fri, 2023-11-17 at 11:26 +0200, Ville Syrjälä wrote:
> > > On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
> > > > On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > > > > Thanks for your comments, Ville!
> > > > > 
> > > > > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > > > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > > > > Since we're abstracting the display code from the underlying driver
> > > > > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > > > > critical sections of our code.
> > > > > > > 
> > > > > > > After further inspection, it seems that the spinlock is not needed at
> > > > > > > all and this can be handled by disabling preemption and interrupts
> > > > > > > instead.
> > > > > > 
> > > > > > uncore.lock has multiple purposes:
> > > > > > 1. serialize all register accesses to the same cacheline as on
> > > > > >     certain platforms that can hang the machine
> > > > > 
> > > > > Okay, do you remember which platforms?
> > > > 
> > > > HSW is the one I remember for sure being affected.
> > > > Although I don't recall if I ever managed to hang it
> > > > using display registers specifically. intel_gpu_top
> > > > certainly was very good at reproducing the problem.
> > > > 
> > > > > I couldn't find any reference to
> > > > > this reason.
> > > > 
> > > > If all else fails git log is your friend.
> > > 
> > > It seems to be documented in intel_uncore.h. Though that one
> > > mentions IVB instead of HSW for some reason. I don't recall
> > > seeing it on IVB myself, but I suppose it might have been an
> > > issue there as well. How long the problem remained after HSW
> > > I have no idea.
> > 
> > Oh, somehow I missed that.  Thanks.
> > 
> > So, it seems that the locking is indeed needed.  I think there are two
> > options, then:
> > 
> > 1. Go back to my previous version of the patch, where I had the wrapper
> > that didn't lock anything on Xe and implement the display part when we
> > get a similar implementation of the uncore.lock on Xe (if ever needed).
> > 
> > 2. Implement a display-local lock for this, as suggested at some point,
> > including the other intel_de*() accesses.  But can we be sure that it's
> > enough to protect only the registers accessed by the display? I.e.
> > won't accessing display registers non-serially in relation to non-
> > display registers still be a problem?
> > 
> > 
> > Preferences?
> 
> AFAIR my initial complaint was the naming which was along the lines of 
> intel_spin_lock(uncore). How to come up with a clean and logical name is 
> the question...

You're right, that was your first comment, and now we're back to it. :)


> On Xe you don't need a lock since HSW/IVB/cacheline angle does not exist 
> but you need a name which does not clash with either.
> 
> Assuming you still need the preempt off (or irq off) on Xe for better 
> timings, maybe along the lines of intel_vblank_section_enter/exit(i915)?

I like this name, because it's indeed this vblank section we're trying
to protect here, and this is not used anywhere else.

 
> Although I am not up to speed with what object instead of i915 would you 
> be passing in from Xe ie. how exactly polymorphism is implemented in 
> shared code.

AFAICT we are still using the i915 structure for most things inside the
display code, so I guess we should use that for now.

I'll send a new version of my original patch in a bit.
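
Roughly along these lines at the call sites, I think (sketch only,
assuming the helper names suggested above; the actual patch may differ):

int intel_get_crtc_scanline(struct intel_crtc *crtc)
{
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
	unsigned long irqflags;
	int position;

	/* irqs off for the timing-critical read, then the per-driver helper */
	local_irq_save(irqflags);
	intel_vblank_section_enter(i915);

	position = __intel_get_crtc_scanline(crtc);

	intel_vblank_section_exit(i915);
	local_irq_restore(irqflags);

	return position;
}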

--
Cheers,
Luca.
Luca Coelho Nov. 29, 2023, 8:22 a.m. UTC | #11
On Fri, 2023-11-17 at 17:15 +0000, Zanoni, Paulo R wrote:
> On Fri, 2023-11-17 at 11:50 -0500, Rodrigo Vivi wrote:
> > On Fri, Nov 17, 2023 at 11:26:44AM +0200, Ville Syrjälä wrote:
> > > On Fri, Nov 17, 2023 at 10:41:43AM +0200, Ville Syrjälä wrote:
> > > > On Fri, Nov 17, 2023 at 08:05:21AM +0000, Coelho, Luciano wrote:
> > > > > Thanks for your comments, Ville!
> > > > > 
> > > > > On Fri, 2023-11-17 at 09:19 +0200, Ville Syrjälä wrote:
> > > > > > On Thu, Nov 16, 2023 at 01:27:00PM +0200, Luca Coelho wrote:
> > > > > > > Since we're abstracting the display code from the underlying driver
> > > > > > > (i.e. i915 vs xe), we can't use the uncore's spinlock to protect
> > > > > > > critical sections of our code.
> > > > > > > 
> > > > > > > After further inspection, it seems that the spinlock is not needed at
> > > > > > > all and this can be handled by disabling preemption and interrupts
> > > > > > > instead.
> > > > > > 
> > > > > > uncore.lock has multiple purposes:
> > > > > > 1. serialize all register accesses to the same cacheline as on
> > > > > >    certain platforms that can hang the machine
> > > > > 
> > > > > Okay, do you remember which platforms?
> > > > 
> > > > HSW is the one I remember for sure being affected.
> > > > Although I don't recall if I ever managed to hang it
> > > > using display registers specifically. intel_gpu_top
> > > > certainly was very good at reproducing the problem.
> > > > 
> > > > > I couldn't find any reference to
> > > > > this reason. 
> > > > 
> > > > If all else fails git log is your friend.
> > > 
> > > It seems to be documented in intel_uncore.h. Though that one
> > > mentions IVB instead of HSW for some reason. I don't recall
> > > seeing it on IVB myself, but I suppose it might have been an
> > > issue there as well. How long the problem remained after HSW
> > > I have no idea.
> > 
> > Paulo very recently told me that he could easily reproduce the issue
> > on IVB, simply by running 2 glxgears at the same time.
> 
> Just a minor correction: I didn't give the degree of confidence in my
> answer that the sentence above suggests :). It's all "as far as I
> remember". This is all from like 10 years ago and I can't remember what
> I had for lunch yesterday. Maybe it was some other similar bug that I
> could reproduce with glxgears. Also, the way we used registers was
> different back then, maybe today glxgears is not enough to do it
> anymore. And I think it required vblank_mode=0.

Great, thanks for this information! It's good to know the actual facts
for this implementation.  So, we'll keep things mostly as they are,
without removing any locking and go back to my original version of this
implementation, which keeps the locking with i915.

--
Cheers,
Luca.

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
index 2cec2abf9746..28e38960806e 100644
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
@@ -302,13 +302,12 @@  static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 	}
 
 	/*
-	 * Lock uncore.lock, as we will do multiple timing critical raw
-	 * register reads, potentially with preemption disabled, so the
-	 * following code must not block on uncore.lock.
+	 * We will do multiple timing critical raw register reads, so
+	 * disable interrupts and preemption to make sure this code
+	 * doesn't get blocked.
 	 */
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
-	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+	local_irq_save(irqflags);
+	preempt_disable();
 
 	/* Get optional system timestamp before query. */
 	if (stime)
@@ -372,9 +371,8 @@  static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 	if (etime)
 		*etime = ktime_get();
 
-	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+	preempt_enable();
+	local_irq_restore(irqflags);
 
 	/*
 	 * While in vblank, position will be negative
@@ -408,13 +406,14 @@  bool intel_crtc_get_vblank_timestamp(struct drm_crtc *crtc, int *max_error,
 
 int intel_get_crtc_scanline(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	unsigned long irqflags;
 	int position;
 
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	preempt_disable();
 	position = __intel_get_crtc_scanline(crtc);
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+	preempt_enable();
+	local_irq_restore(irqflags);
 
 	return position;
 }
@@ -528,16 +527,16 @@  void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	 * Belts and suspenders locking to guarantee everyone sees 100%
 	 * consistent state during fastset seamless refresh rate changes.
 	 *
-	 * vblank_time_lock takes care of all drm_vblank.c stuff, and
-	 * uncore.lock takes care of __intel_get_crtc_scanline() which
-	 * may get called elsewhere as well.
+	 * vblank_time_lock takes care of all drm_vblank.c stuff.  For
+	 * __intel_get_crtc_scanline() we don't need locking or
+	 * disabling preemption and irqs, since this is already done
+	 * by the vblank_time_lock spinlock calls.
 	 *
 	 * TODO maybe just protect everything (including
 	 * __intel_get_crtc_scanline()) with vblank_time_lock?
 	 * Need to audit everything to make sure it's safe.
 	 */
 	spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags);
-	spin_lock(&i915->uncore.lock);
 
 	drm_calc_timestamping_constants(&crtc->base, &adjusted_mode);
 
@@ -547,6 +546,5 @@  void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 
 	crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state);
 
-	spin_unlock(&i915->uncore.lock);
 	spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags);
 }