diff mbox series

[v2] drm/i915: Reduce MCR lock surface

Message ID 20231004120407.12251-1-nirmoy.das@intel.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/i915: Reduce MCR lock surface | expand

Commit Message

Nirmoy Das Oct. 4, 2023, 12:04 p.m. UTC
Take the MCR lock only when the driver needs to write to MCR-based
TLB registers.

To prevent GT reset interference, employ gt->reset.mutex instead, since
intel_gt_mcr_multicast_write relies on gt->uncore->lock not being held.

v2: remove unused var, flags.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_tlb.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

Comments

Rodrigo Vivi Oct. 4, 2023, 12:44 p.m. UTC | #1
On Wed, Oct 04, 2023 at 02:04:07PM +0200, Nirmoy Das wrote:
> Take the mcr lock only when driver needs to write into a mcr based
> tlb based registers.
> 
> To prevent GT reset interference, employ gt->reset.mutex instead, since
> intel_gt_mcr_multicast_write relies on gt->uncore->lock not being held.

This looks a lot like protecting code and not protecting data [1]

But to be really honest I'm afraid we were already doing this before
this patch but with 2 other locks instead.

[1] - https://blog.ffwll.ch/2022/07/locking-engineering.html

> 
> v2: remove unused var, flags.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_tlb.c | 13 +++++--------
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
> index 139608c30d97..0ad905df4a98 100644
> --- a/drivers/gpu/drm/i915/gt/intel_tlb.c
> +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
> @@ -52,15 +52,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  	struct intel_engine_cs *engine;
>  	intel_engine_mask_t awake, tmp;
>  	enum intel_engine_id id;
> -	unsigned long flags;
>  
>  	if (GRAPHICS_VER(i915) < 8)
>  		return;
>  
>  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>  
> -	intel_gt_mcr_lock(gt, &flags);
> -	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
> +	mutex_lock(&gt->reset.mutex);/* serialise invalidate with GT reset */

I'm still looking at this and the commit message above and trying to understand
why we are doing this and replacing the previous two locks with this other one. Why?

>  
>  	awake = 0;
>  	for_each_engine(engine, gt, id) {
> @@ -68,9 +66,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  			continue;
>  
>  		if (engine->tlb_inv.mcr)
> -			intel_gt_mcr_multicast_write_fw(gt,
> -							engine->tlb_inv.reg.mcr_reg,
> -							engine->tlb_inv.request);
> +			intel_gt_mcr_multicast_write(gt,
> +						     engine->tlb_inv.reg.mcr_reg,
> +						     engine->tlb_inv.request);

you are already taking the forcewake_all domain above, so you wouldn't
need to convert this to the variant that grabs the forcewake underneath.

Also this is not mentioned in the commit message above.

>  		else
>  			intel_uncore_write_fw(uncore,
>  					      engine->tlb_inv.reg.reg,
> @@ -90,8 +88,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>  	     IS_ALDERLAKE_P(i915)))
>  		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
>  
> -	spin_unlock(&uncore->lock);
> -	intel_gt_mcr_unlock(gt, flags);
> +	mutex_unlock(&gt->reset.mutex);
>  
>  	for_each_engine_masked(engine, gt, awake, tmp) {
>  		if (wait_for_invalidate(engine))
> -- 
> 2.41.0
>
Nirmoy Das Oct. 4, 2023, 1:54 p.m. UTC | #2
Hi Rodrigo,

On 10/4/2023 2:44 PM, Rodrigo Vivi wrote:
> On Wed, Oct 04, 2023 at 02:04:07PM +0200, Nirmoy Das wrote:
>> Take the mcr lock only when driver needs to write into a mcr based
>> tlb based registers.
>>
>> To prevent GT reset interference, employ gt->reset.mutex instead, since
>> intel_gt_mcr_multicast_write relies on gt->uncore->lock not being held.
> This looks a lot like protecting code and not protecting data [1]
>
> But to be really honest I'm afraid we were already doing this before
> this patch but with 2 other locks instead.

I hadn't thought about that, but yes, the issue was there already.


>
> [1] - https://blog.ffwll.ch/2022/07/locking-engineering.html
>
>> v2: remove unused var, flags.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_tlb.c | 13 +++++--------
>>   1 file changed, 5 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
>> index 139608c30d97..0ad905df4a98 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_tlb.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
>> @@ -52,15 +52,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>   	struct intel_engine_cs *engine;
>>   	intel_engine_mask_t awake, tmp;
>>   	enum intel_engine_id id;
>> -	unsigned long flags;
>>   
>>   	if (GRAPHICS_VER(i915) < 8)
>>   		return;
>>   
>>   	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>>   
>> -	intel_gt_mcr_lock(gt, &flags);
>> -	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
>> +	mutex_lock(&gt->reset.mutex);/* serialise invalidate with GT reset */
> I'm still looking at this and the commit message above and trying to understand
> why we are doing this and changing the previous 2 by this other one. why?


We need the MCR lock only for intel_gt_mcr_multicast_*(), so I am not
replacing the two locks here but moving the MCR lock down to
where we were doing intel_gt_mcr_multicast_write_fw().


why s/spin_lock(&uncore->lock)/mutex_lock(&gt->reset.mutex):

intel_gt_mcr_multicast_*() expects gt->uncore->lock to be not held and 
to achieve this, I could do something like:

if (engine->tlb_inv.mcr) {

      spin_unlock(&uncore->lock);

      intel_gt_mcr_lock(gt, &flags);

      intel_gt_mcr_multicast_write_fw

      intel_gt_mcr_unlock(gt, flags);

     spin_lock(&uncore->lock);

}

Or take gt->reset.mutex instead which should block any concurrent gt reset.

If this is not acceptable then I can pick the first option above, but I am
not sure how safe it is to release uncore->lock and then take it back again.

>
>>   
>>   	awake = 0;
>>   	for_each_engine(engine, gt, id) {
>> @@ -68,9 +66,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>   			continue;
>>   
>>   		if (engine->tlb_inv.mcr)
>> -			intel_gt_mcr_multicast_write_fw(gt,
>> -							engine->tlb_inv.reg.mcr_reg,
>> -							engine->tlb_inv.request);
>> +			intel_gt_mcr_multicast_write(gt,
>> +						     engine->tlb_inv.reg.mcr_reg,
>> +						     engine->tlb_inv.request);
> you are already taking the forcewake_all domain above, so you wouldn't
> need to convert this to the variant that grabs the forcewake underneath.
>
> Also this is not mentioned in the commit message above.

intel_gt_mcr_multicast_write() takes the MCR lock for us, which helps replace multiple lines with one.
Will there be any side effects from that?

I should've added that to the commit message.

Regards,
Nirmoy


>
>>   		else
>>   			intel_uncore_write_fw(uncore,
>>   					      engine->tlb_inv.reg.reg,
>> @@ -90,8 +88,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>   	     IS_ALDERLAKE_P(i915)))
>>   		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
>>   
>> -	spin_unlock(&uncore->lock);
>> -	intel_gt_mcr_unlock(gt, flags);
>> +	mutex_unlock(&gt->reset.mutex);
>>   
>>   	for_each_engine_masked(engine, gt, awake, tmp) {
>>   		if (wait_for_invalidate(engine))
>> -- 
>> 2.41.0
>>
Rodrigo Vivi Oct. 4, 2023, 2:37 p.m. UTC | #3
On Wed, Oct 04, 2023 at 03:54:59PM +0200, Nirmoy Das wrote:
> Hi Rodrigo,
> 
> On 10/4/2023 2:44 PM, Rodrigo Vivi wrote:
> > On Wed, Oct 04, 2023 at 02:04:07PM +0200, Nirmoy Das wrote:
> > > Take the mcr lock only when driver needs to write into a mcr based
> > > tlb based registers.
> > > 
> > > To prevent GT reset interference, employ gt->reset.mutex instead, since
> > > intel_gt_mcr_multicast_write relies on gt->uncore->lock not being held.
> > This looks a lot like protecting code and not protecting data [1]
> > 
> > But to be really honest I'm afraid we were already doing this before
> > this patch but with 2 other locks instead.
> 
> I haven't thought about that but yes, the issue was there already.
> 
> 
> > 
> > [1] - https://blog.ffwll.ch/2022/07/locking-engineering.html
> > 
> > > v2: remove unused var, flags.
> > > 
> > > Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_tlb.c | 13 +++++--------
> > >   1 file changed, 5 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
> > > index 139608c30d97..0ad905df4a98 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_tlb.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
> > > @@ -52,15 +52,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
> > >   	struct intel_engine_cs *engine;
> > >   	intel_engine_mask_t awake, tmp;
> > >   	enum intel_engine_id id;
> > > -	unsigned long flags;
> > >   	if (GRAPHICS_VER(i915) < 8)
> > >   		return;
> > >   	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> > > -	intel_gt_mcr_lock(gt, &flags);
> > > -	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
> > > +	mutex_lock(&gt->reset.mutex);/* serialise invalidate with GT reset */
> > I'm still looking at this and the commit message above and trying to understand
> > why we are doing this and changing the previous 2 by this other one. why?
> 
> 
> We need the MCR lock only for intel_gt_mcr_multicast_*() so I am not
> replacing the two locks here but moving the mcr lock down
> 
> where we were doing intel_gt_mcr_multicast_write_fw()
> 
> 
> why s/spin_lock(&uncore->lock)/mutex_lock(&gt->reset.mutex):
> 
> intel_gt_mcr_multicast_*() expects gt->uncore->lock to be not held

Is there any lockdep assert or primitive that we could/should use
to avoid this same issue in the future?
Anyway, this is another thing that is important to mention in the
commit message.

and why is that? what I have in mind goes along with the comment
above intel_de_read_fw():
"""
Access to registers should
 * therefore generally be serialised, by either the dev_priv->uncore.lock or
"""

> and to
> achieve this, I could do something like:
> 
> if (engine->tlb_inv.mcr) {
> 
>      spin_unlock(&uncore->lock);
> 
>      intel_gt_mcr_lock(gt, &flags);
> 
>      intel_gt_mcr_multicast_write_fw
> 
>      intel_gt_mcr_unlock(gt, flags);
> 
>     spin_lock(&uncore->lock);
> 
> }
> 
> Or take gt->reset.mutex instead which should block any concurrent gt reset.
> 
> If this is not acceptable then I can pick the above 1st option but I am not
> sure how safe is it do release uncore->lock and then take it back again.

hmm... probably the gt_reset one is better than releasing and grabbing it
again.

> 
> > 
> > >   	awake = 0;
> > >   	for_each_engine(engine, gt, id) {
> > > @@ -68,9 +66,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
> > >   			continue;
> > >   		if (engine->tlb_inv.mcr)
> > > -			intel_gt_mcr_multicast_write_fw(gt,
> > > -							engine->tlb_inv.reg.mcr_reg,
> > > -							engine->tlb_inv.request);
> > > +			intel_gt_mcr_multicast_write(gt,
> > > +						     engine->tlb_inv.reg.mcr_reg,
> > > +						     engine->tlb_inv.request);
> > you are already taking the forcewake_all domain above, so you wouldn't
> > need to convert this to the variant that grabs the forcewake underneath.
> > 
> > Also this is not mentioned in the commit message above.
> 
> intel_gt_mcr_multicast_write() takes the mcr lock for us, helps replacing multiple lines into one.
> Will there be any side-effects for that ?

hmm... I can't foresee side effects here... but I'm asking myself why on the non
MCR ones we are using the global forcewake_all and the _fw to start with.
Maybe there was a reason for that? Because in general we should prefer the non _fw
variants to start with. Maybe we should dig into the history there to understand
why the line below started out using intel_uncore_write_fw?

> 
> I should've added that the commit message.

I'm even wondering if this should be two separate patches?!

> 
> Regards,
> Nirmoy
> 
> 
> > 
> > >   		else
> > >   			intel_uncore_write_fw(uncore,
> > >   					      engine->tlb_inv.reg.reg,
> > > @@ -90,8 +88,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
> > >   	     IS_ALDERLAKE_P(i915)))
> > >   		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
> > > -	spin_unlock(&uncore->lock);
> > > -	intel_gt_mcr_unlock(gt, flags);
> > > +	mutex_unlock(&gt->reset.mutex);
> > >   	for_each_engine_masked(engine, gt, awake, tmp) {
> > >   		if (wait_for_invalidate(engine))
> > > -- 
> > > 2.41.0
> > >
Nirmoy Das Oct. 4, 2023, 4:17 p.m. UTC | #4
Hi Rodrigo,

On 10/4/2023 4:37 PM, Rodrigo Vivi wrote:
> On Wed, Oct 04, 2023 at 03:54:59PM +0200, Nirmoy Das wrote:
>> Hi Rodrigo,
>>
>> On 10/4/2023 2:44 PM, Rodrigo Vivi wrote:
>>> On Wed, Oct 04, 2023 at 02:04:07PM +0200, Nirmoy Das wrote:
>>>> Take the mcr lock only when driver needs to write into a mcr based
>>>> tlb based registers.
>>>>
>>>> To prevent GT reset interference, employ gt->reset.mutex instead, since
>>>> intel_gt_mcr_multicast_write relies on gt->uncore->lock not being held.
>>> This looks a lot like protecting code and not protecting data [1]
>>>
>>> But to be really honest I'm afraid we were already doing this before
>>> this patch but with 2 other locks instead.
>> I haven't thought about that but yes, the issue was there already.
>>
>>
>>> [1] - https://blog.ffwll.ch/2022/07/locking-engineering.html
>>>
>>>> v2: remove unused var, flags.
>>>>
>>>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/gt/intel_tlb.c | 13 +++++--------
>>>>    1 file changed, 5 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
>>>> index 139608c30d97..0ad905df4a98 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_tlb.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
>>>> @@ -52,15 +52,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>>>    	struct intel_engine_cs *engine;
>>>>    	intel_engine_mask_t awake, tmp;
>>>>    	enum intel_engine_id id;
>>>> -	unsigned long flags;
>>>>    	if (GRAPHICS_VER(i915) < 8)
>>>>    		return;
>>>>    	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>>>> -	intel_gt_mcr_lock(gt, &flags);
>>>> -	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
>>>> +	mutex_lock(&gt->reset.mutex);/* serialise invalidate with GT reset */
>>> I'm still looking at this and the commit message above and trying to understand
>>> why we are doing this and changing the previous 2 by this other one. why?
>>
>> We need the MCR lock only for intel_gt_mcr_multicast_*() so I am not
>> replacing the two locks here but moving the mcr lock down
>>
>> where we were doing intel_gt_mcr_multicast_write_fw()
>>
>>
>> why s/spin_lock(&uncore->lock)/mutex_lock(&gt->reset.mutex):
>>
>> intel_gt_mcr_multicast_*() expects gt->uncore->lock to be not held
> is there any lockdep assert or primitive that we could/should do
> that to avoid this same issue in the future?

We have lockdep asserts for those MCR functions.


> anyway, this is also another thing that it is important for the
> commit message.
>
> and why is that? what I have in mind goes along with the comment
> above intel_de_read_fw():
> """
> Access to registers should
>   * therefore generally be serialised, by either the dev_priv->uncore.lock or
> """

Yes, the commit message should've been more clear.


Anyway, please ignore this patch. I need to find a better way, and it
also didn't completely fix the issue that I was working on.


Thanks,

Nirmoy

>
>> and to
>> achieve this, I could do something like:
>>
>> if (engine->tlb_inv.mcr) {
>>
>>       spin_unlock(&uncore->lock);
>>
>>       intel_gt_mcr_lock(gt, &flags);
>>
>>       intel_gt_mcr_multicast_write_fw
>>
>>       intel_gt_mcr_unlock(gt, flags);
>>
>>      spin_lock(&uncore->lock);
>>
>> }
>>
>> Or take gt->reset.mutex instead which should block any concurrent gt reset.
>>
>> If this is not acceptable then I can pick the above 1st option but I am not
>> sure how safe is it do release uncore->lock and then take it back again.
> hmm... probably the gt_reset one is better than releasing and grabbing it
> again.
>
>>>>    	awake = 0;
>>>>    	for_each_engine(engine, gt, id) {
>>>> @@ -68,9 +66,9 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>>>    			continue;
>>>>    		if (engine->tlb_inv.mcr)
>>>> -			intel_gt_mcr_multicast_write_fw(gt,
>>>> -							engine->tlb_inv.reg.mcr_reg,
>>>> -							engine->tlb_inv.request);
>>>> +			intel_gt_mcr_multicast_write(gt,
>>>> +						     engine->tlb_inv.reg.mcr_reg,
>>>> +						     engine->tlb_inv.request);
>>> you are already taking the forcewake_all domain above, so you wouldn't
>>> need to convert this to the variant that grabs the forcewake underneath.
>>>
>>> Also this is not mentioned in the commit message above.
>> intel_gt_mcr_multicast_write() takes the mcr lock for us, helps replacing multiple lines into one.
>> Will there be any side-effects for that ?
> hmm... I can't forsee side-effects here... but I'm asking myself why on the non
> MCR ones we are using the global forcewake_all and the _fw to start with.
> Maybe there was a reason for that? Because in general we should prefer the non _fw
> variants to start with. Maybe we should dig into the history there to understand
> why the line below started with the intel_uncore_write_fw below?
>
>> I should've added that the commit message.
> I'm even wondering if this should be 2 separated patches?!
>
>> Regards,
>> Nirmoy
>>
>>
>>>>    		else
>>>>    			intel_uncore_write_fw(uncore,
>>>>    					      engine->tlb_inv.reg.reg,
>>>> @@ -90,8 +88,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>>>    	     IS_ALDERLAKE_P(i915)))
>>>>    		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
>>>> -	spin_unlock(&uncore->lock);
>>>> -	intel_gt_mcr_unlock(gt, flags);
>>>> +	mutex_unlock(&gt->reset.mutex);
>>>>    	for_each_engine_masked(engine, gt, awake, tmp) {
>>>>    		if (wait_for_invalidate(engine))
>>>> -- 
>>>> 2.41.0
>>>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d97..0ad905df4a98 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -52,15 +52,13 @@  static void mmio_invalidate_full(struct intel_gt *gt)
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t awake, tmp;
 	enum intel_engine_id id;
-	unsigned long flags;
 
 	if (GRAPHICS_VER(i915) < 8)
 		return;
 
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
-	intel_gt_mcr_lock(gt, &flags);
-	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
+	mutex_lock(&gt->reset.mutex);/* serialise invalidate with GT reset */
 
 	awake = 0;
 	for_each_engine(engine, gt, id) {
@@ -68,9 +66,9 @@  static void mmio_invalidate_full(struct intel_gt *gt)
 			continue;
 
 		if (engine->tlb_inv.mcr)
-			intel_gt_mcr_multicast_write_fw(gt,
-							engine->tlb_inv.reg.mcr_reg,
-							engine->tlb_inv.request);
+			intel_gt_mcr_multicast_write(gt,
+						     engine->tlb_inv.reg.mcr_reg,
+						     engine->tlb_inv.request);
 		else
 			intel_uncore_write_fw(uncore,
 					      engine->tlb_inv.reg.reg,
@@ -90,8 +88,7 @@  static void mmio_invalidate_full(struct intel_gt *gt)
 	     IS_ALDERLAKE_P(i915)))
 		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
 
-	spin_unlock(&uncore->lock);
-	intel_gt_mcr_unlock(gt, flags);
+	mutex_unlock(&gt->reset.mutex);
 
 	for_each_engine_masked(engine, gt, awake, tmp) {
 		if (wait_for_invalidate(engine))