diff mbox series

[v6,04/10] gpu: host1x: Remove cancelled waiters immediately

Message ID 20210329133836.2115236-5-mperttunen@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Fixes and cleanups for Host1x | expand

Commit Message

Mikko Perttunen March 29, 2021, 1:38 p.m. UTC
Before this patch, cancelled waiters would only be cleaned up
once their threshold value was reached. Make host1x_intr_put_ref
process the cancellation immediately to fix this.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
v6:
* Call schedule instead of cpu_relax while waiting for pending
  interrupt processing
v5:
* Add a 'flush' parameter, i.e. wait for all pending waiters to
  complete before returning. The reason flush is not always set
  to true is that the pending waiter might itself be the place
  that is calling put_ref.
---
 drivers/gpu/host1x/intr.c   | 23 +++++++++++++++++------
 drivers/gpu/host1x/intr.h   |  4 +++-
 drivers/gpu/host1x/syncpt.c |  2 +-
 3 files changed, 21 insertions(+), 8 deletions(-)

Comments

Dmitry Osipenko March 29, 2021, 8:27 p.m. UTC | #1
29.03.2021 16:38, Mikko Perttunen пишет:
> Before this patch, cancelled waiters would only be cleaned up
> once their threshold value was reached. Make host1x_intr_put_ref
> process the cancellation immediately to fix this.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
> v6:
> * Call schedule instead of cpu_relax while waiting for pending
>   interrupt processing
> v5:
> * Add parameter to flush, i.e. wait for all pending waiters to
>   complete before returning. The reason this is not always true
>   is that the pending waiter might be the place that is calling
>   the put_ref.
> ---
>  drivers/gpu/host1x/intr.c   | 23 +++++++++++++++++------
>  drivers/gpu/host1x/intr.h   |  4 +++-
>  drivers/gpu/host1x/syncpt.c |  2 +-
>  3 files changed, 21 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
> index 9245add23b5d..69b0e8e41466 100644
> --- a/drivers/gpu/host1x/intr.c
> +++ b/drivers/gpu/host1x/intr.c
> @@ -242,18 +242,29 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
>  	return 0;
>  }
>  
> -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
> +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
> +			 bool flush)
>  {
>  	struct host1x_waitlist *waiter = ref;
>  	struct host1x_syncpt *syncpt;
>  
> -	while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) ==
> -	       WLS_REMOVED)
> -		schedule();
> +	atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
>  
>  	syncpt = host->syncpt + id;
> -	(void)process_wait_list(host, syncpt,
> -				host1x_syncpt_load(host->syncpt + id));
> +
> +	spin_lock(&syncpt->intr.lock);
> +	if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
> +	    WLS_CANCELLED) {
> +		list_del(&waiter->list);
> +		kref_put(&waiter->refcount, waiter_release);
> +	}
> +	spin_unlock(&syncpt->intr.lock);

It looks like we need to use the IRQ-safe version of the locking here so
that we don't race with the interrupt handler(?), in order to prevent a lockup.

But what real bug is fixed by this patch? If no real problem is fixed,
then maybe it would be better to defer touching this code until we
replace it all with proper dma-fence handlers?
Mikko Perttunen March 29, 2021, 9:36 p.m. UTC | #2
On 3/29/21 11:27 PM, Dmitry Osipenko wrote:
> 29.03.2021 16:38, Mikko Perttunen пишет:
>> Before this patch, cancelled waiters would only be cleaned up
>> once their threshold value was reached. Make host1x_intr_put_ref
>> process the cancellation immediately to fix this.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> ---
>> v6:
>> * Call schedule instead of cpu_relax while waiting for pending
>>    interrupt processing
>> v5:
>> * Add parameter to flush, i.e. wait for all pending waiters to
>>    complete before returning. The reason this is not always true
>>    is that the pending waiter might be the place that is calling
>>    the put_ref.
>> ---
>>   drivers/gpu/host1x/intr.c   | 23 +++++++++++++++++------
>>   drivers/gpu/host1x/intr.h   |  4 +++-
>>   drivers/gpu/host1x/syncpt.c |  2 +-
>>   3 files changed, 21 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
>> index 9245add23b5d..69b0e8e41466 100644
>> --- a/drivers/gpu/host1x/intr.c
>> +++ b/drivers/gpu/host1x/intr.c
>> @@ -242,18 +242,29 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
>>   	return 0;
>>   }
>>   
>> -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
>> +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
>> +			 bool flush)
>>   {
>>   	struct host1x_waitlist *waiter = ref;
>>   	struct host1x_syncpt *syncpt;
>>   
>> -	while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) ==
>> -	       WLS_REMOVED)
>> -		schedule();
>> +	atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
>>   
>>   	syncpt = host->syncpt + id;
>> -	(void)process_wait_list(host, syncpt,
>> -				host1x_syncpt_load(host->syncpt + id));
>> +
>> +	spin_lock(&syncpt->intr.lock);
>> +	if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
>> +	    WLS_CANCELLED) {
>> +		list_del(&waiter->list);
>> +		kref_put(&waiter->refcount, waiter_release);
>> +	}
>> +	spin_unlock(&syncpt->intr.lock);
> 
> Looks like we need to use IRQ-safe version of the locking here in order
> not to race with the interrupt handler(?), preventing lockup.

The potential contention is with the syncpt_thresh_work scheduled work, 
and not the actual interrupt handler, so there is no issue.

> 
> But what real bug is fixed by this patch? If no real problem is fixed,
> then maybe will be better to defer touching this code till we will just
> replace it all with a proper dma-fence handlers?
> 

It improves things in that we won't litter the waiter data structures 
with unbounded waiter entries when waits are cancelled. Also, I prefer 
working in steps when possible - next is writing dma_fences on top of 
this (which is already done) and then eventually phasing/refactoring 
code from intr.c to fence.c so eventually only dma_fences remain. In my 
experience that works better than big rewrites.

Mikko
Dmitry Osipenko March 30, 2021, 1:35 a.m. UTC | #3
30.03.2021 00:36, Mikko Perttunen пишет:
> On 3/29/21 11:27 PM, Dmitry Osipenko wrote:
>> 29.03.2021 16:38, Mikko Perttunen пишет:
>>> Before this patch, cancelled waiters would only be cleaned up
>>> once their threshold value was reached. Make host1x_intr_put_ref
>>> process the cancellation immediately to fix this.
>>>
>>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>>> ---
>>> v6:
>>> * Call schedule instead of cpu_relax while waiting for pending
>>>    interrupt processing
>>> v5:
>>> * Add parameter to flush, i.e. wait for all pending waiters to
>>>    complete before returning. The reason this is not always true
>>>    is that the pending waiter might be the place that is calling
>>>    the put_ref.
>>> ---
>>>   drivers/gpu/host1x/intr.c   | 23 +++++++++++++++++------
>>>   drivers/gpu/host1x/intr.h   |  4 +++-
>>>   drivers/gpu/host1x/syncpt.c |  2 +-
>>>   3 files changed, 21 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
>>> index 9245add23b5d..69b0e8e41466 100644
>>> --- a/drivers/gpu/host1x/intr.c
>>> +++ b/drivers/gpu/host1x/intr.c
>>> @@ -242,18 +242,29 @@ int host1x_intr_add_action(struct host1x *host,
>>> struct host1x_syncpt *syncpt,
>>>       return 0;
>>>   }
>>>   -void host1x_intr_put_ref(struct host1x *host, unsigned int id,
>>> void *ref)
>>> +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void
>>> *ref,
>>> +             bool flush)
>>>   {
>>>       struct host1x_waitlist *waiter = ref;
>>>       struct host1x_syncpt *syncpt;
>>>   -    while (atomic_cmpxchg(&waiter->state, WLS_PENDING,
>>> WLS_CANCELLED) ==
>>> -           WLS_REMOVED)
>>> -        schedule();
>>> +    atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
>>>         syncpt = host->syncpt + id;
>>> -    (void)process_wait_list(host, syncpt,
>>> -                host1x_syncpt_load(host->syncpt + id));
>>> +
>>> +    spin_lock(&syncpt->intr.lock);
>>> +    if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
>>> +        WLS_CANCELLED) {
>>> +        list_del(&waiter->list);
>>> +        kref_put(&waiter->refcount, waiter_release);
>>> +    }
>>> +    spin_unlock(&syncpt->intr.lock);
>>
>> Looks like we need to use IRQ-safe version of the locking here in order
>> not to race with the interrupt handler(?), preventing lockup.
> 
> The potential contention is with the syncpt_thresh_work scheduled work,
> and not the actual interrupt handler, so there is no issue.

I see now, thanks.

>> But what real bug is fixed by this patch? If no real problem is fixed,
>> then maybe will be better to defer touching this code till we will just
>> replace it all with a proper dma-fence handlers?
>>
> 
> It improves things in that we won't litter the waiter data structures
> with unbounded waiter entries when waits are cancelled. Also, I prefer
> working in steps when possible - next is writing dma_fences on top of
> this (which is already done) and then eventually phasing/refactoring
> code from intr.c to fence.c so eventually only dma_fences remain. In my
> experience that works better than big rewrites.

So this change is a cleanup and not a bugfix, which wasn't clear from
the commit description. In my experience it usually helps the review
if the commit message explicitly states whether it is a minor cleanup
or a critical bugfix.

The small cleanups should be okay. It would be better if you could
explicitly separate the fixes from the cleanups, since there is a better
chance that fixes will be picked up immediately.

I'd also suggest trying to group the cleanup changes with the new
features that benefit from them, where possible. This should make
the patches look more logical rather than like random changes, which
helps with the review.

I'll try to test this series later today.
diff mbox series

Patch

diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 9245add23b5d..69b0e8e41466 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -242,18 +242,29 @@  int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
 	return 0;
 }
 
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
+			 bool flush)
 {
 	struct host1x_waitlist *waiter = ref;
 	struct host1x_syncpt *syncpt;
 
-	while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) ==
-	       WLS_REMOVED)
-		schedule();
+	atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
 
 	syncpt = host->syncpt + id;
-	(void)process_wait_list(host, syncpt,
-				host1x_syncpt_load(host->syncpt + id));
+
+	spin_lock(&syncpt->intr.lock);
+	if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
+	    WLS_CANCELLED) {
+		list_del(&waiter->list);
+		kref_put(&waiter->refcount, waiter_release);
+	}
+	spin_unlock(&syncpt->intr.lock);
+
+	if (flush) {
+		/* Wait until any concurrently executing handler has finished. */
+		while (atomic_read(&waiter->state) != WLS_HANDLED)
+			schedule();
+	}
 
 	kref_put(&waiter->refcount, waiter_release);
 }
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index aac38194398f..6ea55e615e3a 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -74,8 +74,10 @@  int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
  * Unreference an action submitted to host1x_intr_add_action().
  * You must call this if you passed non-NULL as ref.
  * @ref the ref returned from host1x_intr_add_action()
+ * @flush wait until any pending handlers have completed before returning.
  */
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref);
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
+			 bool flush);
 
 /* Initialize host1x sync point interrupt */
 int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 9a113016d482..f061dfd5bbc7 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -308,7 +308,7 @@  int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		}
 	}
 
-	host1x_intr_put_ref(sp->host, sp->id, ref);
+	host1x_intr_put_ref(sp->host, sp->id, ref, true);
 
 done:
 	return err;