[16/26] drm/i915: use new iterator in i915_gem_object_wait_reservation v2

Message ID 20210917123513.1106-17-christian.koenig@amd.com (mailing list archive)
State New, archived
Series [01/26] dma-buf: add dma_resv_for_each_fence_unlocked v2

Commit Message

Christian König Sept. 17, 2021, 12:35 p.m. UTC
Simplifying the code a bit.

v2: add missing rcu read unlock.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 ++++++------------------
 1 file changed, 14 insertions(+), 43 deletions(-)

Comments

Tvrtko Ursulin Sept. 20, 2021, 10 a.m. UTC | #1
On 17/09/2021 13:35, Christian König wrote:
> Simplifying the code a bit.
> 
> v2: add missing rcu read unlock.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 ++++++------------------
>   1 file changed, 14 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> index f909aaa09d9c..e416cf528635 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> @@ -37,55 +37,26 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
>   				 unsigned int flags,
>   				 long timeout)
>   {
> -	struct dma_fence *excl;
> -	bool prune_fences = false;
> -
> -	if (flags & I915_WAIT_ALL) {
> -		struct dma_fence **shared;
> -		unsigned int count, i;
> -		int ret;
> -
> -		ret = dma_resv_get_fences(resv, &excl, &count, &shared);
> -		if (ret)
> -			return ret;
> -
> -		for (i = 0; i < count; i++) {
> -			timeout = i915_gem_object_wait_fence(shared[i],
> -							     flags, timeout);
> -			if (timeout < 0)
> -				break;
> -
> -			dma_fence_put(shared[i]);
> -		}
> -
> -		for (; i < count; i++)
> -			dma_fence_put(shared[i]);
> -		kfree(shared);
> -
> -		/*
> -		 * If both shared fences and an exclusive fence exist,
> -		 * then by construction the shared fences must be later
> -		 * than the exclusive fence. If we successfully wait for
> -		 * all the shared fences, we know that the exclusive fence
> -		 * must all be signaled. If all the shared fences are
> -		 * signaled, we can prune the array and recover the
> -		 * floating references on the fences/requests.
> -		 */
> -		prune_fences = count && timeout >= 0;
> -	} else {
> -		excl = dma_resv_get_excl_unlocked(resv);
> +	struct dma_resv_iter cursor;
> +	struct dma_fence *fence;
> +
> +	rcu_read_lock();
> +	dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
> +	dma_resv_for_each_fence_unlocked(&cursor, fence) {
> +		rcu_read_unlock();
> +		timeout = i915_gem_object_wait_fence(fence, flags, timeout);

Converting this one could be problematic. It's the wait ioctl, which
used to grab an atomic snapshot and wait for that rendering to
complete. With this change I think it has the potential to run forever
if it keeps catching new activity against the same object.

I am not sure whether or not the difference is relevant for how
userspace uses it, but I think it needs discussion.
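
To spell out the concern, here is the new loop again with annotations
that are not in the patch itself: the unlocked iterator restarts over
the whole fence list whenever the reservation object changes while the
RCU read lock is dropped, which is exactly what lets the wait pick up
new work indefinitely.

	rcu_read_lock();
	dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		rcu_read_unlock();
		/* While we sleep here, concurrent submissions can add
		 * new fences to @resv. */
		timeout = i915_gem_object_wait_fence(fence, flags, timeout);
		rcu_read_lock();
		/* The next iterator step notices the modification and
		 * restarts from the first fence, so in principle the
		 * loop can keep finding fresh work forever. */
		if (timeout < 0)
			break;
	}
	dma_resv_iter_end(&cursor);
	rcu_read_unlock();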

Hm actually there are internal callers as well, and at least some of 
those have the object locked. Would a wider refactoring to separate 
those into buckets (locked vs unlocked) make sense?
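
A rough sketch of what the locked bucket could look like, assuming a
locked counterpart to the iterator (mainline later gained
dma_resv_for_each_fence(); the bool argument form below matches this
series, and the function name is made up for illustration):

	/* Locked variant: the caller holds the reservation lock, so
	 * the fence list cannot change under us and the iterator
	 * never restarts; no RCU dance is required. */
	static long
	wait_reservation_locked(struct dma_resv *resv,
				unsigned int flags, long timeout)
	{
		struct dma_resv_iter cursor;
		struct dma_fence *fence;

		dma_resv_assert_held(resv);
		dma_resv_for_each_fence(&cursor, resv,
					flags & I915_WAIT_ALL, fence) {
			timeout = i915_gem_object_wait_fence(fence, flags,
							     timeout);
			if (timeout < 0)
				break;
		}
		return timeout;
	}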

Regards,

Tvrtko


> +		rcu_read_lock();
> +		if (timeout < 0)
> +			break;
>   	}
> -
> -	if (excl && timeout >= 0)
> -		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
> -
> -	dma_fence_put(excl);
> +	dma_resv_iter_end(&cursor);
> +	rcu_read_unlock();
>   
>   	/*
>   	 * Opportunistically prune the fences iff we know they have *all* been
>   	 * signaled.
>   	 */
> -	if (prune_fences)
> +	if (timeout > 0)
>   		dma_resv_prune(resv);
>   
>   	return timeout;
>
Christian König Sept. 21, 2021, 5:35 p.m. UTC | #2
On 20/09/2021 12:00, Tvrtko Ursulin wrote:
>
> On 17/09/2021 13:35, Christian König wrote:
>> Simplifying the code a bit.
>>
>> v2: add missing rcu read unlock.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 ++++++------------------
>>   1 file changed, 14 insertions(+), 43 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c 
>> b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
>> index f909aaa09d9c..e416cf528635 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
>> @@ -37,55 +37,26 @@ i915_gem_object_wait_reservation(struct dma_resv 
>> *resv,
>>                    unsigned int flags,
>>                    long timeout)
>>   {
>> -    struct dma_fence *excl;
>> -    bool prune_fences = false;
>> -
>> -    if (flags & I915_WAIT_ALL) {
>> -        struct dma_fence **shared;
>> -        unsigned int count, i;
>> -        int ret;
>> -
>> -        ret = dma_resv_get_fences(resv, &excl, &count, &shared);
>> -        if (ret)
>> -            return ret;
>> -
>> -        for (i = 0; i < count; i++) {
>> -            timeout = i915_gem_object_wait_fence(shared[i],
>> -                                 flags, timeout);
>> -            if (timeout < 0)
>> -                break;
>> -
>> -            dma_fence_put(shared[i]);
>> -        }
>> -
>> -        for (; i < count; i++)
>> -            dma_fence_put(shared[i]);
>> -        kfree(shared);
>> -
>> -        /*
>> -         * If both shared fences and an exclusive fence exist,
>> -         * then by construction the shared fences must be later
>> -         * than the exclusive fence. If we successfully wait for
>> -         * all the shared fences, we know that the exclusive fence
>> -         * must all be signaled. If all the shared fences are
>> -         * signaled, we can prune the array and recover the
>> -         * floating references on the fences/requests.
>> -         */
>> -        prune_fences = count && timeout >= 0;
>> -    } else {
>> -        excl = dma_resv_get_excl_unlocked(resv);
>> +    struct dma_resv_iter cursor;
>> +    struct dma_fence *fence;
>> +
>> +    rcu_read_lock();
>> +    dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
>> +    dma_resv_for_each_fence_unlocked(&cursor, fence) {
>> +        rcu_read_unlock();
>> +        timeout = i915_gem_object_wait_fence(fence, flags, timeout);
>
> Converting this one could be problematic. It's the wait ioctl, which
> used to grab an atomic snapshot and wait for that rendering to
> complete. With this change I think it has the potential to run
> forever if it keeps catching new activity against the same object.
>
> I am not sure whether or not the difference is relevant for how
> userspace uses it, but I think it needs discussion.

It was years ago, but IIRC we had the same discussion for the
dma_resv_wait_timeout() function, and the conclusion was that this is
not a valid use case: waiting forever because you keep seeing new work
is a valid result.

Let me double check the history of this code here as well.
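
For reference, dma_resv_wait_timeout() ended up with exactly these
semantics in mainline. A simplified sketch (the bool parameter later
became an enum dma_resv_usage, and the final iterator manages RCU and
fence references internally, so no explicit rcu_read_lock() is needed
there, unlike in the v2 patch above):

	long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all,
				   bool intr, unsigned long timeout)
	{
		long ret = timeout ? timeout : 1;
		struct dma_resv_iter cursor;
		struct dma_fence *fence;

		dma_resv_iter_begin(&cursor, obj, wait_all);
		dma_resv_for_each_fence_unlocked(&cursor, fence) {
			/* Wait on whatever the iterator returns; a
			 * restart simply feeds us the new fences too. */
			ret = dma_fence_wait_timeout(fence, intr, ret);
			if (ret <= 0)
				break;
		}
		dma_resv_iter_end(&cursor);

		return ret;
	}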

> Hm actually there are internal callers as well, and at least some of 
> those have the object locked. Would a wider refactoring to separate 
> those into buckets (locked vs unlocked) make sense?

Yes definitely.

Regards,
Christian.

>
> Regards,
>
> Tvrtko
>
>
>> +        rcu_read_lock();
>> +        if (timeout < 0)
>> +            break;
>>       }
>> -
>> -    if (excl && timeout >= 0)
>> -        timeout = i915_gem_object_wait_fence(excl, flags, timeout);
>> -
>> -    dma_fence_put(excl);
>> +    dma_resv_iter_end(&cursor);
>> +    rcu_read_unlock();
>>         /*
>>        * Opportunistically prune the fences iff we know they have 
>> *all* been
>>        * signaled.
>>        */
>> -    if (prune_fences)
>> +    if (timeout > 0)
>>           dma_resv_prune(resv);
>>         return timeout;
>>

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index f909aaa09d9c..e416cf528635 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -37,55 +37,26 @@  i915_gem_object_wait_reservation(struct dma_resv *resv,
 				 unsigned int flags,
 				 long timeout)
 {
-	struct dma_fence *excl;
-	bool prune_fences = false;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = dma_resv_get_fences(resv, &excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			timeout = i915_gem_object_wait_fence(shared[i],
-							     flags, timeout);
-			if (timeout < 0)
-				break;
-
-			dma_fence_put(shared[i]);
-		}
-
-		for (; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-
-		/*
-		 * If both shared fences and an exclusive fence exist,
-		 * then by construction the shared fences must be later
-		 * than the exclusive fence. If we successfully wait for
-		 * all the shared fences, we know that the exclusive fence
-		 * must all be signaled. If all the shared fences are
-		 * signaled, we can prune the array and recover the
-		 * floating references on the fences/requests.
-		 */
-		prune_fences = count && timeout >= 0;
-	} else {
-		excl = dma_resv_get_excl_unlocked(resv);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+
+	rcu_read_lock();
+	dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		rcu_read_unlock();
+		timeout = i915_gem_object_wait_fence(fence, flags, timeout);
+		rcu_read_lock();
+		if (timeout < 0)
+			break;
 	}
-
-	if (excl && timeout >= 0)
-		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
-
-	dma_fence_put(excl);
+	dma_resv_iter_end(&cursor);
+	rcu_read_unlock();
 
 	/*
 	 * Opportunistically prune the fences iff we know they have *all* been
 	 * signaled.
 	 */
-	if (prune_fences)
+	if (timeout > 0)
 		dma_resv_prune(resv);
 
 	return timeout;