diff mbox

[13/17] drm/radeon: Remove custom dma_fence_ops->wait implementation

Message ID 20180427061724.28497-14-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Vetter April 27, 2018, 6:17 a.m. UTC
It's a copy of dma_fence_default_wait, written slightly differently.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
Cc: amd-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/radeon/radeon_fence.c | 63 ---------------------------
 1 file changed, 63 deletions(-)

Comments

Christian König April 29, 2018, 7:08 a.m. UTC | #1
NAK, there is a subtitle but major difference:

> -		if (rdev->needs_reset) {
> -			t = -EDEADLK;
> -			break;
> -		}

Without that the whole radeon GPU reset code breaks.

Regards,
Christian.


Am 27.04.2018 um 08:17 schrieb Daniel Vetter:
> It's a copy of dma_fence_default_wait, written slightly differently.
>
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: "Christian König" <christian.koenig@amd.com>
> Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> ---
>   drivers/gpu/drm/radeon/radeon_fence.c | 63 ---------------------------
>   1 file changed, 63 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> index e86f2bd38410..32690a525bfc 100644
> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> @@ -1051,72 +1051,9 @@ static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
>   	}
>   }
>   
> -static inline bool radeon_test_signaled(struct radeon_fence *fence)
> -{
> -	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
> -}
> -
> -struct radeon_wait_cb {
> -	struct dma_fence_cb base;
> -	struct task_struct *task;
> -};
> -
> -static void
> -radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> -{
> -	struct radeon_wait_cb *wait =
> -		container_of(cb, struct radeon_wait_cb, base);
> -
> -	wake_up_process(wait->task);
> -}
> -
> -static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
> -					     signed long t)
> -{
> -	struct radeon_fence *fence = to_radeon_fence(f);
> -	struct radeon_device *rdev = fence->rdev;
> -	struct radeon_wait_cb cb;
> -
> -	cb.task = current;
> -
> -	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
> -		return t;
> -
> -	while (t > 0) {
> -		if (intr)
> -			set_current_state(TASK_INTERRUPTIBLE);
> -		else
> -			set_current_state(TASK_UNINTERRUPTIBLE);
> -
> -		/*
> -		 * radeon_test_signaled must be called after
> -		 * set_current_state to prevent a race with wake_up_process
> -		 */
> -		if (radeon_test_signaled(fence))
> -			break;
> -
> -		if (rdev->needs_reset) {
> -			t = -EDEADLK;
> -			break;
> -		}
> -
> -		t = schedule_timeout(t);
> -
> -		if (t > 0 && intr && signal_pending(current))
> -			t = -ERESTARTSYS;
> -	}
> -
> -	__set_current_state(TASK_RUNNING);
> -	dma_fence_remove_callback(f, &cb.base);
> -
> -	return t;
> -}
> -
>   const struct dma_fence_ops radeon_fence_ops = {
>   	.get_driver_name = radeon_fence_get_driver_name,
>   	.get_timeline_name = radeon_fence_get_timeline_name,
>   	.enable_signaling = radeon_fence_enable_signaling,
>   	.signaled = radeon_fence_is_signaled,
> -	.wait = radeon_fence_default_wait,
> -	.release = NULL,
>   };
Daniel Vetter April 30, 2018, 3:38 p.m. UTC | #2
On Sun, Apr 29, 2018 at 09:08:31AM +0200, Christian König wrote:
> NAK, there is a subtitle but major difference:
> 
> > -		if (rdev->needs_reset) {
> > -			t = -EDEADLK;
> > -			break;
> > -		}
> 
> Without that the whole radeon GPU reset code breaks.

Oops I've missed that. How does this work when you register a callback
using ->enable_signaling and then block on it? Everything just dies?

We have lots of users of that for buffer/fence sharing. A really ugly, but
probably working fix for this would be a kthread worker that just looks
for ->needs_reset and force-completes all fences with
dma_fence_set_error(-EIO), which is kinda what's supposed to happen here
anyway.
-Daniel

> 
> Regards,
> Christian.
> 
> 
> Am 27.04.2018 um 08:17 schrieb Daniel Vetter:
> > It's a copy of dma_fence_default_wait, written slightly differently.
> > 
> > Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: "Christian König" <christian.koenig@amd.com>
> > Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
> > Cc: amd-gfx@lists.freedesktop.org
> > ---
> >   drivers/gpu/drm/radeon/radeon_fence.c | 63 ---------------------------
> >   1 file changed, 63 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> > index e86f2bd38410..32690a525bfc 100644
> > --- a/drivers/gpu/drm/radeon/radeon_fence.c
> > +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> > @@ -1051,72 +1051,9 @@ static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
> >   	}
> >   }
> > -static inline bool radeon_test_signaled(struct radeon_fence *fence)
> > -{
> > -	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
> > -}
> > -
> > -struct radeon_wait_cb {
> > -	struct dma_fence_cb base;
> > -	struct task_struct *task;
> > -};
> > -
> > -static void
> > -radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> > -{
> > -	struct radeon_wait_cb *wait =
> > -		container_of(cb, struct radeon_wait_cb, base);
> > -
> > -	wake_up_process(wait->task);
> > -}
> > -
> > -static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
> > -					     signed long t)
> > -{
> > -	struct radeon_fence *fence = to_radeon_fence(f);
> > -	struct radeon_device *rdev = fence->rdev;
> > -	struct radeon_wait_cb cb;
> > -
> > -	cb.task = current;
> > -
> > -	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
> > -		return t;
> > -
> > -	while (t > 0) {
> > -		if (intr)
> > -			set_current_state(TASK_INTERRUPTIBLE);
> > -		else
> > -			set_current_state(TASK_UNINTERRUPTIBLE);
> > -
> > -		/*
> > -		 * radeon_test_signaled must be called after
> > -		 * set_current_state to prevent a race with wake_up_process
> > -		 */
> > -		if (radeon_test_signaled(fence))
> > -			break;
> > -
> > -		if (rdev->needs_reset) {
> > -			t = -EDEADLK;
> > -			break;
> > -		}
> > -
> > -		t = schedule_timeout(t);
> > -
> > -		if (t > 0 && intr && signal_pending(current))
> > -			t = -ERESTARTSYS;
> > -	}
> > -
> > -	__set_current_state(TASK_RUNNING);
> > -	dma_fence_remove_callback(f, &cb.base);
> > -
> > -	return t;
> > -}
> > -
> >   const struct dma_fence_ops radeon_fence_ops = {
> >   	.get_driver_name = radeon_fence_get_driver_name,
> >   	.get_timeline_name = radeon_fence_get_timeline_name,
> >   	.enable_signaling = radeon_fence_enable_signaling,
> >   	.signaled = radeon_fence_is_signaled,
> > -	.wait = radeon_fence_default_wait,
> > -	.release = NULL,
> >   };
>
Christian König April 30, 2018, 6:26 p.m. UTC | #3
Am 30.04.2018 um 17:38 schrieb Daniel Vetter:
> On Sun, Apr 29, 2018 at 09:08:31AM +0200, Christian König wrote:
>> NAK, there is a subtitle but major difference:
>>
>>> -		if (rdev->needs_reset) {
>>> -			t = -EDEADLK;
>>> -			break;
>>> -		}
>> Without that the whole radeon GPU reset code breaks.
> Oops I've missed that. How does this work when you register a callback
> using ->enable_signaling and then block on it? Everything just dies?

The short answer is we simply avoid using enable_signaling() from inside 
driver IOCTLs.

> We have lots of users of that for buffer/fence sharing. A really ugly, but
> probably working fix for this would be a kthread worker that just looks
> for ->needs_reset and force-completes all fences with
> dma_fence_set_error(-EIO), which is kinda what's supposed to happen here
> anyway.

That actually won't help. Radeon does this dance to return an error from 
dma_fence_wait() when the GPU needs a reset.

This way all IOCTLs should return to userspace with -EAGAIN and when 
they are restarted we block for the running GPU reset to finish.

I was against this approach, but it works as long as radeon only has to 
deal with it's own fences.

Christian.

> -Daniel
>
>> Regards,
>> Christian.
>>
>>
>> Am 27.04.2018 um 08:17 schrieb Daniel Vetter:
>>> It's a copy of dma_fence_default_wait, written slightly differently.
>>>
>>> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: "Christian König" <christian.koenig@amd.com>
>>> Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
>>> Cc: amd-gfx@lists.freedesktop.org
>>> ---
>>>    drivers/gpu/drm/radeon/radeon_fence.c | 63 ---------------------------
>>>    1 file changed, 63 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
>>> index e86f2bd38410..32690a525bfc 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>> @@ -1051,72 +1051,9 @@ static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
>>>    	}
>>>    }
>>> -static inline bool radeon_test_signaled(struct radeon_fence *fence)
>>> -{
>>> -	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
>>> -}
>>> -
>>> -struct radeon_wait_cb {
>>> -	struct dma_fence_cb base;
>>> -	struct task_struct *task;
>>> -};
>>> -
>>> -static void
>>> -radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
>>> -{
>>> -	struct radeon_wait_cb *wait =
>>> -		container_of(cb, struct radeon_wait_cb, base);
>>> -
>>> -	wake_up_process(wait->task);
>>> -}
>>> -
>>> -static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
>>> -					     signed long t)
>>> -{
>>> -	struct radeon_fence *fence = to_radeon_fence(f);
>>> -	struct radeon_device *rdev = fence->rdev;
>>> -	struct radeon_wait_cb cb;
>>> -
>>> -	cb.task = current;
>>> -
>>> -	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
>>> -		return t;
>>> -
>>> -	while (t > 0) {
>>> -		if (intr)
>>> -			set_current_state(TASK_INTERRUPTIBLE);
>>> -		else
>>> -			set_current_state(TASK_UNINTERRUPTIBLE);
>>> -
>>> -		/*
>>> -		 * radeon_test_signaled must be called after
>>> -		 * set_current_state to prevent a race with wake_up_process
>>> -		 */
>>> -		if (radeon_test_signaled(fence))
>>> -			break;
>>> -
>>> -		if (rdev->needs_reset) {
>>> -			t = -EDEADLK;
>>> -			break;
>>> -		}
>>> -
>>> -		t = schedule_timeout(t);
>>> -
>>> -		if (t > 0 && intr && signal_pending(current))
>>> -			t = -ERESTARTSYS;
>>> -	}
>>> -
>>> -	__set_current_state(TASK_RUNNING);
>>> -	dma_fence_remove_callback(f, &cb.base);
>>> -
>>> -	return t;
>>> -}
>>> -
>>>    const struct dma_fence_ops radeon_fence_ops = {
>>>    	.get_driver_name = radeon_fence_get_driver_name,
>>>    	.get_timeline_name = radeon_fence_get_timeline_name,
>>>    	.enable_signaling = radeon_fence_enable_signaling,
>>>    	.signaled = radeon_fence_is_signaled,
>>> -	.wait = radeon_fence_default_wait,
>>> -	.release = NULL,
>>>    };
Daniel Vetter April 30, 2018, 7:35 p.m. UTC | #4
On Mon, Apr 30, 2018 at 8:26 PM, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
> Am 30.04.2018 um 17:38 schrieb Daniel Vetter:
>>
>> On Sun, Apr 29, 2018 at 09:08:31AM +0200, Christian König wrote:
>>>
>>> NAK, there is a subtitle but major difference:
>>>
>>>> -               if (rdev->needs_reset) {
>>>> -                       t = -EDEADLK;
>>>> -                       break;
>>>> -               }
>>>
>>> Without that the whole radeon GPU reset code breaks.
>>
>> Oops I've missed that. How does this work when you register a callback
>> using ->enable_signaling and then block on it? Everything just dies?
>
>
> The short answer is we simply avoid using enable_signaling() from inside
> driver IOCTLs.
>
>> We have lots of users of that for buffer/fence sharing. A really ugly, but
>> probably working fix for this would be a kthread worker that just looks
>> for ->needs_reset and force-completes all fences with
>> dma_fence_set_error(-EIO), which is kinda what's supposed to happen here
>> anyway.
>
>
> That actually won't help. Radeon does this dance to return an error from
> dma_fence_wait() when the GPU needs a reset.
>
> This way all IOCTLs should return to userspace with -EAGAIN and when they
> are restarted we block for the running GPU reset to finish.
>
> I was against this approach, but it works as long as radeon only has to deal
> with it's own fences.

Yeah, as soon as you mix in a 2nd driver it goes boom, since you can
easily construct loops (even if they go through ->enable_signaling and
maybe just an irq handler to fire off something somewhere else).
Currently we're really bad a detecting these loops (aka there's
nothing), but I hope that the cross-release lockdep stuff gets enabled
soon. Then we could annotate dma_fences and lockdep should complain
about a lot of these issues.

Alas, problem exists already, but I'm not going to attempt to fix it.

Anyway, I'll drop this patch here.
-Daniel

>
> Christian.
>
>
>> -Daniel
>>
>>> Regards,
>>> Christian.
>>>
>>>
>>> Am 27.04.2018 um 08:17 schrieb Daniel Vetter:
>>>>
>>>> It's a copy of dma_fence_default_wait, written slightly differently.
>>>>
>>>> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>> Cc: "Christian König" <christian.koenig@amd.com>
>>>> Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
>>>> Cc: amd-gfx@lists.freedesktop.org
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon_fence.c | 63
>>>> ---------------------------
>>>>    1 file changed, 63 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>> index e86f2bd38410..32690a525bfc 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>> @@ -1051,72 +1051,9 @@ static const char
>>>> *radeon_fence_get_timeline_name(struct dma_fence *f)
>>>>         }
>>>>    }
>>>> -static inline bool radeon_test_signaled(struct radeon_fence *fence)
>>>> -{
>>>> -       return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>>>> &fence->base.flags);
>>>> -}
>>>> -
>>>> -struct radeon_wait_cb {
>>>> -       struct dma_fence_cb base;
>>>> -       struct task_struct *task;
>>>> -};
>>>> -
>>>> -static void
>>>> -radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
>>>> -{
>>>> -       struct radeon_wait_cb *wait =
>>>> -               container_of(cb, struct radeon_wait_cb, base);
>>>> -
>>>> -       wake_up_process(wait->task);
>>>> -}
>>>> -
>>>> -static signed long radeon_fence_default_wait(struct dma_fence *f, bool
>>>> intr,
>>>> -                                            signed long t)
>>>> -{
>>>> -       struct radeon_fence *fence = to_radeon_fence(f);
>>>> -       struct radeon_device *rdev = fence->rdev;
>>>> -       struct radeon_wait_cb cb;
>>>> -
>>>> -       cb.task = current;
>>>> -
>>>> -       if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
>>>> -               return t;
>>>> -
>>>> -       while (t > 0) {
>>>> -               if (intr)
>>>> -                       set_current_state(TASK_INTERRUPTIBLE);
>>>> -               else
>>>> -                       set_current_state(TASK_UNINTERRUPTIBLE);
>>>> -
>>>> -               /*
>>>> -                * radeon_test_signaled must be called after
>>>> -                * set_current_state to prevent a race with
>>>> wake_up_process
>>>> -                */
>>>> -               if (radeon_test_signaled(fence))
>>>> -                       break;
>>>> -
>>>> -               if (rdev->needs_reset) {
>>>> -                       t = -EDEADLK;
>>>> -                       break;
>>>> -               }
>>>> -
>>>> -               t = schedule_timeout(t);
>>>> -
>>>> -               if (t > 0 && intr && signal_pending(current))
>>>> -                       t = -ERESTARTSYS;
>>>> -       }
>>>> -
>>>> -       __set_current_state(TASK_RUNNING);
>>>> -       dma_fence_remove_callback(f, &cb.base);
>>>> -
>>>> -       return t;
>>>> -}
>>>> -
>>>>    const struct dma_fence_ops radeon_fence_ops = {
>>>>         .get_driver_name = radeon_fence_get_driver_name,
>>>>         .get_timeline_name = radeon_fence_get_timeline_name,
>>>>         .enable_signaling = radeon_fence_enable_signaling,
>>>>         .signaled = radeon_fence_is_signaled,
>>>> -       .wait = radeon_fence_default_wait,
>>>> -       .release = NULL,
>>>>    };
>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index e86f2bd38410..32690a525bfc 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -1051,72 +1051,9 @@  static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
 	}
 }
 
-static inline bool radeon_test_signaled(struct radeon_fence *fence)
-{
-	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
-}
-
-struct radeon_wait_cb {
-	struct dma_fence_cb base;
-	struct task_struct *task;
-};
-
-static void
-radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
-{
-	struct radeon_wait_cb *wait =
-		container_of(cb, struct radeon_wait_cb, base);
-
-	wake_up_process(wait->task);
-}
-
-static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
-					     signed long t)
-{
-	struct radeon_fence *fence = to_radeon_fence(f);
-	struct radeon_device *rdev = fence->rdev;
-	struct radeon_wait_cb cb;
-
-	cb.task = current;
-
-	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
-		return t;
-
-	while (t > 0) {
-		if (intr)
-			set_current_state(TASK_INTERRUPTIBLE);
-		else
-			set_current_state(TASK_UNINTERRUPTIBLE);
-
-		/*
-		 * radeon_test_signaled must be called after
-		 * set_current_state to prevent a race with wake_up_process
-		 */
-		if (radeon_test_signaled(fence))
-			break;
-
-		if (rdev->needs_reset) {
-			t = -EDEADLK;
-			break;
-		}
-
-		t = schedule_timeout(t);
-
-		if (t > 0 && intr && signal_pending(current))
-			t = -ERESTARTSYS;
-	}
-
-	__set_current_state(TASK_RUNNING);
-	dma_fence_remove_callback(f, &cb.base);
-
-	return t;
-}
-
 const struct dma_fence_ops radeon_fence_ops = {
 	.get_driver_name = radeon_fence_get_driver_name,
 	.get_timeline_name = radeon_fence_get_timeline_name,
 	.enable_signaling = radeon_fence_enable_signaling,
 	.signaled = radeon_fence_is_signaled,
-	.wait = radeon_fence_default_wait,
-	.release = NULL,
 };