diff mbox

[3/6] drm/ttm: call ttm_bo_cleanup_refs with reservation and lru lock held

Message ID 1354101944-10455-3-git-send-email-maarten.lankhorst@canonical.com (mailing list archive)
State New, archived
Headers show

Commit Message

Maarten Lankhorst Nov. 28, 2012, 11:25 a.m. UTC
By removing the unlocking of lru and retaking it immediately, a race is
removed where the bo is taken off the swap list or the lru list between
the unlock and relock. As such the cleanup_refs code can be simplified,
it will attempt to call ttm_bo_wait non-blockingly, and if it fails
it will drop the locks and perform a blocking wait, or return an error
if no_wait_gpu was set.

The need for looping is also eliminated, since swapout and evict_mem_first
will always follow the destruction path, so no new fence is allowed
to be attached. As far as I can see this may already have been the case,
but the unlocking / relocking required a complicated loop to deal with
re-reservation.

The downside is that ttm_bo_cleanup_memtype_use is no longer called with
reservation held, so drivers must be aware that move_notify with a null
parameter doesn't require a reservation.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 112 +++++++++++++++++++++++--------------------
 1 file changed, 60 insertions(+), 52 deletions(-)

Comments

Thomas Hellstrom Nov. 28, 2012, 11:54 a.m. UTC | #1
On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
> By removing the unlocking of lru and retaking it immediately, a race is
> removed where the bo is taken off the swap list or the lru list between
> the unlock and relock. As such the cleanup_refs code can be simplified,
> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
> it will drop the locks and perform a blocking wait, or return an error
> if no_wait_gpu was set.
>
> The need for looping is also eliminated, since swapout and evict_mem_first
> will always follow the destruction path, so no new fence is allowed
> to be attached. As far as I can see this may already have been the case,
> but the unlocking / relocking required a complicated loop to deal with
> re-reservation.
>
> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
> reservation held, so drivers must be aware that move_notify with a null
> parameter doesn't require a reservation.

Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
immediately clear from this patch.

>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 112 +++++++++++++++++++++++--------------------
>   1 file changed, 60 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 202fc20..02b275b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -486,14 +486,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
>   		bo->ttm = NULL;
>   	}
>   	ttm_bo_mem_put(bo, &bo->mem);
> -
> -	atomic_set(&bo->reserved, 0);
> -
> -	/*
> -	 * Make processes trying to reserve really pick it up.
> -	 */
> -	smp_mb__after_atomic_dec();
> -	wake_up_all(&bo->event_queue);
>   }
>   
>   static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> @@ -515,6 +507,9 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   		put_count = ttm_bo_del_from_lru(bo);
>   
>   		spin_unlock(&glob->lru_lock);
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
> +

I think (although I'm not 100% sure) that if we use atomic_set() to 
unreserve, and it's not followed by a spin_unlock(), we need to insert
a memory barrier, like is done above in the removed code, otherwise 
memory operations protected by reserve may be reordered until after 
reservation.

>   		ttm_bo_cleanup_memtype_use(bo);
>   
>   		ttm_bo_list_ref_sub(bo, put_count, true);
> @@ -543,68 +538,72 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>   }
>   
>   /**
> - * function ttm_bo_cleanup_refs
> + * function ttm_bo_cleanup_refs_and_unlock
>    * If bo idle, remove from delayed- and lru lists, and unref.
>    * If not idle, do nothing.
>    *
> + * Must be called with lru_lock and reservation held, this function
> + * will drop both before returning.
> + *
>    * @interruptible         Any sleeps should occur interruptibly.
> - * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
>    * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
>    */
>   
> -static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> -			       bool interruptible,
> -			       bool no_wait_reserve,
> -			       bool no_wait_gpu)
> +static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
> +					  bool interruptible,
> +					  bool no_wait_gpu)
>   {
>   	struct ttm_bo_device *bdev = bo->bdev;
> +	struct ttm_bo_driver *driver = bdev->driver;
>   	struct ttm_bo_global *glob = bo->glob;
>   	int put_count;
>   	int ret = 0;
>   
> -retry:
>   	spin_lock(&bdev->fence_lock);
> -	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -	spin_unlock(&bdev->fence_lock);
> +	ret = ttm_bo_wait(bo, false, false, true);
>   
> -	if (unlikely(ret != 0))
> +	if (ret && no_wait_gpu) {
> +		spin_unlock(&bdev->fence_lock);
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
> +		spin_unlock(&glob->lru_lock);
>   		return ret;
> +	} else if (ret) {
> +		void *sync_obj;
>   
> -retry_reserve:
> -	spin_lock(&glob->lru_lock);
> -
> -	if (unlikely(list_empty(&bo->ddestroy))) {
> +		/**
> +		 * Take a reference to the fence and unreserve,
> +		 * at this point the buffer should be dead, so
> +		 * no new sync objects can be attached.
> +		 */
> +		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
> +		spin_unlock(&bdev->fence_lock);
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
>   		spin_unlock(&glob->lru_lock);
> -		return 0;
> -	}
> -
> -	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>   
> -	if (unlikely(ret == -EBUSY)) {
> -		spin_unlock(&glob->lru_lock);
> -		if (likely(!no_wait_reserve))
> -			ret = ttm_bo_wait_unreserved(bo, interruptible);
> -		if (unlikely(ret != 0))
> +		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
> +		driver->sync_obj_unref(&sync_obj);
> +		if (ret)
>   			return ret;
>   
> -		goto retry_reserve;
> -	}
> -
> -	BUG_ON(ret != 0);
> +		/* remove sync_obj with ttm_bo_wait */
> +		spin_lock(&bdev->fence_lock);
> +		ret = ttm_bo_wait(bo, false, false, true);
> +		spin_unlock(&bdev->fence_lock);
>   
> -	/**
> -	 * We can re-check for sync object without taking
> -	 * the bo::lock since setting the sync object requires
> -	 * also bo::reserved. A busy object at this point may
> -	 * be caused by another thread recently starting an accelerated
> -	 * eviction.
> -	 */
> +		WARN_ON(ret);
>   
> -	if (unlikely(bo->sync_obj)) {
> +		spin_lock(&glob->lru_lock);
> +	} else {
> +		spin_unlock(&bdev->fence_lock);
>   		atomic_set(&bo->reserved, 0);
>   		wake_up_all(&bo->event_queue);
> +	}
> +
> +	if (unlikely(list_empty(&bo->ddestroy))) {
>   		spin_unlock(&glob->lru_lock);
> -		goto retry;
> +		return 0;
>   	}
>   
>   	put_count = ttm_bo_del_from_lru(bo);
> @@ -647,9 +646,13 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
>   			kref_get(&nentry->list_kref);
>   		}
>   
> -		spin_unlock(&glob->lru_lock);
> -		ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
> -					  !remove_all);
> +		ret = ttm_bo_reserve_locked(entry, false, !remove_all, false, 0);
> +		if (!ret)
> +			ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
> +							     !remove_all);
> +		else
> +			spin_unlock(&glob->lru_lock);
> +
>   		kref_put(&entry->list_kref, ttm_bo_release_list);
>   		entry = nentry;
>   
> @@ -803,9 +806,13 @@ retry:
>   	kref_get(&bo->list_kref);
>   
>   	if (!list_empty(&bo->ddestroy)) {
> -		spin_unlock(&glob->lru_lock);
> -		ret = ttm_bo_cleanup_refs(bo, interruptible,
> -					  no_wait_reserve, no_wait_gpu);
> +		ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0);
> +		if (!ret)
> +			ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
> +							     no_wait_gpu);
> +		else
> +			spin_unlock(&glob->lru_lock);
> +
>   		kref_put(&bo->list_kref, ttm_bo_release_list);
>   
>   		return ret;
> @@ -1799,8 +1806,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
>   		kref_get(&bo->list_kref);
>   
>   		if (!list_empty(&bo->ddestroy)) {
> -			spin_unlock(&glob->lru_lock);
> -			(void) ttm_bo_cleanup_refs(bo, false, false, false);
> +			ttm_bo_reserve_locked(bo, false, false, false, 0);
> +			ttm_bo_cleanup_refs_and_unlock(bo, false, false);
> +
>   			kref_put(&bo->list_kref, ttm_bo_release_list);
>   			spin_lock(&glob->lru_lock);
>   			continue;
Maarten Lankhorst Nov. 28, 2012, 12:15 p.m. UTC | #2
Op 28-11-12 12:54, Thomas Hellstrom schreef:
> On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
>> By removing the unlocking of lru and retaking it immediately, a race is
>> removed where the bo is taken off the swap list or the lru list between
>> the unlock and relock. As such the cleanup_refs code can be simplified,
>> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
>> it will drop the locks and perform a blocking wait, or return an error
>> if no_wait_gpu was set.
>>
>> The need for looping is also eliminated, since swapout and evict_mem_first
>> will always follow the destruction path, so no new fence is allowed
>> to be attached. As far as I can see this may already have been the case,
>> but the unlocking / relocking required a complicated loop to deal with
>> re-reservation.
>>
>> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
>> reservation held, so drivers must be aware that move_notify with a null
>> parameter doesn't require a reservation.
>
> Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
> immediately clear from this patch.
Because we would hold the reservation while waiting, with the object still
on the swap and lru lists; that would defeat the whole purpose of keeping
the object on multiple lists, plus break current code that assumes bo's on
those lists can always be reserved.

the if (ret && !no_wait_gpu) path has to drop the reservation and lru lock, and
isn't guaranteed to be able to retake it. Maybe it could be guaranteed now, but
I'm sure that would not be the case if the reservations were shared across
devices.

The alternatives are worse: doing a blocking reserve would rightfully anger lockdep,
and how do you want to handle tryreserve failing?

Since there is a path in which no reservation can be taken, to weed out bugs I felt it
would be better that any bugs that trigger from not holding a reservation would be
easier to find if it happens all the time, instead of only in some corner cases.

~Maarten
Maarten Lankhorst Nov. 28, 2012, 12:28 p.m. UTC | #3
Op 28-11-12 12:54, Thomas Hellstrom schreef:
>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 112 +++++++++++++++++++++++--------------------
>>   1 file changed, 60 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 202fc20..02b275b 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -486,14 +486,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
>>           bo->ttm = NULL;
>>       }
>>       ttm_bo_mem_put(bo, &bo->mem);
>> -
>> -    atomic_set(&bo->reserved, 0);
>> -
>> -    /*
>> -     * Make processes trying to reserve really pick it up.
>> -     */
>> -    smp_mb__after_atomic_dec();
>> -    wake_up_all(&bo->event_queue);
>>   }
>>     static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>> @@ -515,6 +507,9 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>>           put_count = ttm_bo_del_from_lru(bo);
>>             spin_unlock(&glob->lru_lock);
>> +        atomic_set(&bo->reserved, 0);
>> +        wake_up_all(&bo->event_queue);
>> +
>
> I think (although I'm not 100% sure) that if we use atomic_set() to unreserve, and it's not followed by a spin_unlock(), we need to insert
> a memory barrier, like is done above in the removed code, otherwise memory operations protected by reserve may be reordered until after reservation.
Hm yeah, looks like ttm_bo_cleanup_memtype_use probably needs a smp_mb() at the end now. The original smp_mb__after_atomic_dec was a noop,
since the wake_up_all call takes a spinlock too.

Thanks for catching it, I'll await the reply to my other email then maybe reword, fix this and resend.

~Maarten
Thomas Hellstrom Nov. 28, 2012, 1:21 p.m. UTC | #4
On 11/28/2012 01:15 PM, Maarten Lankhorst wrote:
> Op 28-11-12 12:54, Thomas Hellstrom schreef:
>> On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
>>> By removing the unlocking of lru and retaking it immediately, a race is
>>> removed where the bo is taken off the swap list or the lru list between
>>> the unlock and relock. As such the cleanup_refs code can be simplified,
>>> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
>>> it will drop the locks and perform a blocking wait, or return an error
>>> if no_wait_gpu was set.
>>>
>>> The need for looping is also eliminated, since swapout and evict_mem_first
>>> will always follow the destruction path, so no new fence is allowed
>>> to be attached. As far as I can see this may already have been the case,
>>> but the unlocking / relocking required a complicated loop to deal with
>>> re-reservation.
>>>
>>> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
>>> reservation held, so drivers must be aware that move_notify with a null
>>> parameter doesn't require a reservation.
>> Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
>> immediately clear from this patch.
> Because we would hold the reservation while waiting and with the object still
> on swap and lru lists still, that would defeat the whole purpose of keeping
> the object on multiple lists, plus break current code that assumes bo's on the
> those lists can always be reserved.
>
> the if (ret && !no_wait_gpu) path has to drop the reservation and lru lock, and
> isn't guaranteed to be able to retake it. Maybe it could be guaranteed now, but
> I'm sure that would not be the case if the reservations were shared across
> devices.
The evict path removes the BO from the LRU lists, drops the LRU lock but 
hangs on to the reservation,
and in case the wait goes wrong, re-adds the bo to the LRU lists and 
returns an error.

Isn't it possible to do the same in the !no_wait_gpu case?
/Thomas
Maarten Lankhorst Nov. 28, 2012, 1:55 p.m. UTC | #5
Op 28-11-12 14:21, Thomas Hellstrom schreef:
> On 11/28/2012 01:15 PM, Maarten Lankhorst wrote:
>> Op 28-11-12 12:54, Thomas Hellstrom schreef:
>>> On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
>>>> By removing the unlocking of lru and retaking it immediately, a race is
>>>> removed where the bo is taken off the swap list or the lru list between
>>>> the unlock and relock. As such the cleanup_refs code can be simplified,
>>>> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
>>>> it will drop the locks and perform a blocking wait, or return an error
>>>> if no_wait_gpu was set.
>>>>
>>>> The need for looping is also eliminated, since swapout and evict_mem_first
>>>> will always follow the destruction path, so no new fence is allowed
>>>> to be attached. As far as I can see this may already have been the case,
>>>> but the unlocking / relocking required a complicated loop to deal with
>>>> re-reservation.
>>>>
>>>> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
>>>> reservation held, so drivers must be aware that move_notify with a null
>>>> parameter doesn't require a reservation.
>>> Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
>>> immediately clear from this patch.
>> Because we would hold the reservation while waiting and with the object still
>> on swap and lru lists still, that would defeat the whole purpose of keeping
>> the object on multiple lists, plus break current code that assumes bo's on the
>> those lists can always be reserved.
>>
>> the if (ret && !no_wait_gpu) path has to drop the reservation and lru lock, and
>> isn't guaranteed to be able to retake it. Maybe it could be guaranteed now, but
>> I'm sure that would not be the case if the reservations were shared across
>> devices.
> The evict path removes the BO from the LRU lists, drops the LRU lock but hangs on to the reservation,
> and in case the wait goes wrong, re-adds the bo to the LRU lists and returns an error.
If you really want to, we could hang on to the reservation in the !no_wait_gpu path; the wait shouldn't ever fail there, so I suppose
leaving it off the lru lists and not re-adding it to any list in case the wait fails is fine. It's still on the ddestroy list in that
case, so not adding it back to the other lists is harmless.

But I thought it was a feature that it still appeared on the lists while not being reserved, since in that
case swapout and evict_first wouldn't need to hunt down another buffer to swap out or free, instead waiting
ddestroy wait to finish, with possibly multiple waiters.
> Isn't it possible to do the same in the !no_wait_gpu case?
> /Thomas
Sure, we wouldn't even need to re-add it, since only having the bo on the ddestroy list is not catastrophic.
I was just worried because it is still a behavioral change.

I'm tempted to measure events about that though, and use perf events on how often contentions
occur, buffers get evicted and loaded back in again, etc..

Before we start worrying about any optimizations, we should worry about solid instrumentation first.

I think for example that evict_first and swapout could walk the ddestroy list first, all buffers that
can be reserved there will be tested if non-blocking wait succeeds or not. If it does succeed function
returns early, if not it unreserves it again without touching any list. This would cause ttm to prefer
non-blocking destruction of bo's before normal code runs, but without measurements it's no more
than a nice thought experiment.

~Maarten
Thomas Hellstrom Nov. 28, 2012, 2:23 p.m. UTC | #6
On 11/28/2012 02:55 PM, Maarten Lankhorst wrote:
> Op 28-11-12 14:21, Thomas Hellstrom schreef:
>> On 11/28/2012 01:15 PM, Maarten Lankhorst wrote:
>>> Op 28-11-12 12:54, Thomas Hellstrom schreef:
>>>> On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
>>>>> By removing the unlocking of lru and retaking it immediately, a race is
>>>>> removed where the bo is taken off the swap list or the lru list between
>>>>> the unlock and relock. As such the cleanup_refs code can be simplified,
>>>>> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
>>>>> it will drop the locks and perform a blocking wait, or return an error
>>>>> if no_wait_gpu was set.
>>>>>
>>>>> The need for looping is also eliminated, since swapout and evict_mem_first
>>>>> will always follow the destruction path, so no new fence is allowed
>>>>> to be attached. As far as I can see this may already have been the case,
>>>>> but the unlocking / relocking required a complicated loop to deal with
>>>>> re-reservation.
>>>>>
>>>>> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
>>>>> reservation held, so drivers must be aware that move_notify with a null
>>>>> parameter doesn't require a reservation.
>>>> Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
>>>> immediately clear from this patch.
>>> Because we would hold the reservation while waiting and with the object still
>>> on swap and lru lists still, that would defeat the whole purpose of keeping
>>> the object on multiple lists, plus break current code that assumes bo's on the
>>> those lists can always be reserved.
>>>
>>> the if (ret && !no_wait_gpu) path has to drop the reservation and lru lock, and
>>> isn't guaranteed to be able to retake it. Maybe it could be guaranteed now, but
>>> I'm sure that would not be the case if the reservations were shared across
>>> devices.
>> The evict path removes the BO from the LRU lists, drops the LRU lock but hangs on to the reservation,
>> and in case the wait goes wrong, re-adds the bo to the LRU lists and returns an error.
> If you really want to, we could hang on to the !no_wait_gpu path, wait shouldn't ever fail there, so I suppose
> leaving it off the lru lists and not re-add on any list in case of wait fail is fine. It's still on the ddestroy list in that
> case, so not adding it back to the other lists is harmless.
>
Well I'm a bit afraid that theoretically, other callers may have a bo 
reserved, while cleanup_refs_and_unlock
more or less runs the whole destroy path on that buffer. Sure, we have 
control over those other reservers,
but it may come back and bite us.

Also the wait might fail if a signal is hit, so it's definitely 
possible, and even likely in the case of the X server process.

Anyway, I prefer if we could try to keep the reservation across the 
ttm_cleanup_memtype_use  function, and as far
as I can tell, the only thing preventing that is the reservation release 
in the (!no_wait_gpu) path. So if we alter that to
do the same as the evict path I think without looking to deeply into the 
consequences that we should be safe.

Then we should be able to skip patch 2 as well.

/Thomas
Maarten Lankhorst Nov. 28, 2012, 2:46 p.m. UTC | #7
Op 28-11-12 15:23, Thomas Hellstrom schreef:
> On 11/28/2012 02:55 PM, Maarten Lankhorst wrote:
>> Op 28-11-12 14:21, Thomas Hellstrom schreef:
>>> On 11/28/2012 01:15 PM, Maarten Lankhorst wrote:
>>>> Op 28-11-12 12:54, Thomas Hellstrom schreef:
>>>>> On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
>>>>>> By removing the unlocking of lru and retaking it immediately, a race is
>>>>>> removed where the bo is taken off the swap list or the lru list between
>>>>>> the unlock and relock. As such the cleanup_refs code can be simplified,
>>>>>> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
>>>>>> it will drop the locks and perform a blocking wait, or return an error
>>>>>> if no_wait_gpu was set.
>>>>>>
>>>>>> The need for looping is also eliminated, since swapout and evict_mem_first
>>>>>> will always follow the destruction path, so no new fence is allowed
>>>>>> to be attached. As far as I can see this may already have been the case,
>>>>>> but the unlocking / relocking required a complicated loop to deal with
>>>>>> re-reservation.
>>>>>>
>>>>>> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
>>>>>> reservation held, so drivers must be aware that move_notify with a null
>>>>>> parameter doesn't require a reservation.
>>>>> Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
>>>>> immediately clear from this patch.
>>>> Because we would hold the reservation while waiting and with the object still
>>>> on swap and lru lists still, that would defeat the whole purpose of keeping
>>>> the object on multiple lists, plus break current code that assumes bo's on the
>>>> those lists can always be reserved.
>>>>
>>>> the if (ret && !no_wait_gpu) path has to drop the reservation and lru lock, and
>>>> isn't guaranteed to be able to retake it. Maybe it could be guaranteed now, but
>>>> I'm sure that would not be the case if the reservations were shared across
>>>> devices.
>>> The evict path removes the BO from the LRU lists, drops the LRU lock but hangs on to the reservation,
>>> and in case the wait goes wrong, re-adds the bo to the LRU lists and returns an error.
>> If you really want to, we could hang on to the !no_wait_gpu path, wait shouldn't ever fail there, so I suppose
>> leaving it off the lru lists and not re-add on any list in case of wait fail is fine. It's still on the ddestroy list in that
>> case, so not adding it back to the other lists is harmless.
>>
> Well I'm a bit afraid that theoretically, other callers may have a bo reserved, while cleanup_refs_and_unlock
> more or less runs the whole destroy path on that buffer. Sure, we have control over those other reservers,
> but it may come back and bite us.
That's why initially I moved all the destruction to ttm_bo_release_list, to have all destruction in
only 1 place. But even now it's serialized with the lru lock; while the destruction may not happen
right away, it still happens before the last list ref to the bo is dropped.

But it's your call, just choose the approach you want and I'll resubmit this. :-)

> Also the wait might fail if a signal is hit, so it's definitely possible, and even likely in the case of the X server process.
>
> Anyway, I prefer if we could try to keep the reservation across the ttm_cleanup_memtype_use  function, and as far
> as I can tell, the only thing preventing that is the reservation release in the (!no_wait_gpu) path. So if we alter that to
> do the same as the evict path I think without looking to deeply into the consequences that we should be safe.
I think returning success early, without removing the bo from the ddestroy list, would be better
if re-reserving with the lru lock held fails.

We completed the wait and attempt to reserve the bo, which failed. Without the lru
lock atomicity, this can't happen since the only places that would do it call this with
the lru lock held.

With the atomicity removal, the only place that could do this is ttm_bo_delayed_delete
with remove_all set to true. And even if that happened the destruction code would run
*anyway* since we completed the waiting part already, it would just not necessarily be
run from this thread, but that guarantee didn't exist anyway.
> Then we should be able to skip patch 2 as well.
If my tryreserve approach sounds sane, second patch should still be skippable. :-)

~Maarten
Thomas Hellstrom Nov. 28, 2012, 3:10 p.m. UTC | #8
On 11/28/2012 03:46 PM, Maarten Lankhorst wrote:
> Op 28-11-12 15:23, Thomas Hellstrom schreef:
>> On 11/28/2012 02:55 PM, Maarten Lankhorst wrote:
>>> Op 28-11-12 14:21, Thomas Hellstrom schreef:
>>>> On 11/28/2012 01:15 PM, Maarten Lankhorst wrote:
>>>>> Op 28-11-12 12:54, Thomas Hellstrom schreef:
>>>>>> On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
>>>>>>> By removing the unlocking of lru and retaking it immediately, a race is
>>>>>>> removed where the bo is taken off the swap list or the lru list between
>>>>>>> the unlock and relock. As such the cleanup_refs code can be simplified,
>>>>>>> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
>>>>>>> it will drop the locks and perform a blocking wait, or return an error
>>>>>>> if no_wait_gpu was set.
>>>>>>>
>>>>>>> The need for looping is also eliminated, since swapout and evict_mem_first
>>>>>>> will always follow the destruction path, so no new fence is allowed
>>>>>>> to be attached. As far as I can see this may already have been the case,
>>>>>>> but the unlocking / relocking required a complicated loop to deal with
>>>>>>> re-reservation.
>>>>>>>
>>>>>>> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
>>>>>>> reservation held, so drivers must be aware that move_notify with a null
>>>>>>> parameter doesn't require a reservation.
>>>>>> Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
>>>>>> immediately clear from this patch.
>>>>> Because we would hold the reservation while waiting and with the object still
>>>>> on swap and lru lists still, that would defeat the whole purpose of keeping
>>>>> the object on multiple lists, plus break current code that assumes bo's on the
>>>>> those lists can always be reserved.
>>>>>
>>>>> the if (ret && !no_wait_gpu) path has to drop the reservation and lru lock, and
>>>>> isn't guaranteed to be able to retake it. Maybe it could be guaranteed now, but
>>>>> I'm sure that would not be the case if the reservations were shared across
>>>>> devices.
>>>> The evict path removes the BO from the LRU lists, drops the LRU lock but hangs on to the reservation,
>>>> and in case the wait goes wrong, re-adds the bo to the LRU lists and returns an error.
>>> If you really want to, we could hang on to the !no_wait_gpu path, wait shouldn't ever fail there, so I suppose
>>> leaving it off the lru lists and not re-add on any list in case of wait fail is fine. It's still on the ddestroy list in that
>>> case, so not adding it back to the other lists is harmless.
>>>
>> Well I'm a bit afraid that theoretically, other callers may have a bo reserved, while cleanup_refs_and_unlock
>> more or less runs the whole destroy path on that buffer. Sure, we have control over those other reservers,
>> but it may come back and bite us.
> That's why initially I moved all the destruction to ttm_bo_release_list, to have all destruction in
> only 1 place. But even now it's serialized with the lru lock, while the destruction may not happen
> right away, it still happens before last list ref to the bo is dropped.
>
> But it's your call, just choose the approach you want and I'll resubmit this. :-)
>
>> Also the wait might fail if a signal is hit, so it's definitely possible, and even likely in the case of the X server process.
>>
>> Anyway, I prefer if we could try to keep the reservation across the ttm_cleanup_memtype_use  function, and as far
>> as I can tell, the only thing preventing that is the reservation release in the (!no_wait_gpu) path. So if we alter that to
>> do the same as the evict path I think without looking to deeply into the consequences that we should be safe.
> I think returning success early without removing off ddestroy list if re-reserving fails
> with lru lock held would be better.
>
> We completed the wait and attempt to reserve the bo, which failed. Without the lru
> lock atomicity, this can't happen since the only places that would do it call this with
> the lru lock held.
>
> With the atomicity removal, the only place that could do this is ttm_bo_delayed_delete
> with remove_all set to true. And even if that happened the destruction code would run
> *anyway* since we completed the waiting part already, it would just not necessarily be
> run from this thread, but that guarantee didn't exist anyway.
>> Then we should be able to skip patch 2 as well.
> If my tryreserve approach sounds sane, second patch should still be skippable. :-)

Sure, Lets go for that approach.

>
> ~Maarten
/Thomas
diff mbox

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 202fc20..02b275b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -486,14 +486,6 @@  static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 		bo->ttm = NULL;
 	}
 	ttm_bo_mem_put(bo, &bo->mem);
-
-	atomic_set(&bo->reserved, 0);
-
-	/*
-	 * Make processes trying to reserve really pick it up.
-	 */
-	smp_mb__after_atomic_dec();
-	wake_up_all(&bo->event_queue);
 }
 
 static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
@@ -515,6 +507,9 @@  static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		put_count = ttm_bo_del_from_lru(bo);
 
 		spin_unlock(&glob->lru_lock);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+
 		ttm_bo_cleanup_memtype_use(bo);
 
 		ttm_bo_list_ref_sub(bo, put_count, true);
@@ -543,68 +538,72 @@  static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 }
 
 /**
- * function ttm_bo_cleanup_refs
+ * function ttm_bo_cleanup_refs_and_unlock
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, do nothing.
  *
+ * Must be called with lru_lock and reservation held, this function
+ * will drop both before returning.
+ *
  * @interruptible         Any sleeps should occur interruptibly.
- * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
  * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
  */
 
-static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
-			       bool interruptible,
-			       bool no_wait_reserve,
-			       bool no_wait_gpu)
+static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
+					  bool interruptible,
+					  bool no_wait_gpu)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
 
-retry:
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, true);
 
-	if (unlikely(ret != 0))
+	if (ret && no_wait_gpu) {
+		spin_unlock(&bdev->fence_lock);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+		spin_unlock(&glob->lru_lock);
 		return ret;
+	} else if (ret) {
+		void *sync_obj;
 
-retry_reserve:
-	spin_lock(&glob->lru_lock);
-
-	if (unlikely(list_empty(&bo->ddestroy))) {
+		/**
+		 * Take a reference to the fence and unreserve,
+		 * at this point the buffer should be dead, so
+		 * no new sync objects can be attached.
+		 */
+		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
+		spin_unlock(&bdev->fence_lock);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
 		spin_unlock(&glob->lru_lock);
-		return 0;
-	}
-
-	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
 
-	if (unlikely(ret == -EBUSY)) {
-		spin_unlock(&glob->lru_lock);
-		if (likely(!no_wait_reserve))
-			ret = ttm_bo_wait_unreserved(bo, interruptible);
-		if (unlikely(ret != 0))
+		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
+		driver->sync_obj_unref(&sync_obj);
+		if (ret)
 			return ret;
 
-		goto retry_reserve;
-	}
-
-	BUG_ON(ret != 0);
+		/* remove sync_obj with ttm_bo_wait */
+		spin_lock(&bdev->fence_lock);
+		ret = ttm_bo_wait(bo, false, false, true);
+		spin_unlock(&bdev->fence_lock);
 
-	/**
-	 * We can re-check for sync object without taking
-	 * the bo::lock since setting the sync object requires
-	 * also bo::reserved. A busy object at this point may
-	 * be caused by another thread recently starting an accelerated
-	 * eviction.
-	 */
+		WARN_ON(ret);
 
-	if (unlikely(bo->sync_obj)) {
+		spin_lock(&glob->lru_lock);
+	} else {
+		spin_unlock(&bdev->fence_lock);
 		atomic_set(&bo->reserved, 0);
 		wake_up_all(&bo->event_queue);
+	}
+
+	if (unlikely(list_empty(&bo->ddestroy))) {
 		spin_unlock(&glob->lru_lock);
-		goto retry;
+		return 0;
 	}
 
 	put_count = ttm_bo_del_from_lru(bo);
@@ -647,9 +646,13 @@  static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 			kref_get(&nentry->list_kref);
 		}
 
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
-					  !remove_all);
+		ret = ttm_bo_reserve_locked(entry, false, !remove_all, false, 0);
+		if (!ret)
+			ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
+							     !remove_all);
+		else
+			spin_unlock(&glob->lru_lock);
+
 		kref_put(&entry->list_kref, ttm_bo_release_list);
 		entry = nentry;
 
@@ -803,9 +806,13 @@  retry:
 	kref_get(&bo->list_kref);
 
 	if (!list_empty(&bo->ddestroy)) {
-		spin_unlock(&glob->lru_lock);
-		ret = ttm_bo_cleanup_refs(bo, interruptible,
-					  no_wait_reserve, no_wait_gpu);
+		ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0);
+		if (!ret)
+			ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
+							     no_wait_gpu);
+		else
+			spin_unlock(&glob->lru_lock);
+
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 
 		return ret;
@@ -1799,8 +1806,9 @@  static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 		kref_get(&bo->list_kref);
 
 		if (!list_empty(&bo->ddestroy)) {
-			spin_unlock(&glob->lru_lock);
-			(void) ttm_bo_cleanup_refs(bo, false, false, false);
+			ttm_bo_reserve_locked(bo, false, false, false, 0);
+			ttm_bo_cleanup_refs_and_unlock(bo, false, false);
+
 			kref_put(&bo->list_kref, ttm_bo_release_list);
 			spin_lock(&glob->lru_lock);
 			continue;