diff mbox series

[2/2] drm/vmwgfx: Make sure unpinning handles reservations

Message ID 20210408172245.673785-2-zackr@vmware.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/vmwgfx: Fix the lockdep breakage | expand

Commit Message

Zack Rusin April 8, 2021, 5:22 p.m. UTC
Quite often it's a little hard to tell if reservations are already held
in code paths that unpin bo's. While our pinning/unpinning code should
be more explicit that requires a substantial amount of work so instead
we can avoid the issues by making sure we try to reserve before unpinning.
Because we unpin those bo's only on destruction/error paths just that check
tells us if we're already reserved or not and allows us to cleanly unpin.

Reviewed-by: Martin Krastev <krastevm@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers")
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Zack Rusin <zackr@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++-
 drivers/gpu/drm/vmwgfx/vmwgfx_mob.c |  8 ++++----
 2 files changed, 20 insertions(+), 5 deletions(-)

Comments

Thomas Hellström (Intel) April 9, 2021, 7:38 a.m. UTC | #1
Hi, Zack,

On 4/8/21 7:22 PM, Zack Rusin wrote:
> Quite often it's a little hard to tell if reservations are already held
> in code paths that unpin bo's. While our pinning/unpinning code should
> be more explicit that requires a substantial amount of work so instead
> we can avoid the issues by making sure we try to reserve before unpinning.
> Because we unpin those bo's only on destruction/error paths just that check
> tells us if we're already reserved or not and allows us to cleanly unpin.
>
> Reviewed-by: Martin Krastev <krastevm@vmware.com>
> Reviewed-by: Roland Scheidegger <sroland@vmware.com>
> Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers")
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: Zack Rusin <zackr@vmware.com>
> ---
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++-
>   drivers/gpu/drm/vmwgfx/vmwgfx_mob.c |  8 ++++----
>   2 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> index 8087a9013455..03bef9c17e56 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf)
>   	return srf;
>   }
>   
> +/*
> + * vmw_bo_unpin_safe - currently pinning requires a reservation to be held
> + * but sometimes it's hard to tell if we're in a callback whose parent
> + * is already holding a reservation, to avoid deadlocks we have to try
> + * to get a reservation explicitly to also try to avoid messing up the
> + * internal ttm lru bo list
> + */
> +static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo)
> +{
> +	bool locked = dma_resv_trylock(bo->base.resv);

Isn't there a chance another thread is holding the lock and releasing it 
at this position?

> +	ttm_bo_unpin(bo);
> +	if (locked)
> +		dma_resv_unlock(bo->base.resv);
> +}
> +
>   static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
>   {
>   	struct vmw_buffer_object *tmp_buf = *buf;
> @@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
>   	*buf = NULL;
>   	if (tmp_buf != NULL) {
>   		if (tmp_buf->base.pin_count > 0)
> -			ttm_bo_unpin(&tmp_buf->base);
> +			vmw_bo_unpin_safe(&tmp_buf->base);
Hmm. If execbuf is referencing a buffer that someone else has pinned, 
wouldn't execbuf incorrectly unpin that buffer when calling unreference? 
Would it perhaps be possible to if needed, use the TTM release_notify 
callback to unpin any leaking pins similar to what's done in 
ttm_bo_release? Although that I guess goes somewhat against that 
recently added WARN_ON_ONCE.
>   		ttm_bo_put(&tmp_buf->base);
>   	}
>   }
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
> index a0b53141dded..23ffeb2dd6e0 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
> @@ -277,7 +277,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv,
>   						 &batch->otables[i]);
>   	}
>   
> -	ttm_bo_unpin(batch->otable_bo);
> +	vmw_bo_unpin_safe(batch->otable_bo);
Could it be we're the only user here? If so safe to reserve and unpin.
>   	ttm_bo_put(batch->otable_bo);
>   	batch->otable_bo = NULL;
>   	return ret;
> @@ -343,7 +343,7 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv,
>   	vmw_bo_fence_single(bo, NULL);
>   	ttm_bo_unreserve(bo);
>   
> -	ttm_bo_unpin(batch->otable_bo);
> +	vmw_bo_unpin_safe(batch->otable_bo);
Would it be possible to just move ttm_bo_unpin() above the 
ttm_bo_unreserve() above?
>   	ttm_bo_put(batch->otable_bo);
>   	batch->otable_bo = NULL;
>   }
> @@ -530,7 +530,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob,
>   void vmw_mob_destroy(struct vmw_mob *mob)
>   {
>   	if (mob->pt_bo) {
> -		ttm_bo_unpin(mob->pt_bo);
> +		vmw_bo_unpin_safe(mob->pt_bo);
>   		ttm_bo_put(mob->pt_bo);
>   		mob->pt_bo = NULL;
>   	}
> @@ -646,7 +646,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv,
>   out_no_cmd_space:
>   	vmw_fifo_resource_dec(dev_priv);
>   	if (pt_set_up) {
> -		ttm_bo_unpin(mob->pt_bo);
> +		vmw_bo_unpin_safe(mob->pt_bo);
Perhaps the same here?
>   		ttm_bo_put(mob->pt_bo);
>   		mob->pt_bo = NULL;
>   	}

/Thomas
Daniel Vetter April 9, 2021, 7:40 a.m. UTC | #2
On Thu, Apr 08, 2021 at 01:22:45PM -0400, Zack Rusin wrote:
> Quite often it's a little hard to tell if reservations are already held
> in code paths that unpin bo's. While our pinning/unpinning code should
> be more explicit that requires a substantial amount of work so instead
> we can avoid the issues by making sure we try to reserve before unpinning.
> Because we unpin those bo's only on destruction/error paths just that check
> tells us if we're already reserved or not and allows us to cleanly unpin.
> 
> Reviewed-by: Martin Krastev <krastevm@vmware.com>
> Reviewed-by: Roland Scheidegger <sroland@vmware.com>
> Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers")
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: Zack Rusin <zackr@vmware.com>
> ---
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++-
>  drivers/gpu/drm/vmwgfx/vmwgfx_mob.c |  8 ++++----
>  2 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> index 8087a9013455..03bef9c17e56 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf)
>  	return srf;
>  }
>  
> +/*
> + * vmw_bo_unpin_safe - currently pinning requires a reservation to be held
> + * but sometimes it's hard to tell if we're in a callback whose parent
> + * is already holding a reservation, to avoid deadlocks we have to try
> + * to get a reservation explicitly to also try to avoid messing up the
> + * internal ttm lru bo list
> + */
> +static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo)
> +{
> +	bool locked = dma_resv_trylock(bo->base.resv);
> +	ttm_bo_unpin(bo);
> +	if (locked)
> +		dma_resv_unlock(bo->base.resv);
> +}
> +
>  static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
>  {
>  	struct vmw_buffer_object *tmp_buf = *buf;
> @@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
>  	*buf = NULL;
>  	if (tmp_buf != NULL) {
>  		if (tmp_buf->base.pin_count > 0)
> -			ttm_bo_unpin(&tmp_buf->base);
> +			vmw_bo_unpin_safe(&tmp_buf->base);

So in the unreference callback I understand it might be tricky and you
need this, but do all the others below really don't know whether the bo is
locked or not?

Also _trylock is a bit much yolo locking here, I'd minimally put a comment
there that we don't actually care about races, it's just to shut up ttm
locking checks. Whether that's true or not is another question I think.

And if it's just this case here, maybe inline the trylock, and for the
others do a vmw_bo_unpin_unlocked which unconditionally grabs the lock?
-Daniel

>  		ttm_bo_put(&tmp_buf->base);
>  	}
>  }
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
> index a0b53141dded..23ffeb2dd6e0 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
> @@ -277,7 +277,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv,
>  						 &batch->otables[i]);
>  	}
>  
> -	ttm_bo_unpin(batch->otable_bo);
> +	vmw_bo_unpin_safe(batch->otable_bo);
>  	ttm_bo_put(batch->otable_bo);
>  	batch->otable_bo = NULL;
>  	return ret;
> @@ -343,7 +343,7 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv,
>  	vmw_bo_fence_single(bo, NULL);
>  	ttm_bo_unreserve(bo);
>  
> -	ttm_bo_unpin(batch->otable_bo);
> +	vmw_bo_unpin_safe(batch->otable_bo);
>  	ttm_bo_put(batch->otable_bo);
>  	batch->otable_bo = NULL;
>  }
> @@ -530,7 +530,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob,
>  void vmw_mob_destroy(struct vmw_mob *mob)
>  {
>  	if (mob->pt_bo) {
> -		ttm_bo_unpin(mob->pt_bo);
> +		vmw_bo_unpin_safe(mob->pt_bo);
>  		ttm_bo_put(mob->pt_bo);
>  		mob->pt_bo = NULL;
>  	}
> @@ -646,7 +646,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv,
>  out_no_cmd_space:
>  	vmw_fifo_resource_dec(dev_priv);
>  	if (pt_set_up) {
> -		ttm_bo_unpin(mob->pt_bo);
> +		vmw_bo_unpin_safe(mob->pt_bo);
>  		ttm_bo_put(mob->pt_bo);
>  		mob->pt_bo = NULL;
>  	}
> -- 
> 2.27.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Zack Rusin April 10, 2021, 7:02 p.m. UTC | #3
On 4/9/21 3:40 AM, Daniel Vetter wrote:
> On Thu, Apr 08, 2021 at 01:22:45PM -0400, Zack Rusin wrote:
>> Quite often it's a little hard to tell if reservations are already held
>> in code paths that unpin bo's. While our pinning/unpinning code should
>> be more explicit that requires a substantial amount of work so instead
>> we can avoid the issues by making sure we try to reserve before unpinning.
>> Because we unpin those bo's only on destruction/error paths just that check
>> tells us if we're already reserved or not and allows us to cleanly unpin.
>>
>> Reviewed-by: Martin Krastev <krastevm@vmware.com>
>> Reviewed-by: Roland Scheidegger <sroland@vmware.com>
>> Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers")
>> Cc: dri-devel@lists.freedesktop.org
>> Signed-off-by: Zack Rusin <zackr@vmware.com>
>> ---
>>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++-
>>   drivers/gpu/drm/vmwgfx/vmwgfx_mob.c |  8 ++++----
>>   2 files changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
>> index 8087a9013455..03bef9c17e56 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
>> @@ -1517,6 +1517,21 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf)
>>   	return srf;
>>   }
>>   
>> +/*
>> + * vmw_bo_unpin_safe - currently pinning requires a reservation to be held
>> + * but sometimes it's hard to tell if we're in a callback whose parent
>> + * is already holding a reservation, to avoid deadlocks we have to try
>> + * to get a reservation explicitly to also try to avoid messing up the
>> + * internal ttm lru bo list
>> + */
>> +static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo)
>> +{
>> +	bool locked = dma_resv_trylock(bo->base.resv);
>> +	ttm_bo_unpin(bo);
>> +	if (locked)
>> +		dma_resv_unlock(bo->base.resv);
>> +}
>> +
>>   static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
>>   {
>>   	struct vmw_buffer_object *tmp_buf = *buf;
>> @@ -1524,7 +1539,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
>>   	*buf = NULL;
>>   	if (tmp_buf != NULL) {
>>   		if (tmp_buf->base.pin_count > 0)
>> -			ttm_bo_unpin(&tmp_buf->base);
>> +			vmw_bo_unpin_safe(&tmp_buf->base);
> 
> So in the unreference callback I understand it might be tricky and you
> need this, but do all the others below really don't know whether the bo is
> locked or not?

TBH, I just liked having all those paths going through the same 
functions. I agree that it wasn't really correct or particularly graceful.

> Also _trylock is a bit much yolo locking here, I'd minimally put a comment
> there that we don't actually care about races, it's just to shut up ttm
> locking checks. Whether that's true or not is another question I think.
> 
> And if it's just this case here, maybe inline the trylock, and for the
> others do a vmw_bo_unpin_unlocked which unconditionally grabs the lock?

Fair enough, I think that's a good suggestion, so I went ahead and did 
just that.

z
Zack Rusin April 10, 2021, 7:04 p.m. UTC | #4
On 4/9/21 3:38 AM, Thomas Hellström (Intel) wrote:
> Hi, Zack,
> 
> On 4/8/21 7:22 PM, Zack Rusin wrote:
>> Quite often it's a little hard to tell if reservations are already held
>> in code paths that unpin bo's. While our pinning/unpinning code should
>> be more explicit that requires a substantial amount of work so instead
>> we can avoid the issues by making sure we try to reserve before 
>> unpinning.
>> Because we unpin those bo's only on destruction/error paths just that 
>> check
>> tells us if we're already reserved or not and allows us to cleanly unpin.
>>
>> Reviewed-by: Martin Krastev <krastevm@vmware.com>
>> Reviewed-by: Roland Scheidegger <sroland@vmware.com>
>> Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed 
>> buffers")
>> Cc: dri-devel@lists.freedesktop.org
>> Signed-off-by: Zack Rusin <zackr@vmware.com>
>> ---
>>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 17 ++++++++++++++++-
>>   drivers/gpu/drm/vmwgfx/vmwgfx_mob.c |  8 ++++----
>>   2 files changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
>> b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
>> index 8087a9013455..03bef9c17e56 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
>> @@ -1517,6 +1517,21 @@ static inline struct vmw_surface 
>> *vmw_surface_reference(struct vmw_surface *srf)
>>       return srf;
>>   }
>> +/*
>> + * vmw_bo_unpin_safe - currently pinning requires a reservation to be 
>> held
>> + * but sometimes it's hard to tell if we're in a callback whose parent
>> + * is already holding a reservation, to avoid deadlocks we have to try
>> + * to get a reservation explicitly to also try to avoid messing up the
>> + * internal ttm lru bo list
>> + */
>> +static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo)
>> +{
>> +    bool locked = dma_resv_trylock(bo->base.resv);
> 
> Isn't there a chance another thread is holding the lock and releasing it 
> at this position?

Yes, it was definitely possible. In v2 I implemented it the way Daniel 
suggested, I think it's a decent compromise. Thanks for taking a look at 
this!

z
diff mbox series

Patch

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 8087a9013455..03bef9c17e56 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1517,6 +1517,21 @@  static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf)
 	return srf;
 }
 
+/*
+ * vmw_bo_unpin_safe - currently pinning requires a reservation to be held
+ * but sometimes it's hard to tell if we're in a callback whose parent
+ * is already holding a reservation, to avoid deadlocks we have to try
+ * to get a reservation explicitly to also try to avoid messing up the
+ * internal ttm lru bo list
+ */
+static inline void vmw_bo_unpin_safe(struct ttm_buffer_object *bo)
+{
+	bool locked = dma_resv_trylock(bo->base.resv);
+	ttm_bo_unpin(bo);
+	if (locked)
+		dma_resv_unlock(bo->base.resv);
+}
+
 static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
 {
 	struct vmw_buffer_object *tmp_buf = *buf;
@@ -1524,7 +1539,7 @@  static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
 	*buf = NULL;
 	if (tmp_buf != NULL) {
 		if (tmp_buf->base.pin_count > 0)
-			ttm_bo_unpin(&tmp_buf->base);
+			vmw_bo_unpin_safe(&tmp_buf->base);
 		ttm_bo_put(&tmp_buf->base);
 	}
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
index a0b53141dded..23ffeb2dd6e0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
@@ -277,7 +277,7 @@  static int vmw_otable_batch_setup(struct vmw_private *dev_priv,
 						 &batch->otables[i]);
 	}
 
-	ttm_bo_unpin(batch->otable_bo);
+	vmw_bo_unpin_safe(batch->otable_bo);
 	ttm_bo_put(batch->otable_bo);
 	batch->otable_bo = NULL;
 	return ret;
@@ -343,7 +343,7 @@  static void vmw_otable_batch_takedown(struct vmw_private *dev_priv,
 	vmw_bo_fence_single(bo, NULL);
 	ttm_bo_unreserve(bo);
 
-	ttm_bo_unpin(batch->otable_bo);
+	vmw_bo_unpin_safe(batch->otable_bo);
 	ttm_bo_put(batch->otable_bo);
 	batch->otable_bo = NULL;
 }
@@ -530,7 +530,7 @@  static void vmw_mob_pt_setup(struct vmw_mob *mob,
 void vmw_mob_destroy(struct vmw_mob *mob)
 {
 	if (mob->pt_bo) {
-		ttm_bo_unpin(mob->pt_bo);
+		vmw_bo_unpin_safe(mob->pt_bo);
 		ttm_bo_put(mob->pt_bo);
 		mob->pt_bo = NULL;
 	}
@@ -646,7 +646,7 @@  int vmw_mob_bind(struct vmw_private *dev_priv,
 out_no_cmd_space:
 	vmw_fifo_resource_dec(dev_priv);
 	if (pt_set_up) {
-		ttm_bo_unpin(mob->pt_bo);
+		vmw_bo_unpin_safe(mob->pt_bo);
 		ttm_bo_put(mob->pt_bo);
 		mob->pt_bo = NULL;
 	}