
[16/66] drm/i915/gem: Remove the call for no-evict i915_vma_pin

Message ID 20200715115147.11866-16-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/66] drm/i915: Reduce i915_request.lock contention for i915_request_wait

Commit Message

Chris Wilson July 15, 2020, 11:50 a.m. UTC
Remove the stub i915_vma_pin() used for incrementally pining objects for
execbuf (under the severe restriction that they must not wait on a
resource as we may have already pinned it) and replace it with a
i915_vma_pin_inplace() that is only allowed to reclaim the currently
bound location for the vma (and will never wait for a pinned resource).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 69 +++++++++++--------
 drivers/gpu/drm/i915/i915_vma.c               |  6 +-
 drivers/gpu/drm/i915/i915_vma.h               |  2 +
 3 files changed, 45 insertions(+), 32 deletions(-)
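
For context, the contract being introduced looks roughly like the
sketch below (condensed for illustration, not the exact code from this
series; the overflow/error checks and the pin-count==0 case taken
under vm->mutex are elided):

	/*
	 * Pin "in place": atomically bump the pin count in vma->flags,
	 * but only while the vma is already bound with every requested
	 * bind flag.  Never wait, rebind or evict -- on any mismatch
	 * the caller must fall back to the full i915_vma_pin() path.
	 */
	bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags)
	{
		unsigned int bound = atomic_read(&vma->flags);

		GEM_BUG_ON(flags & ~I915_VMA_BIND_MASK);

		do {
			/* Required binding (GGTT/ppGTT) not present? */
			if (unlikely(flags & ~bound))
				return false;
		} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));

		return true;
	}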

Comments

Tvrtko Ursulin July 17, 2020, 2:36 p.m. UTC | #1
On 15/07/2020 12:50, Chris Wilson wrote:
> Remove the stub i915_vma_pin() used for incrementally pining objects for
> execbuf (under the severe restriction that they must not wait on a
> resource as we may have already pinned it) and replace it with a
> i915_vma_pin_inplace() that is only allowed to reclaim the currently
> bound location for the vma (and will never wait for a pinned resource).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 69 +++++++++++--------
>   drivers/gpu/drm/i915/i915_vma.c               |  6 +-
>   drivers/gpu/drm/i915/i915_vma.h               |  2 +
>   3 files changed, 45 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 28cf28fcf80a..0b8a26da26e5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -452,49 +452,55 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
>   	return pin_flags;
>   }
>   
> +static bool eb_pin_vma_fence_inplace(struct eb_vma *ev)
> +{
> +	struct i915_vma *vma = ev->vma;
> +	struct i915_fence_reg *reg = vma->fence;
> +
> +	if (reg) {
> +		if (READ_ONCE(reg->dirty))
> +			return false;
> +
> +		atomic_inc(&reg->pin_count);

Why is this safe outside the vm->mutex? It otherwise seems to be 
protecting this pin count.

Regards,

Tvrtko

> +		ev->flags |= __EXEC_OBJECT_HAS_FENCE;
> +	} else {
> +		if (i915_gem_object_is_tiled(vma->obj))
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
>   static inline bool
> -eb_pin_vma(struct i915_execbuffer *eb,
> -	   const struct drm_i915_gem_exec_object2 *entry,
> -	   struct eb_vma *ev)
> +eb_pin_vma_inplace(struct i915_execbuffer *eb,
> +		   const struct drm_i915_gem_exec_object2 *entry,
> +		   struct eb_vma *ev)
>   {
>   	struct i915_vma *vma = ev->vma;
> -	u64 pin_flags;
> +	unsigned int pin_flags;
>   
> -	if (vma->node.size)
> -		pin_flags = vma->node.start;
> -	else
> -		pin_flags = entry->offset & PIN_OFFSET_MASK;
> +	if (eb_vma_misplaced(entry, vma, ev->flags))
> +		return false;
>   
> -	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
> +	pin_flags = PIN_USER;
>   	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
>   		pin_flags |= PIN_GLOBAL;
>   
>   	/* Attempt to reuse the current location if available */
> -	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
> -		if (entry->flags & EXEC_OBJECT_PINNED)
> -			return false;
> -
> -		/* Failing that pick any _free_ space if suitable */
> -		if (unlikely(i915_vma_pin(vma,
> -					  entry->pad_to_size,
> -					  entry->alignment,
> -					  eb_pin_flags(entry, ev->flags) |
> -					  PIN_USER | PIN_NOEVICT)))
> -			return false;
> -	}
> +	if (!i915_vma_pin_inplace(vma, pin_flags))
> +		return false;
>   
>   	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
> -		if (unlikely(i915_vma_pin_fence(vma))) {
> -			i915_vma_unpin(vma);
> +		if (!eb_pin_vma_fence_inplace(ev)) {
> +			__i915_vma_unpin(vma);
>   			return false;
>   		}
> -
> -		if (vma->fence)
> -			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
>   	}
>   
> +	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
> +
>   	ev->flags |= __EXEC_OBJECT_HAS_PIN;
> -	return !eb_vma_misplaced(entry, vma, ev->flags);
> +	return true;
>   }
>   
>   static int
> @@ -676,14 +682,17 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
>   		struct drm_i915_gem_exec_object2 *entry = ev->exec;
>   		struct i915_vma *vma = ev->vma;
>   
> -		if (eb_pin_vma(eb, entry, ev)) {
> +		if (eb_pin_vma_inplace(eb, entry, ev)) {
>   			if (entry->offset != vma->node.start) {
>   				entry->offset = vma->node.start | UPDATE;
>   				eb->args->flags |= __EXEC_HAS_RELOC;
>   			}
>   		} else {
> -			eb_unreserve_vma(ev);
> -			list_add_tail(&ev->unbound_link, &unbound);
> +			/* Lightly sort user placed objects to the fore */
> +			if (ev->flags & EXEC_OBJECT_PINNED)
> +				list_add(&ev->unbound_link, &unbound);
> +			else
> +				list_add_tail(&ev->unbound_link, &unbound);
>   		}
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index c6bf04ca2032..dbe11b349175 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -740,11 +740,13 @@ i915_vma_detach(struct i915_vma *vma)
>   	list_del(&vma->vm_link);
>   }
>   
> -static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
> +bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags)
>   {
>   	unsigned int bound;
>   	bool pinned = true;
>   
> +	GEM_BUG_ON(flags & ~I915_VMA_BIND_MASK);
> +
>   	bound = atomic_read(&vma->flags);
>   	do {
>   		if (unlikely(flags & ~bound))
> @@ -865,7 +867,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
>   
>   	/* First try and grab the pin without rebinding the vma */
> -	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
> +	if (i915_vma_pin_inplace(vma, flags & I915_VMA_BIND_MASK))
>   		return 0;
>   
>   	err = vma_get_pages(vma);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index d0d01f909548..03fea54fd573 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -236,6 +236,8 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
>   	dma_resv_unlock(vma->resv);
>   }
>   
> +bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags);
> +
>   int __must_check
>   i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
>   int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
>
Thomas Hellström (Intel) July 28, 2020, 9:46 a.m. UTC | #2
On 7/15/20 1:50 PM, Chris Wilson wrote:
> Remove the stub i915_vma_pin() used for incrementally pining objects for

s/pining/pinning/
Chris Wilson July 28, 2020, 3:04 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-07-17 15:36:04)
> 
> On 15/07/2020 12:50, Chris Wilson wrote:
> > Remove the stub i915_vma_pin() used for incrementally pining objects for
> > execbuf (under the severe restriction that they must not wait on a
> > resource as we may have already pinned it) and replace it with a
> > i915_vma_pin_inplace() that is only allowed to reclaim the currently
> > bound location for the vma (and will never wait for a pinned resource).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 69 +++++++++++--------
> >   drivers/gpu/drm/i915/i915_vma.c               |  6 +-
> >   drivers/gpu/drm/i915/i915_vma.h               |  2 +
> >   3 files changed, 45 insertions(+), 32 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index 28cf28fcf80a..0b8a26da26e5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -452,49 +452,55 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
> >       return pin_flags;
> >   }
> >   
> > +static bool eb_pin_vma_fence_inplace(struct eb_vma *ev)
> > +{
> > +     struct i915_vma *vma = ev->vma;
> > +     struct i915_fence_reg *reg = vma->fence;
> > +
> > +     if (reg) {
> > +             if (READ_ONCE(reg->dirty))
> > +                     return false;
> > +
> > +             atomic_inc(&reg->pin_count);
> 
> Why is this safe outside the vm->mutex? It otherwise seems to be 
> protecting this pin count.

I was working on having the fence protected by the vma. It's important
that we do avoid the fallback scheme -- although not strictly as
important for gen2/gen3 as they do not need the ppGTT preallocations.

If I adapt find_fence() to operate against a concurrent atomic_inc(),
that should dig me out of the hole. (Another cmpxchg, oh my.)
-Chris
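
One shape that cmpxchg could take, purely as an illustrative sketch of
the idea rather than the eventual fix (fence_reg_try_steal() is an
invented name):

	/*
	 * Let the fence-stealing path in find_fence() claim a register
	 * only while nobody holds a pin.  A concurrent atomic_inc()
	 * from eb_pin_vma_fence_inplace() changes pin_count first and
	 * makes this cmpxchg fail, so the steal and the lockless pin
	 * cannot both succeed.  The -1 "being stolen" sentinel is
	 * invented for illustration.
	 */
	static bool fence_reg_try_steal(struct i915_fence_reg *reg)
	{
		int count = 0;

		return atomic_try_cmpxchg(&reg->pin_count, &count, -1);
	}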
Chris Wilson July 28, 2020, 3:05 p.m. UTC | #4
Quoting Thomas Hellström (Intel) (2020-07-28 10:46:51)
> 
> On 7/15/20 1:50 PM, Chris Wilson wrote:
> > Remove the stub i915_vma_pin() used for incrementally pining objects for
> 
> s/pining/pinning/

Pining for the fjords.
-Chris
Thomas Hellström (Intel) July 31, 2020, 8:58 a.m. UTC | #5
On 7/28/20 5:05 PM, Chris Wilson wrote:
> Quoting Thomas Hellström (Intel) (2020-07-28 10:46:51)
>> On 7/15/20 1:50 PM, Chris Wilson wrote:
>>> Remove the stub i915_vma_pin() used for incrementally pining objects for
>> s/pining/pinning/
> Pining for the fjords.
> -Chris

Apart from that,

Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 28cf28fcf80a..0b8a26da26e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -452,49 +452,55 @@  static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
 	return pin_flags;
 }
 
+static bool eb_pin_vma_fence_inplace(struct eb_vma *ev)
+{
+	struct i915_vma *vma = ev->vma;
+	struct i915_fence_reg *reg = vma->fence;
+
+	if (reg) {
+		if (READ_ONCE(reg->dirty))
+			return false;
+
+		atomic_inc(&reg->pin_count);
+		ev->flags |= __EXEC_OBJECT_HAS_FENCE;
+	} else {
+		if (i915_gem_object_is_tiled(vma->obj))
+			return false;
+	}
+
+	return true;
+}
+
 static inline bool
-eb_pin_vma(struct i915_execbuffer *eb,
-	   const struct drm_i915_gem_exec_object2 *entry,
-	   struct eb_vma *ev)
+eb_pin_vma_inplace(struct i915_execbuffer *eb,
+		   const struct drm_i915_gem_exec_object2 *entry,
+		   struct eb_vma *ev)
 {
 	struct i915_vma *vma = ev->vma;
-	u64 pin_flags;
+	unsigned int pin_flags;
 
-	if (vma->node.size)
-		pin_flags = vma->node.start;
-	else
-		pin_flags = entry->offset & PIN_OFFSET_MASK;
+	if (eb_vma_misplaced(entry, vma, ev->flags))
+		return false;
 
-	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	pin_flags = PIN_USER;
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
 		pin_flags |= PIN_GLOBAL;
 
 	/* Attempt to reuse the current location if available */
-	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
-		if (entry->flags & EXEC_OBJECT_PINNED)
-			return false;
-
-		/* Failing that pick any _free_ space if suitable */
-		if (unlikely(i915_vma_pin(vma,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  eb_pin_flags(entry, ev->flags) |
-					  PIN_USER | PIN_NOEVICT)))
-			return false;
-	}
+	if (!i915_vma_pin_inplace(vma, pin_flags))
+		return false;
 
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
-		if (unlikely(i915_vma_pin_fence(vma))) {
-			i915_vma_unpin(vma);
+		if (!eb_pin_vma_fence_inplace(ev)) {
+			__i915_vma_unpin(vma);
 			return false;
 		}
-
-		if (vma->fence)
-			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
+
 	ev->flags |= __EXEC_OBJECT_HAS_PIN;
-	return !eb_vma_misplaced(entry, vma, ev->flags);
+	return true;
 }
 
 static int
@@ -676,14 +682,17 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 		struct drm_i915_gem_exec_object2 *entry = ev->exec;
 		struct i915_vma *vma = ev->vma;
 
-		if (eb_pin_vma(eb, entry, ev)) {
+		if (eb_pin_vma_inplace(eb, entry, ev)) {
 			if (entry->offset != vma->node.start) {
 				entry->offset = vma->node.start | UPDATE;
 				eb->args->flags |= __EXEC_HAS_RELOC;
 			}
 		} else {
-			eb_unreserve_vma(ev);
-			list_add_tail(&ev->unbound_link, &unbound);
+			/* Lightly sort user placed objects to the fore */
+			if (ev->flags & EXEC_OBJECT_PINNED)
+				list_add(&ev->unbound_link, &unbound);
+			else
+				list_add_tail(&ev->unbound_link, &unbound);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index c6bf04ca2032..dbe11b349175 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -740,11 +740,13 @@  i915_vma_detach(struct i915_vma *vma)
 	list_del(&vma->vm_link);
 }
 
-static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
+bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags)
 {
 	unsigned int bound;
 	bool pinned = true;
 
+	GEM_BUG_ON(flags & ~I915_VMA_BIND_MASK);
+
 	bound = atomic_read(&vma->flags);
 	do {
 		if (unlikely(flags & ~bound))
@@ -865,7 +867,7 @@  int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
 
 	/* First try and grab the pin without rebinding the vma */
-	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
+	if (i915_vma_pin_inplace(vma, flags & I915_VMA_BIND_MASK))
 		return 0;
 
 	err = vma_get_pages(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index d0d01f909548..03fea54fd573 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -236,6 +236,8 @@  static inline void i915_vma_unlock(struct i915_vma *vma)
 	dma_resv_unlock(vma->resv);
 }
 
+bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags);
+
 int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
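
Taken together, callers split into a lockless fast path and the
pre-existing slow path; a condensed usage sketch (not verbatim kernel
code; flags as used by the patch):

	/* Fast path: reuse the current binding without waiting. */
	if (!i915_vma_pin_inplace(vma, PIN_USER)) {
		/* Slow path: may allocate, bind, wait and evict. */
		err = i915_vma_pin(vma, size, alignment, pin_flags | PIN_USER);
		if (err)
			return err;
	}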