drm/i915: Separate fence pin counting from normal bind pin counting

Message ID 1307340601-32259-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 6, 2011, 6:10 a.m. UTC
In order to correctly account for reserving space in the GTT and fences
for a batch buffer, we need to independently track whether the fence is
pinned due to a fenced GPU access in the batch from whether the
buffer is pinned in the aperture. Currently we count the fence as
pinned if the buffer has already been seen in the execbuffer. This leads
to a false accounting of available fence registers, causing frequent
mass evictions. Worse, if coupled with the change to make
i915_gem_object_get_fence() report EDEADLK upon fence starvation, the
batchbuffer can fail with only one fence required...
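
To make the distinction concrete, here is a minimal sketch of the idea
(illustrative only, not the patch code): the object carries two
independent counters, and only the fence counter should make a fence
register ineligible for stealing.

	/* Sketch: pin_count keeps the object bound in the aperture,
	 * fence_pin_count keeps its fence register reserved. A bound
	 * but unfenced object must no longer block fence stealing.
	 */
	struct sketch_gem_object {
		unsigned int pin_count : 4;		/* GTT binding pins */
		unsigned int fence_pin_count : 3;	/* fence register pins */
	};

	static bool fence_is_stealable(const struct sketch_gem_object *obj)
	{
		return obj->fence_pin_count == 0;	/* not obj->pin_count */
	}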

In order to trigger the false accounting, one can simply submit two
batches containing the same 18 [2*(num_avail_fences+1)] buffers. In the
first batch, the first 9 buffers require a fence. In the second batch,
the last 9 require a fence. Due to the prior pinning of all buffers,
which also then pins the fence register, this results in the false
starvation and forced eviction of the currently active buffers.
Admittedly, such batch buffers require pipelined fencing...
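
For illustration, a hypothetical reproduction sketch (not part of the
patch): bo_handle[] is assumed to hold 18 tiled buffer handles, fd an
open DRM file descriptor, and submit() a thin wrapper around the
DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl.

	struct drm_i915_gem_exec_object2 exec[18];
	int i;

	memset(exec, 0, sizeof(exec));
	for (i = 0; i < 18; i++) {
		exec[i].handle = bo_handle[i];
		exec[i].flags = i < 9 ? EXEC_OBJECT_NEEDS_FENCE : 0;
	}
	submit(fd, exec, 18);	/* first batch: fences on the first 9 */

	for (i = 0; i < 18; i++)
		exec[i].flags = i < 9 ? 0 : EXEC_OBJECT_NEEDS_FENCE;
	/* Second batch: the old accounting treats every pinned buffer's
	 * fence as pinned as well, so the 9 new fence requests find no
	 * free registers and force a mass eviction.
	 */
	submit(fd, exec, 18);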

Note, this fixes a severe performance regression with heavy fenced BLT
users such as the Cairo traces firefox-planet-gnome and midori-zoomed on
gen3.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h            |   21 ++++
 drivers/gpu/drm/i915/i915_gem.c            |    7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  161 +++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_display.c       |   21 +++-
 4 files changed, 155 insertions(+), 55 deletions(-)

Comments

Eugeni Dodonov Sept. 2, 2011, 6:43 p.m. UTC | #1
On 06/06/2011 03:10 AM, Chris Wilson wrote:
> In order to correctly account for reserving space in the GTT and fences
> for a batch buffer, we need to independently track whether the fence is
> pinned due to a fenced GPU access in the batch from whether the
> buffer is pinned in the aperture. Currently we count the fence as
> pinned if the buffer has already been seen in the execbuffer. This leads
> to a false accounting of available fence registers, causing frequent
> mass evictions. Worse, if coupled with the change to make
> i915_gem_object_get_fence() report EDEADLK upon fence starvation, the
> batchbuffer can fail with only one fence required...
>
> [...]

Bringing this June's thread back to life: I noticed that this patch
never made it into the kernel. Is anything still pending on it by any
chance, or was it just MIA?

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 624b9cc..2776b30 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -766,6 +766,9 @@  struct drm_i915_gem_object {
 	unsigned int pin_count : 4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 
+	unsigned int fence_pin_count : 3;
+#define DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT 0x7
+
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
@@ -1171,6 +1174,24 @@  int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 					   struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
+static inline void
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		BUG_ON(obj->fence_pin_count == DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT);
+		obj->fence_pin_count++;
+	}
+}
+
+static inline void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		BUG_ON(obj->fence_pin_count == 0);
+		obj->fence_pin_count--;
+	}
+}
+
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e3b85f..470240b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2150,6 +2150,7 @@  static void i915_gem_reset_fences(struct drm_device *dev)
 			i915_gem_release_mmap(obj);
 
 		reg->obj->fence_reg = I915_FENCE_REG_NONE;
+		reg->obj->fence_pin_count = 0;
 		reg->obj->fenced_gpu_access = false;
 		reg->obj->last_fenced_seqno = 0;
 		i915_gem_clear_fence_reg(dev, reg);
@@ -2836,6 +2837,8 @@  i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
+	BUG_ON(obj->fence_pin_count);
+
 	if (obj->tiling_mode)
 		i915_gem_release_mmap(obj);
 
@@ -2869,7 +2872,7 @@  i915_find_fence_reg(struct drm_device *dev,
 		if (!reg->obj)
 			return reg;
 
-		if (!reg->obj->pin_count)
+		if (!reg->obj->fence_pin_count)
 			avail = reg;
 	}
 
@@ -2879,7 +2882,7 @@  i915_find_fence_reg(struct drm_device *dev,
 	/* None available, try to steal one or wait for a user to finish */
 	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->obj->pin_count)
+		if (reg->obj->fence_pin_count)
 			continue;
 
 		if (first == NULL)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 741bf39..5abac22 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -460,6 +460,54 @@  i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+		     struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	bool need_fence, need_mappable;
+	int ret;
+
+	need_fence =
+		has_fenced_gpu_access &&
+		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		obj->tiling_mode != I915_TILING_NONE;
+	need_mappable =
+		entry->relocation_count ? true : need_fence;
+
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	if (ret)
+		return ret;
+
+	if (has_fenced_gpu_access) {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			if (obj->tiling_mode) {
+				ret = i915_gem_object_get_fence(obj, ring);
+				if (ret)
+					goto err_unpin;
+
+				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+				i915_gem_object_pin_fence(obj);
+			} else {
+				ret = i915_gem_object_put_fence(obj);
+				if (ret)
+					goto err_unpin;
+			}
+		}
+		obj->pending_fenced_gpu_access = need_fence;
+	}
+
+	entry->offset = obj->gtt_offset;
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
@@ -502,7 +550,8 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	 *
 	 * 1a. Unbind all objects that do not match the GTT constraints for
 	 *     the execbuffer (fenceable, mappable, alignment etc).
-	 * 1b. Increment pin count for already bound objects.
+	 * 1b. Increment pin count for already bound objects and obtain
+	 *     a fence register if required.
 	 * 2.  Bind new objects.
 	 * 3.  Decrement pin count.
 	 *
@@ -517,6 +566,7 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 			bool need_fence, need_mappable;
+
 			if (!obj->gtt_space)
 				continue;
 
@@ -531,58 +581,61 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
+				ret = pin_and_fence_object(obj, ring);
 			if (ret)
 				goto err;
-
-			entry++;
 		}
 
 		/* Bind fresh objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence;
-
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-
-			if (!obj->gtt_space) {
-				bool need_mappable =
-					entry->relocation_count ? true : need_fence;
-
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
-				if (ret)
-					break;
-			}
+			if (obj->gtt_space)
+				continue;
 
-			if (has_fenced_gpu_access) {
-				if (need_fence) {
-					ret = i915_gem_object_get_fence(obj, ring);
-					if (ret)
-						break;
-				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-					   obj->tiling_mode == I915_TILING_NONE) {
-					/* XXX pipelined! */
-					ret = i915_gem_object_put_fence(obj);
-					if (ret)
-						break;
-				}
-				obj->pending_fenced_gpu_access = need_fence;
+			ret = pin_and_fence_object(obj, ring);
+			if (ret) {
+				int ret_ignore;
+
+				/* Now this is a little tricky...
+				 *
+				 * As reservation is split up into two passes,
+				 * the first pinning any already bound objects
+				 * and the second binding any new objects,
+				 * the unwind loop (for the execbuffer
+				 * reservation, not the error cleanup)
+				 * decrements the pin count on *all* bound
+				 * objects. As pin_and_fence_object() has
+				 * already dropped its pin count should it
+				 * fail to grab a fence after binding, we
+				 * need to also remove this fresh binding to
+				 * preserve the invariant for the unwind.
+				 *
+				 * This can potentially raise a harmless
+				 * -EINVAL if we failed to bind in the above
+				 * call. It cannot raise -EINTR since we know
+				 * that the bo is freshly bound and so will
+				 * not need to be flushed or waited upon.
+				 */
+				ret_ignore = i915_gem_object_unbind(obj);
+				(void)ret_ignore;
+				WARN_ON(obj->gtt_space);
+				break;
 			}
-
-			entry->offset = obj->gtt_offset;
 		}
 
 		/* Decrement pin count for bound objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			if (obj->gtt_space)
-				i915_gem_object_unpin(obj);
+			struct drm_i915_gem_exec_object2 *entry;
+
+			if (!obj->gtt_space)
+				continue;
+
+			entry = obj->exec_entry;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				i915_gem_object_unpin_fence(obj);
+				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+			}
+
+			i915_gem_object_unpin(obj);
 		}
 
 		if (ret != -ENOSPC || retry > 1)
@@ -599,16 +652,24 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	obj = list_entry(obj->exec_list.prev,
-			 struct drm_i915_gem_object,
-			 exec_list);
-	while (objects != &obj->exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+	/* Called from the middle of an interrupted first pass over already
+	 * bound objects (likely a signal whilst grabbing a fence). Since we
+	 * have only processed as far as @obj, and pin_and_fence_object() left
+	 * that in a neutral state, we unpin backwards from the previous object.
+	 */
+	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
 
-		obj = list_entry(obj->exec_list.prev,
-				 struct drm_i915_gem_object,
-				 exec_list);
+		i915_gem_object_unpin(obj);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d4e79d3..79c3849 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1851,6 +1851,8 @@  intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		ret = i915_gem_object_get_fence(obj, pipelined);
 		if (ret)
 			goto err_unpin;
+
+		i915_gem_object_pin_fence(obj);
 	}
 
 	dev_priv->mm.interruptible = true;
@@ -2000,14 +2002,22 @@  intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
 					 LEAVE_ATOMIC_MODE_SET);
 	if (ret) {
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
 
 	if (old_fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(old_fb)->obj;
+
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -2895,8 +2905,12 @@  static void intel_crtc_disable(struct drm_crtc *crtc)
 	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	if (crtc->fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
@@ -6116,6 +6130,7 @@  static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
+	i915_gem_object_unpin_fence(work->old_fb_obj);
 	i915_gem_object_unpin(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);