
[1/3] drm/i915: Use a non-blocking wait for set-to-domain ioctl

Message ID 1345723973-22092-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Aug. 23, 2012, 12:12 p.m. UTC
The principal use for set-to-domain is for userspace to serialise
operations with a particular buffer, for example to maintain coherency
with a CPU map or to ratelimit its rendering by waiting on all previous
operations before continuing. As such we tend to hold the struct_mutex
for long periods during the synchronisation and so cause contention
issues with other users of the graphics device, even for independent
operations such as memory management. An example is the contention between
compiz and X, which causes jitter in the display and a drop in peak
throughput.
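
For reference, the userspace side of the pattern described above might look
roughly like this — a minimal sketch against the i915 uapi (build with the
libdrm include path, e.g. pkg-config --cflags libdrm); the helper name is
made up, the ioctl and domain flags are the real interface:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Sketch of the principal use described above: wait for all previous
 * rendering to a buffer before touching a CPU map of it. */
static int wait_for_cpu_access(int fd, uint32_t handle)
{
	struct drm_i915_gem_set_domain sd;

	memset(&sd, 0, sizeof(sd));
	sd.handle = handle;
	sd.read_domains = I915_GEM_DOMAIN_CPU;
	sd.write_domain = I915_GEM_DOMAIN_CPU;

	/* Blocks until the GPU is done with the buffer; it is this wait
	 * that the patch moves out from under struct_mutex. */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}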

The ultimate solution would be a set of fine-grained locks and lockless
operations, but an intermediate step is to first attempt the
synchronisation for set-to-domain without holding the mutex. This
introduces a number of race conditions, so we limit its use to the ioctl
periphery where we have no dependent state and can safely complete with
a locked synchronisation afterwards.
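
Concretely, the flow in the ioctl becomes (condensed from the
set_domain_ioctl hunk in the patch below; not the full function):

	/* Optimistic flush that drops struct_mutex internally for the
	 * duration of the wait. */
	ret = i915_gem_object_wait_rendering(obj, write_domain == 0,
					     true /* nonblocking */);
	if (ret)
		goto unref;

	/* Back under struct_mutex: repeat the flush in the normal locked
	 * manner in case we were gazumped while the lock was dropped. */
	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);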

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |    3 +-
 drivers/gpu/drm/i915/i915_gem.c         |  417 ++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_overlay.c    |    4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +-
 4 files changed, 223 insertions(+), 203 deletions(-)

Comments

Daniel Vetter Aug. 24, 2012, 12:28 a.m. UTC | #1
On Thu, Aug 23, 2012 at 01:12:51PM +0100, Chris Wilson wrote:
> The principal use for set-to-domain is for userspace to serialise
> operations with a particular buffer, for example to maintain coherency
> with a CPU map or to ratelimit its rendering by waiting on all previous
> operations before continuing. As such we tend to hold the struct_mutex
> for long periods during the synchronisation and so cause contention
> issues with other users of the graphics device, even for independent
> operations such as memory management. An example is the contention between
> compiz and X, which causes jitter in the display and a drop in peak
> throughput.
> 
> The ultimate solution would be a set of fine-grained locks and lockless
> operations, but an intermediate step is to first attempt the
> synchronisation for set-to-domain without holding the mutex. This
> introduces a number of race conditions, so we limit its use to the ioctl
> periphery where we have no dependent state and can safely complete with
> a locked synchronisation afterwards.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Can I have the "move functions around" patch split out, please?

Also, I think it doesn't make sense to splatter nonblocking bools all over
the code:
- We won't need such a dance anywhere else, since we require userspace to
  call set_domain before any cpu access anyway.
- I think hiding the mutex dropping deep down in the callchain is evil.

I.e. I'd prefer if we do the lock dropping in set_domain_ioctl itself and
just copy&pasta the required code to flush olr and whatnot else. And since
we drop the mutex in between I actually think that the write domain
frobbery that wait_rendering does is positively harmful. But we don't need
it, so better cut it out (same applies to the retire_requests in there).
-Daniel
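
For illustration, the shape being suggested here — the unlock/relock done
directly in the ioctl, the olr flush copied inline, and wait_rendering's
write-domain update and retire_requests omitted — might look roughly like
the following. This is an untested sketch of the review comment, not code
from the patch; the function name is made up, the helpers are the ones the
patch uses:

static int
set_domain_flush_unlocked(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  bool readonly)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	int ret;

	if (seqno == 0)
		return 0;

	/* Emit any outstanding lazy request while still locked ... */
	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	/* ... then do the long wait without struct_mutex. No write-domain
	 * frobbery and no retire_requests here: neither is safe to do
	 * based on state sampled before the lock was dropped. */
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
	mutex_lock(&dev->struct_mutex);

	return ret;
}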

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cbd3cd0..7246373 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1385,7 +1385,8 @@  int i915_add_request(struct intel_ring_buffer *ring,
 		     struct drm_file *file,
 		     struct drm_i915_gem_request *request);
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
-				 uint32_t seqno);
+				 uint32_t seqno,
+				 bool nonblocking);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e1ec587..083b2ab 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -946,6 +946,205 @@  unlock:
 	return ret;
 }
 
+int
+i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+		     bool interruptible)
+{
+	if (atomic_read(&dev_priv->mm.wedged)) {
+		struct completion *x = &dev_priv->error_completion;
+		bool recovery_complete;
+		unsigned long flags;
+
+		/* Give the error handler a chance to run. */
+		spin_lock_irqsave(&x->wait.lock, flags);
+		recovery_complete = x->done > 0;
+		spin_unlock_irqrestore(&x->wait.lock, flags);
+
+		/* Non-interruptible callers can't handle -EAGAIN, hence return
+		 * -EIO unconditionally for these. */
+		if (!interruptible)
+			return -EIO;
+
+		/* Recovery complete, but still wedged means reset failure. */
+		if (recovery_complete)
+			return -EIO;
+
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/*
+ * Compare seqno against outstanding lazy request. Emit a request if they are
+ * equal.
+ */
+static int
+i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
+{
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	ret = 0;
+	if (seqno == ring->outstanding_lazy_request)
+		ret = i915_add_request(ring, NULL, NULL);
+
+	return ret;
+}
+
+/**
+ * __wait_seqno - wait until execution of seqno has finished
+ * @ring: the ring expected to report seqno
+ * @seqno: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ *
+ * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+			bool interruptible, struct timespec *timeout)
+{
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	struct timespec before, now, wait_time={1,0};
+	unsigned long timeout_jiffies;
+	long end;
+	bool wait_forever = true;
+	int ret;
+
+	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+		return 0;
+
+	trace_i915_gem_request_wait_begin(ring, seqno);
+
+	if (timeout != NULL) {
+		wait_time = *timeout;
+		wait_forever = false;
+	}
+
+	timeout_jiffies = timespec_to_jiffies(&wait_time);
+
+	if (WARN_ON(!ring->irq_get(ring)))
+		return -ENODEV;
+
+	/* Record current time in case interrupted by signal, or wedged * */
+	getrawmonotonic(&before);
+
+#define EXIT_COND \
+	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
+	atomic_read(&dev_priv->mm.wedged))
+	do {
+		if (interruptible)
+			end = wait_event_interruptible_timeout(ring->irq_queue,
+							       EXIT_COND,
+							       timeout_jiffies);
+		else
+			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
+						 timeout_jiffies);
+
+		ret = i915_gem_check_wedge(dev_priv, interruptible);
+		if (ret)
+			end = ret;
+	} while (end == 0 && wait_forever);
+
+	getrawmonotonic(&now);
+
+	ring->irq_put(ring);
+	trace_i915_gem_request_wait_end(ring, seqno);
+#undef EXIT_COND
+
+	if (timeout) {
+		struct timespec sleep_time = timespec_sub(now, before);
+		*timeout = timespec_sub(*timeout, sleep_time);
+	}
+
+	switch (end) {
+	case -EIO:
+	case -EAGAIN: /* Wedged */
+	case -ERESTARTSYS: /* Signal */
+		return (int)end;
+	case 0: /* Timeout */
+		if (timeout)
+			set_normalized_timespec(timeout, 0, 0);
+		return -ETIME;
+	default: /* Completed */
+		WARN_ON(end < 0); /* We're not aware of other errors */
+		return 0;
+	}
+}
+
+/**
+ * Waits for a sequence number to be signaled, and cleans up the
+ * request and object lists appropriately for that event.
+ */
+int
+i915_wait_seqno(struct intel_ring_buffer *ring,
+		uint32_t seqno,
+		bool nonblocking)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool interruptible = dev_priv->mm.interruptible;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+	BUG_ON(seqno == 0);
+
+	ret = i915_gem_check_wedge(dev_priv, interruptible);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_check_olr(ring, seqno);
+	if (ret)
+		return ret;
+
+	if (nonblocking)
+		mutex_unlock(&dev->struct_mutex);
+
+	ret = __wait_seqno(ring, seqno, interruptible, NULL);
+
+	if (nonblocking)
+		mutex_lock(&dev->struct_mutex);
+
+	return ret;
+}
+
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ */
+static __must_check int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly,
+			       bool nonblocking)
+{
+	struct intel_ring_buffer *ring = obj->ring;
+	u32 seqno;
+	int ret;
+
+	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
+	if (seqno == 0)
+		return 0;
+
+	ret = i915_wait_seqno(ring, seqno, nonblocking);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	/* Manually manage the write flush as we may have not yet
+	 * retired the buffer.
+	 */
+	if (obj->last_write_seqno &&
+	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
+		obj->last_write_seqno = 0;
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
+
+	return 0;
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -983,6 +1182,16 @@  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
+	/* Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	ret = i915_gem_object_wait_rendering(obj,
+					     write_domain == 0,
+					     true);
+	if (ret)
+		goto unref;
+
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
@@ -996,6 +1205,7 @@  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 	}
 
+unref:
 	drm_gem_object_unreference(&obj->base);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
@@ -1950,197 +2160,6 @@  i915_gem_retire_work_handler(struct work_struct *work)
 	mutex_unlock(&dev->struct_mutex);
 }
 
-int
-i915_gem_check_wedge(struct drm_i915_private *dev_priv,
-		     bool interruptible)
-{
-	if (atomic_read(&dev_priv->mm.wedged)) {
-		struct completion *x = &dev_priv->error_completion;
-		bool recovery_complete;
-		unsigned long flags;
-
-		/* Give the error handler a chance to run. */
-		spin_lock_irqsave(&x->wait.lock, flags);
-		recovery_complete = x->done > 0;
-		spin_unlock_irqrestore(&x->wait.lock, flags);
-
-		/* Non-interruptible callers can't handle -EAGAIN, hence return
-		 * -EIO unconditionally for these. */
-		if (!interruptible)
-			return -EIO;
-
-		/* Recovery complete, but still wedged means reset failure. */
-		if (recovery_complete)
-			return -EIO;
-
-		return -EAGAIN;
-	}
-
-	return 0;
-}
-
-/*
- * Compare seqno against outstanding lazy request. Emit a request if they are
- * equal.
- */
-static int
-i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
-{
-	int ret;
-
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-
-	ret = 0;
-	if (seqno == ring->outstanding_lazy_request)
-		ret = i915_add_request(ring, NULL, NULL);
-
-	return ret;
-}
-
-/**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- *
- * Returns 0 if the seqno was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
-			bool interruptible, struct timespec *timeout)
-{
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	struct timespec before, now, wait_time={1,0};
-	unsigned long timeout_jiffies;
-	long end;
-	bool wait_forever = true;
-	int ret;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
-		return 0;
-
-	trace_i915_gem_request_wait_begin(ring, seqno);
-
-	if (timeout != NULL) {
-		wait_time = *timeout;
-		wait_forever = false;
-	}
-
-	timeout_jiffies = timespec_to_jiffies(&wait_time);
-
-	if (WARN_ON(!ring->irq_get(ring)))
-		return -ENODEV;
-
-	/* Record current time in case interrupted by signal, or wedged * */
-	getrawmonotonic(&before);
-
-#define EXIT_COND \
-	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
-	atomic_read(&dev_priv->mm.wedged))
-	do {
-		if (interruptible)
-			end = wait_event_interruptible_timeout(ring->irq_queue,
-							       EXIT_COND,
-							       timeout_jiffies);
-		else
-			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
-						 timeout_jiffies);
-
-		ret = i915_gem_check_wedge(dev_priv, interruptible);
-		if (ret)
-			end = ret;
-	} while (end == 0 && wait_forever);
-
-	getrawmonotonic(&now);
-
-	ring->irq_put(ring);
-	trace_i915_gem_request_wait_end(ring, seqno);
-#undef EXIT_COND
-
-	if (timeout) {
-		struct timespec sleep_time = timespec_sub(now, before);
-		*timeout = timespec_sub(*timeout, sleep_time);
-	}
-
-	switch (end) {
-	case -EIO:
-	case -EAGAIN: /* Wedged */
-	case -ERESTARTSYS: /* Signal */
-		return (int)end;
-	case 0: /* Timeout */
-		if (timeout)
-			set_normalized_timespec(timeout, 0, 0);
-		return -ETIME;
-	default: /* Completed */
-		WARN_ON(end < 0); /* We're not aware of other errors */
-		return 0;
-	}
-}
-
-/**
- * Waits for a sequence number to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- */
-int
-i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
-{
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	int ret = 0;
-
-	BUG_ON(seqno == 0);
-
-	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
-
-	ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL);
-
-	return ret;
-}
-
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- */
-static __must_check int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly)
-{
-	u32 seqno;
-	int ret;
-
-	/* If there is rendering queued on the buffer being evicted, wait for
-	 * it.
-	 */
-	if (readonly)
-		seqno = obj->last_write_seqno;
-	else
-		seqno = obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
-
-	ret = i915_wait_seqno(obj->ring, seqno);
-	if (ret)
-		return ret;
-
-	/* Manually manage the write flush as we may have not yet retired
-	 * the buffer.
-	 */
-	if (obj->last_write_seqno &&
-	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
-		obj->last_write_seqno = 0;
-		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-	}
-
-	i915_gem_retire_requests_ring(obj->ring);
-	return 0;
-}
-
 /**
  * Ensures that an object will eventually get non-busy by flushing any required
  * write domains, emitting any outstanding lazy request and retiring and
@@ -2270,7 +2289,7 @@  i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		return 0;
 
 	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
+		return i915_gem_object_wait_rendering(obj, false, false);
 
 	idx = intel_ring_sync_index(from, to);
 
@@ -2372,7 +2391,7 @@  static int i915_ring_idle(struct intel_ring_buffer *ring)
 	if (list_empty(&ring->active_list))
 		return 0;
 
-	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
+	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring), false);
 }
 
 int i915_gpu_idle(struct drm_device *dev)
@@ -2563,7 +2582,7 @@  static int
 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 {
 	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
+		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno, false);
 		if (ret)
 			return ret;
 
@@ -2984,8 +3003,8 @@  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	ret = i915_gem_object_wait_rendering(obj, !write);
-	if (ret)
+	ret = i915_gem_object_wait_rendering(obj, !write, false);
+	if (ret && ret != -EIO)
 		return ret;
 
 	i915_gem_object_flush_cpu_write_domain(obj);
@@ -3218,7 +3237,7 @@  i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
 		return 0;
 
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_object_wait_rendering(obj, false, false);
 	if (ret)
 		return ret;
 
@@ -3242,8 +3261,8 @@  i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return 0;
 
-	ret = i915_gem_object_wait_rendering(obj, !write);
-	if (ret)
+	ret = i915_gem_object_wait_rendering(obj, !write, false);
+	if (ret && ret != -EIO)
 		return ret;
 
 	i915_gem_object_flush_gtt_write_domain(obj);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index c0f4858..cf3f6f1 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -226,7 +226,7 @@  static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 	}
 	overlay->last_flip_req = request->seqno;
 	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_seqno(ring, overlay->last_flip_req, false);
 	if (ret)
 		return ret;
 	i915_gem_retire_requests(dev);
@@ -396,7 +396,7 @@  static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 	if (overlay->last_flip_req == 0)
 		return 0;
 
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_seqno(ring, overlay->last_flip_req, false);
 	if (ret)
 		return ret;
 	i915_gem_retire_requests(dev);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c828169..68a874a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1134,7 +1134,7 @@  static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_seqno(ring, seqno, false);
 	if (!ret)
 		i915_gem_retire_requests_ring(ring);