diff mbox

[v2,2/3] drm/i915: Remove (struct_mutex) locking for wait-ioctl

Message ID 1450877756-2902-2-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Dec. 23, 2015, 1:35 p.m. UTC
With a bit of care (and leniency) we can iterate over the object and
wait for previous rendering to complete with judicious use of atomic
reference counting. The ABI requires us to ensure that an active object
is eventually flushed (like the busy-ioctl) which is guaranteed by our
management of requests (i.e. everything that is submitted to hardware is
flushed in the same request). All we have to do is ensure that we can
detect when the requests are complete for reporting when the object is
idle (without triggering ETIME) - this is handled by
__i915_wait_request.

The biggest danger in the code is walking the object without holding any
locks. We iterate over the set of last requests and carefully grab a
reference upon it. (If it is changing beneath us, that is the usual
userspace race and even with locking you get the same indeterminate
results.) If the request is unreferenced beneath us, it will be disposed
of into the request cache - so we have to carefully order the retrieval
of the request pointer with its removal, and to do this we employ RCU on
the request cache and upon the last_request pointer tracking.

The impact of this is actually quite small - the return to userspace
following the wait was already lockless. What we achieve here is
completing an already finished wait without hitting the struct_mutex;
our hold is quite short, so we are typically just a victim of
contention rather than a cause.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 55 ++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 36 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 696ada3891ed..3e331f7e9d74 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2414,57 +2414,40 @@  i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct drm_i915_gem_request *req[I915_NUM_RINGS];
-	int i, n = 0;
-	int ret;
+	int i, ret = 0;
 
 	if (args->flags != 0)
 		return -EINVAL;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
-	if (&obj->base == NULL) {
-		mutex_unlock(&dev->struct_mutex);
+	if (&obj->base == NULL)
 		return -ENOENT;
-	}
-
-	/* Need to make sure the object gets inactive eventually. */
-	ret = i915_gem_object_flush_active(obj);
-	if (ret)
-		goto out;
-
-	if (!obj->active)
-		goto out;
 
-	/* Do this after OLR check to make sure we make forward progress polling
-	 * on this IOCTL with a timeout == 0 (like busy ioctl)
-	 */
-	if (args->timeout_ns == 0) {
-		ret = -ETIME;
+	if (!obj->active) /* XXX READ_ONCE(obj->flags) */
 		goto out;
-	}
 
+	rcu_read_lock();
 	for (i = 0; i < I915_NUM_RINGS; i++) {
-		if (obj->last_read[i].request == NULL)
+		struct drm_i915_gem_request *req;
+
+		req = i915_gem_active_get_request_rcu(&obj->last_read[i]);
+		if (req == NULL)
 			continue;
 
-		req[n++] = i915_gem_request_get(obj->last_read[i].request);
+		rcu_read_unlock();
+		ret = __i915_wait_request(req, true,
+					  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
+					  to_rps_client(file));
+		i915_gem_request_put(req);
+		if (ret)
+			goto out;
+
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 
 out:
-	drm_gem_object_unreference(&obj->base);
-	mutex_unlock(&dev->struct_mutex);
-
-	for (i = 0; i < n; i++) {
-		if (ret == 0)
-			ret = __i915_wait_request(req[i], true,
-						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-						  to_rps_client(file));
-		i915_gem_request_put(req[i]);
-	}
+	drm_gem_object_unreference_unlocked(&obj->base);
 	return ret;
 }