@@ -2187,7 +2187,12 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
struct drm_i915_gem_request {
/** Underlying object for implementing the signal/wait stuff. */
struct fence fence;
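+ /** Entry on the ring's fence_signal_list while completion is pending */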
+ struct list_head signal_link;
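+ /** Entry on the ring's fence_unsignal_list once signalled, until the
+ * deferred unreference in retire */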
+ struct list_head unsignal_link;
struct list_head delayed_free_link;
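+ /** True if the request was cancelled (TDR, pre-emption, etc.) and its
+ * fence failed rather than completed */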
+ bool cancelled;
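+ /** True while this request holds a reference on the ring's user interrupt */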
+ bool irq_enabled;
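+ /** Set if signalling was requested before the request was submitted, so
+ * that submission knows to enable the interrupt immediately */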
+ bool signal_requested;
/** On which ring this request was generated */
struct drm_i915_private *i915;
@@ -2264,6 +2269,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
struct intel_context *ctx,
struct drm_i915_gem_request **req_out);
void i915_gem_request_cancel(struct drm_i915_gem_request *req);
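+/*
+ * Interrupt driven fence completion: 'enable_interrupt' enables the user
+ * interrupt for a request on demand, 'notify' scans the ring's signal
+ * pending list and signals any completed fences. 'fence_locked' indicates
+ * whether the caller already holds the ring's fence_lock.
+ */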
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req,
+ bool fence_locked);
+void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked);
int i915_create_fence_timeline(struct drm_device *dev,
struct intel_context *ctx,
@@ -40,6 +40,8 @@
#define RQ_BUG_ON(expr)
+static void i915_gem_request_submit(struct drm_i915_gem_request *req);
+
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
@@ -1156,16 +1158,32 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}
+/*
+ * Super low latency implementation of request wait.
+ *
+ * This is used as a precursor to anything slow, such as sleeping until
+ * woken by a signal or an interrupt handler, in the main wait request
+ * code. The idea is that most requests complete quickly, so burning the
+ * CPU for up to a jiffy is more efficient than sleeping, which itself
+ * introduces significant latency.
+ */
static int __i915_spin_request(struct drm_i915_gem_request *req)
{
unsigned long timeout;
+ u32 seqno;
if (i915_gem_request_get_ring(req)->irq_refcount)
return -EBUSY;
timeout = jiffies + 1;
while (!need_resched()) {
- if (i915_gem_request_completed(req))
+ /*
+ * Explicitly check the seqno rather than waiting for the
+ * user interrupt to work its way through the hardware and
+ * software layers.
+ */
+ seqno = req->ring->get_seqno(req->ring, false);
+ if (i915_seqno_passed(seqno, req->seqno))
return 0;
if (time_after_eq(jiffies, timeout))
@@ -1173,7 +1191,9 @@ static int __i915_spin_request(struct drm_i915_gem_request *req)
cpu_relax_lowlatency();
}
- if (i915_gem_request_completed(req))
+
+ seqno = req->ring->get_seqno(req->ring, false);
+ if (i915_seqno_passed(seqno, req->seqno))
return 0;
return -EAGAIN;
@@ -1205,8 +1225,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- const bool irq_test_in_progress =
- ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+ u32 seqno;
DEFINE_WAIT(wait);
unsigned long timeout_expire;
s64 before, now;
@@ -1214,9 +1233,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
- if (list_empty(&req->list))
- return 0;
-
if (i915_gem_request_completed(req))
return 0;
@@ -1231,15 +1247,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
before = ktime_get_raw_ns();
/* Optimistic spin for the next jiffie before touching IRQs */
- ret = __i915_spin_request(req);
- if (ret == 0)
- goto out;
-
- if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
- ret = -ENODEV;
- goto out;
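+ /*
+ * The request may not have been assigned a hardware seqno yet, in
+ * which case there is nothing to spin on.
+ */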
+ if (req->seqno) {
+ ret = __i915_spin_request(req);
+ if (ret == 0)
+ goto out;
}
+ /*
+ * Enable interrupt based completion of the request. This ends up in
+ * i915_gem_request_enable_signaling() via the fence's
+ * .enable_signaling hook.
+ */
+ fence_enable_sw_signaling(&req->fence);
+
for (;;) {
struct timer_list timer;
@@ -1262,6 +1280,19 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
+ if (req->seqno) {
+ /*
+ * There is quite a lot of latency in the user interrupt
+ * path. So do an explicit seqno check and potentially
+ * remove all that delay.
+ */
+ seqno = ring->get_seqno(ring, false);
+ if (i915_seqno_passed(seqno, req->seqno)) {
+ ret = 0;
+ break;
+ }
+ }
+
if (interruptible && signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -1288,8 +1319,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
destroy_timer_on_stack(&timer);
}
}
- if (!irq_test_in_progress)
- ring->irq_put(ring);
finish_wait(&ring->irq_queue, &wait);
@@ -1297,6 +1326,18 @@ out:
now = ktime_get_raw_ns();
trace_i915_gem_request_wait_end(req);
+ if (ret == 0 && req->seqno) {
+ seqno = ring->get_seqno(ring, false);
+ if (i915_seqno_passed(seqno, req->seqno) &&
+ !i915_gem_request_completed(req)) {
+ /*
+ * The seqno has popped but the interrupt never got as far as
+ * marking the request as complete, so do it here before
+ * returning.
+ */
+ i915_gem_request_notify(req->ring, false);
+ }
+ }
+
if (timeout) {
s64 tres = *timeout - (now - before);
@@ -1377,6 +1418,22 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
list_del_init(&request->list);
i915_gem_request_remove_from_client(request);
+ /*
+ * The request may still be on the signal pending list, e.g. due to
+ * being cancelled by TDR, pre-emption, etc.
+ */
+ if (!list_empty(&request->signal_link)) {
+ /*
+ * The request must be marked as cancelled and the underlying
+ * fence signalled as failed. NB: There is no explicit fence
+ * fail API, there is only a manual poke and signal.
+ */
+ request->cancelled = true;
+ /* How to propagate to any associated sync_fence??? */
+ request->fence.status = -EIO;
+ fence_signal(&request->fence);
+ }
+
i915_gem_request_unreference(request);
}
@@ -2535,6 +2592,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
*/
request->postfix = intel_ring_get_tail(ringbuf);
+ /*
+ * Add the fence to the pending list before emitting the commands that
+ * will generate the seqno notification interrupt.
+ */
+ i915_gem_request_submit(request);
+
if (i915.enable_execlists)
ret = ring->emit_request(request);
else {
@@ -2653,25 +2716,140 @@ static void i915_gem_request_free(struct drm_i915_gem_request *req)
i915_gem_context_unreference(ctx);
}
+ if (req->irq_enabled)
+ req->ring->irq_put(req->ring);
+
kmem_cache_free(req->i915->requests, req);
}
-static bool i915_gem_request_enable_signaling(struct fence *req_fence)
+/*
+ * The request is about to be submitted to the hardware, so add its fence to
+ * the list of signalable fences.
+ *
+ * NB: This does not necessarily enable interrupts yet. That only occurs on
+ * demand when the request is actually waited on. However, adding it to the
+ * list early ensures that no completion interrupt can fire before the
+ * request is on the list and thus be lost. The cost is merely that the
+ * seqno must be checked manually once the interrupt is finally enabled.
+ */
+static void i915_gem_request_submit(struct drm_i915_gem_request *req)
{
- /* Interrupt driven fences are not implemented yet.*/
- WARN(true, "This should not be called!");
- return true;
+ /*
+ * Always add the request to the signal list before submitting it to
+ * the hardware. Thus there is no race condition whereby the
+ * completion interrupt could fire before the request has been added
+ * to the signal list. Hence there is no need to check for completion,
+ * undo the list add and return false.
+ */
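+ /*
+ * The signal list holds its own reference to the request. It is
+ * dropped when the request is moved to the unsignal list and reaped
+ * by the retire code.
+ */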
+ i915_gem_request_reference(req);
+ spin_lock_irq(&req->ring->fence_lock);
+ WARN_ON(!list_empty(&req->signal_link));
+ list_add_tail(&req->signal_link, &req->ring->fence_signal_list);
+ spin_unlock_irq(&req->ring->fence_lock);
+
+ /*
+ * NB: Interrupts are only enabled on demand. Thus there is still a
+ * race where the request could complete before the interrupt has
+ * been enabled. Thus care must be taken at that point.
+ */
+
+ /* Have interrupts already been requested? */
+ if (req->signal_requested)
+ i915_gem_request_enable_interrupt(req, false);
}
-static bool i915_gem_request_is_completed(struct fence *req_fence)
+/*
+ * The request is being actively waited on, so enable interrupt based
+ * completion signalling.
+ */
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req,
+ bool fence_locked)
+{
+ struct drm_i915_private *dev_priv = to_i915(req->ring->dev);
+ const bool irq_test_in_progress =
+ ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) &
+ intel_ring_flag(req->ring);
+
+ if (req->irq_enabled)
+ return;
+
+ if (irq_test_in_progress)
+ return;
+
+ if (!WARN_ON(!req->ring->irq_get(req->ring)))
+ req->irq_enabled = true;
+
+ /*
+ * Because the interrupt is only enabled on demand, there is a race
+ * where the interrupt can fire before anyone is looking for it. So
+ * do an explicit check for missed interrupts.
+ */
+ i915_gem_request_notify(req->ring, fence_locked);
+}
+
+static bool i915_gem_request_enable_signaling(struct fence *req_fence)
{
struct drm_i915_gem_request *req = container_of(req_fence,
typeof(*req), fence);
+
+ /*
+ * There is no need to enable interrupt based processing until the
+ * request has been submitted to the hardware, at which point
+ * i915_gem_request_submit() is called. So normally just set a flag
+ * to say that interrupts are wanted when the request is eventually
+ * submitted. On the other hand, if the request has already been
+ * submitted then interrupts do need to be enabled right now.
+ */
+
+ req->signal_requested = true;
+
+ if (!list_empty(&req->signal_link))
+ i915_gem_request_enable_interrupt(req, true);
+
+ return true;
+}
+
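+/*
+ * Scan the ring's signal pending list and signal the fence of every request
+ * that has completed (or been cancelled), releasing the user interrupt
+ * reference as each one is processed. Callers that already hold the ring's
+ * fence_lock must pass fence_locked = true.
+ */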
+void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked)
+{
+ struct drm_i915_gem_request *req, *req_next;
+ unsigned long flags;
u32 seqno;
- seqno = req->ring->get_seqno(req->ring, false/*lazy_coherency*/);
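+ /*
+ * Unlocked early out: a request added concurrently is not lost, it
+ * will be processed by a subsequent notify call.
+ */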
+ if (list_empty(&ring->fence_signal_list))
+ return;
+
+ if (!fence_locked)
+ spin_lock_irqsave(&ring->fence_lock, flags);
- return i915_seqno_passed(seqno, req->seqno);
+ seqno = ring->get_seqno(ring, false);
+
+ list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_link) {
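+ /*
+ * Requests are added in submission order, so stop at the first
+ * incomplete one. Cancelled requests must be reaped regardless of
+ * their seqno.
+ */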
+ if (!req->cancelled) {
+ if (!i915_seqno_passed(seqno, req->seqno))
+ break;
+ }
+
+ /*
+ * Start by removing the fence from the signal list, otherwise the
+ * retire code can run concurrently and get confused.
+ */
+ list_del_init(&req->signal_link);
+
+ if (!req->cancelled)
+ fence_signal_locked(&req->fence);
+
+ if (req->irq_enabled) {
+ req->ring->irq_put(req->ring);
+ req->irq_enabled = false;
+ }
+
+ /* Can't unreference here because that might grab fence_lock */
+ list_add_tail(&req->unsignal_link, &ring->fence_unsignal_list);
+ }
+
+ if (!fence_locked)
+ spin_unlock_irqrestore(&ring->fence_lock, flags);
}
static const char *i915_gem_request_get_driver_name(struct fence *req_fence)
@@ -2714,7 +2892,6 @@ static void i915_gem_request_fence_value_str(struct fence *req_fence,
static const struct fence_ops i915_gem_request_fops = {
.enable_signaling = i915_gem_request_enable_signaling,
- .signaled = i915_gem_request_is_completed,
.wait = fence_default_wait,
.release = i915_gem_request_release,
.get_driver_name = i915_gem_request_get_driver_name,
@@ -2798,6 +2975,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
goto err;
}
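+ /*
+ * The signal_link must be initialised early because it is tested with
+ * list_empty() before the request is submitted.
+ */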
+ INIT_LIST_HEAD(&req->signal_link);
fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
ctx->engine[ring->id].fence_timeline.fence_context,
i915_fence_timeline_get_next_seqno(&ctx->engine[ring->id].fence_timeline));
@@ -2835,6 +3013,11 @@ void i915_gem_request_cancel(struct drm_i915_gem_request *req)
{
intel_ring_reserved_space_cancel(req->ringbuf);
+ req->cancelled = true;
+ /* How to propagate to any associated sync_fence??? */
+ req->fence.status = -EINVAL;
+ fence_signal(&req->fence);
+
i915_gem_request_unreference(req);
}
@@ -2928,6 +3111,13 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
i915_gem_request_retire(request);
}
+ /*
+ * Tidy up anything left over. This includes a call to
+ * i915_gem_request_notify() which will make sure that any requests
+ * that were on the signal pending list also get cleaned up.
+ */
+ i915_gem_retire_requests_ring(ring);
+
/* Having flushed all requests from all queues, we know that all
* ringbuffers must now be empty. However, since we do not reclaim
* all space when retiring the request (to prevent HEADs colliding
@@ -2976,6 +3166,13 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
WARN_ON(i915_verify_lists(ring->dev));
+ /*
+ * If no one has waited on a request recently then interrupts will
+ * not have been enabled and thus no requests will ever be marked as
+ * completed. So do an explicit completion check now.
+ */
+ i915_gem_request_notify(ring, false);
+
/* Retire requests first as we use it above for the early return.
* If we retire requests last, we may use a later seqno and so clear
* the requests lists without clearing the active list, leading to
@@ -3017,6 +3214,15 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
i915_gem_request_assign(&ring->trace_irq_req, NULL);
}
+ /* Tidy up any requests that were recently signalled */
+ spin_lock_irq(&ring->fence_lock);
+ list_splice_init(&ring->fence_unsignal_list, &list_head);
+ spin_unlock_irq(&ring->fence_lock);
+ list_for_each_entry_safe(req, req_next, &list_head, unsignal_link) {
+ list_del(&req->unsignal_link);
+ i915_gem_request_unreference(req);
+ }
+
/* Really free any requests that were recently unreferenced */
spin_lock(&ring->delayed_free_lock);
list_splice_init(&ring->delayed_free_list, &list_head);
@@ -5068,6 +5274,8 @@ init_ring_lists(struct intel_engine_cs *ring)
{
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
+ INIT_LIST_HEAD(&ring->fence_signal_list);
+ INIT_LIST_HEAD(&ring->fence_unsignal_list);
INIT_LIST_HEAD(&ring->delayed_free_list);
}
@@ -981,6 +981,8 @@ static void notify_ring(struct intel_engine_cs *ring)
trace_i915_gem_request_notify(ring);
+ i915_gem_request_notify(ring, false);
+
wake_up_all(&ring->irq_queue);
}
@@ -1920,6 +1920,8 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
+ INIT_LIST_HEAD(&ring->fence_signal_list);
+ INIT_LIST_HEAD(&ring->fence_unsignal_list);
INIT_LIST_HEAD(&ring->delayed_free_list);
spin_lock_init(&ring->fence_lock);
spin_lock_init(&ring->delayed_free_lock);
@@ -2158,6 +2158,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
INIT_LIST_HEAD(&ring->request_list);
INIT_LIST_HEAD(&ring->execlist_queue);
INIT_LIST_HEAD(&ring->buffers);
+ INIT_LIST_HEAD(&ring->fence_signal_list);
+ INIT_LIST_HEAD(&ring->fence_unsignal_list);
INIT_LIST_HEAD(&ring->delayed_free_list);
spin_lock_init(&ring->fence_lock);
spin_lock_init(&ring->delayed_free_lock);
@@ -357,6 +357,8 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
spinlock_t fence_lock;
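+ /* Requests awaiting interrupt driven completion, protected by fence_lock */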
+ struct list_head fence_signal_list;
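+ /* Signalled requests awaiting a deferred unreference, protected by fence_lock */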
+ struct list_head fence_unsignal_list;
};
bool intel_ring_initialized(struct intel_engine_cs *ring);