@@ -1019,9 +1019,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
if (dev_priv->wq == NULL)
goto out_err;
+ /*
+ * Making this work queue unordered means that request notifications
+ * for different engines can be processed in parallel across multiple
+ * CPU cores (if available).
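+ *
+ * NB: the third argument to alloc_workqueue() is max_active; each engine
+ * owns a single request_work item, so I915_NUM_ENGINES lets every
+ * engine's worker run at once.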
+ */
+ dev_priv->req_wq = alloc_workqueue("i915-rq", WQ_HIGHPRI, I915_NUM_ENGINES);
+ if (dev_priv->req_wq == NULL)
+ goto out_free_wq;
+
dev_priv->hotplug.dp_wq = alloc_ordered_workqueue("i915-dp", 0);
if (dev_priv->hotplug.dp_wq == NULL)
- goto out_free_wq;
+ goto out_free_req_wq;
dev_priv->gpu_error.hangcheck_wq =
alloc_ordered_workqueue("i915-hangcheck", 0);
@@ -1032,6 +1041,8 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
out_free_dp_wq:
destroy_workqueue(dev_priv->hotplug.dp_wq);
+out_free_req_wq:
+ destroy_workqueue(dev_priv->req_wq);
out_free_wq:
destroy_workqueue(dev_priv->wq);
out_err:
@@ -1044,6 +1055,7 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
{
destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
destroy_workqueue(dev_priv->hotplug.dp_wq);
+ destroy_workqueue(dev_priv->req_wq);
destroy_workqueue(dev_priv->wq);
}
@@ -1849,6 +1849,9 @@ struct drm_i915_private {
*/
struct workqueue_struct *wq;
+ /* Work queue for request completion processing */
+ struct workqueue_struct *req_wq;
+
/* Display functions */
struct drm_i915_display_funcs display;
@@ -2356,6 +2359,10 @@ struct drm_i915_gem_request {
* Underlying object for implementing the signal/wait stuff.
*/
struct fence fence;
+ /** Link in the engine's fence_signal_list */
+ struct list_head signal_link;
+ /** Request was cancelled (e.g. by TDR) rather than completing normally */
+ bool cancelled;
+ /** True while this request holds a reference on the engine interrupt */
+ bool irq_enabled;
+ /** Interrupt signalling has been requested for this request */
+ bool signal_requested;
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2457,6 +2464,9 @@ struct drm_i915_gem_request {
struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
+void i915_gem_request_notify(struct intel_engine_cs *engine, bool fence_locked,
+ bool lazy_coherency);
+void i915_gem_request_worker(struct work_struct *work);
static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
{
@@ -39,6 +39,8 @@
#include <linux/pci.h>
#include <linux/dma-buf.h>
+static void i915_gem_request_submit(struct drm_i915_gem_request *req);
+
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
@@ -1251,9 +1253,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
{
struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
struct drm_i915_private *dev_priv = req->i915;
- const bool irq_test_in_progress =
- ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+ uint32_t seqno;
DEFINE_WAIT(wait);
unsigned long timeout_expire;
s64 before = 0; /* Only to silence a compiler warning. */
@@ -1261,9 +1262,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
- if (list_empty(&req->list))
- return 0;
-
if (i915_gem_request_completed(req))
return 0;
@@ -1293,10 +1291,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
if (ret == 0)
goto out;
- if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) {
- ret = -ENODEV;
- goto out;
- }
+ /*
+ * Enable interrupt-driven completion signalling for the request.
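+ *
+ * NB: fence_enable_sw_signaling() calls the fence's .enable_signaling
+ * hook, i915_gem_request_enable_signaling(), with fence_lock held; that
+ * either enables the engine interrupt now or flags it to be enabled
+ * when the request is submitted.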
+ */
+ fence_enable_sw_signaling(&req->fence);
for (;;) {
struct timer_list timer;
@@ -1320,6 +1318,19 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
+ /*
+ * There is quite a lot of latency in the user interrupt
+ * path, so do an explicit seqno check to potentially avoid
+ * all that delay.
+ */
+ if (req->engine->irq_seqno_barrier)
+ req->engine->irq_seqno_barrier(req->engine);
+ seqno = engine->get_seqno(engine);
+ if (i915_seqno_passed(seqno, req->seqno)) {
+ ret = 0;
+ break;
+ }
+
if (signal_pending_state(state, current)) {
ret = -ERESTARTSYS;
break;
@@ -1346,14 +1357,36 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
destroy_timer_on_stack(&timer);
}
}
- if (!irq_test_in_progress)
- engine->irq_put(engine);
finish_wait(&engine->irq_queue, &wait);
out:
trace_i915_gem_request_wait_end(req);
+ if (ret == 0) {
+ if (req->engine->irq_seqno_barrier)
+ req->engine->irq_seqno_barrier(req->engine);
+ seqno = engine->get_seqno(engine);
+ /*
+ * Check for the fast path case of the seqno being passed but
+ * the request not actually being signalled yet.
+ */
+ if (i915_seqno_passed(seqno, req->seqno) &&
+ !i915_gem_request_completed(req)) {
+ /*
+ * Make sure the request is marked as completed before
+ * returning. NB: The spinlock must be held around the whole
+ * call to avoid a race condition where a concurrently running
+ * interrupt handler could cause this invocation to exit early
+ * even though the request has not actually been fully
+ * processed yet.
+ */
+ spin_lock_irq(&req->engine->fence_lock);
+ i915_gem_request_notify(req->engine, true, true);
+ spin_unlock_irq(&req->engine->fence_lock);
+ }
+ }
+
if (timeout) {
s64 tres = *timeout - (ktime_get_raw_ns() - before);
@@ -1419,6 +1452,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
trace_i915_gem_request_retire(request);
+ if (request->irq_enabled) {
+ request->engine->irq_put(request->engine);
+ request->irq_enabled = false;
+ }
+
/* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
* of tail of the request to update the last known position
@@ -1432,6 +1470,22 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
list_del_init(&request->list);
i915_gem_request_remove_from_client(request);
+ /*
+ * The request may still be on the signal pending list, e.g.
+ * because it was cancelled by TDR, pre-emption, etc.
+ */
+ if (!list_empty(&request->signal_link)) {
+ /*
+ * The request must be marked as cancelled and the underlying
+ * fence as failed. NB: There is no explicit fence fail API,
+ * there is only a manual poke and signal.
+ */
+ request->cancelled = true;
+ /* How should this propagate to any associated sync_fence? */
+ request->fence.status = -EIO;
+ fence_signal(&request->fence);
+ }
+
if (request->previous_context) {
if (i915.enable_execlists)
intel_lr_context_unpin(request->previous_context,
@@ -2699,6 +2753,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
*/
request->postfix = intel_ring_get_tail(ringbuf);
+ /*
+ * Add the fence to the pending list before emitting the commands to
+ * generate a seqno notification interrupt.
+ */
+ i915_gem_request_submit(request);
+
if (i915.enable_execlists)
ret = engine->emit_request(request);
else {
@@ -2789,22 +2849,157 @@ static void i915_gem_request_free(struct fence *req_fence)
call_rcu(&req->fence.rcu, i915_gem_request_free_rcu);
}
-static bool i915_gem_request_enable_signaling(struct fence *req_fence)
+/*
+ * The request is being actively waited on, so enable interrupt based
+ * completion signalling.
+ */
+static void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req)
{
- /* Interrupt driven fences are not implemented yet.*/
- WARN(true, "This should not be called!");
- return true;
+ struct drm_i915_private *dev_priv = req->engine->i915;
+ const bool irq_test_in_progress =
+ ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) &
+ intel_engine_flag(req->engine);
+
+ if (req->irq_enabled)
+ return;
+
+ if (irq_test_in_progress)
+ return;
+
+ if (req->engine->irq_get(req->engine))
+ req->irq_enabled = true;
+ else
+ WARN(1, "Failed to get IRQ!");
+
+ /*
+ * Because the interrupt is only enabled on demand, there is a race
+ * where the interrupt can fire before anyone is looking for it. So
+ * do an explicit check for missed interrupts.
+ */
+ i915_gem_request_notify(req->engine, true, false);
}
-static bool i915_gem_request_is_completed(struct fence *req_fence)
+static bool i915_gem_request_enable_signaling(struct fence *req_fence)
{
struct drm_i915_gem_request *req = container_of(req_fence,
typeof(*req), fence);
+
+ /*
+ * No need to actually enable interrupt based processing until the
+ * request has been submitted to the hardware, which is when
+ * 'i915_gem_request_submit()' is called. So only really enable
+ * signalling there; for now, just set a flag to say that interrupts
+ * are wanted when the request is eventually submitted. On the other
+ * hand, if the request has already been submitted then interrupts do
+ * need to be enabled now.
+ */
+
+ req->signal_requested = true;
+
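+ /* A non-empty signal_link means the request has already been submitted. */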
+ if (!list_empty(&req->signal_link))
+ i915_gem_request_enable_interrupt(req);
+
+ return true;
+}
+
+/*
+ * The request is about to be submitted to the hardware so add the fence to
+ * the list of signalable fences.
+ *
+ * NB: This does not necessarily enable interrupts yet. That only occurs on
+ * demand when the request is actually waited on. However, adding it to the
+ * list early ensures that there is no race condition where the interrupt
+ * could pop out prematurely and thus be completely lost. The only cost
+ * is that a manual completion check must be made after the interrupt
+ * has been enabled.
+ */
+static void i915_gem_request_submit(struct drm_i915_gem_request *req)
+{
+ /*
+ * Always enable signal processing for the request's fence object
+ * before that request is submitted to the hardware. Thus there is no
+ * race condition whereby the interrupt could pop out before the
+ * request has been added to the signal list. Hence there is no need
+ * to check for completion, undo the list add and return false.
+ */
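+ /*
+ * The signal list holds its own reference to the request; it is
+ * dropped by i915_gem_request_notify() once the request has been
+ * signalled or its cancellation noticed.
+ */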
+ i915_gem_request_reference(req);
+
+ spin_lock_irq(&req->engine->fence_lock);
+
+ WARN_ON(!list_empty(&req->signal_link));
+ list_add_tail(&req->signal_link, &req->engine->fence_signal_list);
+
+ /*
+ * NB: Interrupts are only enabled on demand. Thus there is still a
+ * race where the request could complete before the interrupt has
+ * been enabled. Thus care must be taken at that point.
+ */
+
+ /* Have interrupts already been requested? */
+ if (req->signal_requested)
+ i915_gem_request_enable_interrupt(req);
+
+ spin_unlock_irq(&req->engine->fence_lock);
+}
+
+/**
+ * i915_gem_request_worker - request work handler callback.
+ * @work: Work structure
+ *
+ * Called in response to a seqno interrupt to process the completed requests.
+ */
+void i915_gem_request_worker(struct work_struct *work)
+{
+ struct intel_engine_cs *engine;
+
+ engine = container_of(work, struct intel_engine_cs, request_work);
+ i915_gem_request_notify(engine, false, false);
+}
+
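+/*
+ * i915_gem_request_notify() scans the engine's signal list, detaching any
+ * cancelled requests and signalling every fence whose seqno has passed.
+ * NB: it is called from several contexts: the request worker (fence_lock
+ * not held), __i915_wait_request() (fence_lock held) and the reset and
+ * retire paths; hence the fence_locked and lazy_coherency parameters.
+ */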
+void i915_gem_request_notify(struct intel_engine_cs *engine, bool fence_locked,
+ bool lazy_coherency)
+{
+ struct drm_i915_gem_request *req, *req_next;
u32 seqno;
- seqno = req->engine->get_seqno(req->engine);
+ /*
+ * Note that this is safe to do before acquiring the spinlock as any
+ * items are only added to the list before enabling interrupts. Hence
+ * this can't be run while the list is transitioning from empty to
+ * not-empty. And a false not-empty is not an issue - it would just be
+ * the same as not doing the early exit test at all.
+ */
+ if (list_empty(&engine->fence_signal_list))
+ return;
+
+ if (!fence_locked)
+ spin_lock_irq(&engine->fence_lock);
+
+ if (!lazy_coherency && engine->irq_seqno_barrier)
+ engine->irq_seqno_barrier(engine);
+ seqno = engine->get_seqno(engine);
+
+ list_for_each_entry_safe(req, req_next, &engine->fence_signal_list, signal_link) {
+ if (!req->cancelled && !i915_seqno_passed(seqno, req->seqno))
+ break;
+
+ /*
+ * Start by removing the fence from the signal list, otherwise
+ * the retire code can run concurrently and get confused.
+ */
+ list_del_init(&req->signal_link);
+
+ if (!req->cancelled)
+ fence_signal_locked(&req->fence);
+
+ if (req->irq_enabled) {
+ req->engine->irq_put(req->engine);
+ req->irq_enabled = false;
+ }
+
+ i915_gem_request_unreference(req);
+ }
- return i915_seqno_passed(seqno, req->seqno);
+ if (!fence_locked)
+ spin_unlock_irq(&engine->fence_lock);
}
static const char *i915_gem_request_get_driver_name(struct fence *req_fence)
@@ -2851,7 +3046,6 @@ static void i915_gem_request_fence_value_str(struct fence *req_fence,
static const struct fence_ops i915_gem_request_fops = {
.enable_signaling = i915_gem_request_enable_signaling,
- .signaled = i915_gem_request_is_completed,
.wait = fence_default_wait,
.release = i915_gem_request_free,
.get_driver_name = i915_gem_request_get_driver_name,
@@ -2933,6 +3127,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
req->ctx = ctx;
i915_gem_context_reference(req->ctx);
+ INIT_LIST_HEAD(&req->signal_link);
fence_init(&req->fence, &i915_gem_request_fops, &engine->fence_lock,
ctx->engine[engine->id].fence_timeline.fence_context,
i915_fence_timeline_get_next_seqno(&ctx->engine[engine->id].fence_timeline));
@@ -3067,6 +3262,12 @@ static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv,
i915_gem_request_retire(request);
}
+ /*
+ * Make sure that any requests that were on the signal pending list also
+ * get cleaned up.
+ */
+ i915_gem_request_notify(engine, false, false);
+
/* Having flushed all requests from all queues, we know that all
* ringbuffers must now be empty. However, since we do not reclaim
* all space when retiring the request (to prevent HEADs colliding
@@ -3114,6 +3315,14 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
{
WARN_ON(i915_verify_lists(engine->dev));
+ /*
+ * If no-one has waited on a request recently then interrupts will
+ * not have been enabled and thus no requests will ever be marked as
+ * completed. So do an interrupt check now.
+ */
+ if (engine->irq_refcount == 0)
+ i915_gem_request_notify(engine, false, true);
+
/* Retire requests first as we use it above for the early return.
* If we retire requests last, we may use a later seqno and so clear
* the requests lists without clearing the active list, leading to
@@ -5146,6 +5355,7 @@ init_engine_lists(struct intel_engine_cs *engine)
{
INIT_LIST_HEAD(&engine->active_list);
INIT_LIST_HEAD(&engine->request_list);
+ INIT_LIST_HEAD(&engine->fence_signal_list);
}
void
@@ -982,6 +982,8 @@ static void notify_ring(struct intel_engine_cs *engine)
trace_i915_gem_request_notify(engine);
engine->user_interrupts++;
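+ /* Defer completed-request processing to the i915-rq workqueue. */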
+ queue_work(engine->i915->req_wq, &engine->request_work);
+
wake_up_all(&engine->irq_queue);
}
@@ -1922,6 +1922,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
dev_priv = engine->i915;
+ cancel_work_sync(&engine->request_work);
+
if (engine->buffer) {
intel_logical_ring_stop(engine);
WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
@@ -2070,6 +2072,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
INIT_LIST_HEAD(&engine->active_list);
INIT_LIST_HEAD(&engine->request_list);
+ INIT_LIST_HEAD(&engine->fence_signal_list);
INIT_LIST_HEAD(&engine->buffers);
INIT_LIST_HEAD(&engine->execlist_queue);
spin_lock_init(&engine->execlist_lock);
@@ -2078,6 +2081,8 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
tasklet_init(&engine->irq_tasklet,
intel_lrc_irq_handler, (unsigned long)engine);
+ INIT_WORK(&engine->request_work, i915_gem_request_worker);
+
logical_ring_init_platform_invariants(engine);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine, info->irq_shift);
@@ -2326,6 +2326,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
INIT_LIST_HEAD(&engine->request_list);
INIT_LIST_HEAD(&engine->execlist_queue);
INIT_LIST_HEAD(&engine->buffers);
+ INIT_LIST_HEAD(&engine->fence_signal_list);
spin_lock_init(&engine->fence_lock);
i915_gem_batch_pool_init(dev, &engine->batch_pool);
memset(engine->semaphore.sync_seqno, 0,
@@ -2333,6 +2334,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
init_waitqueue_head(&engine->irq_queue);
+ INIT_WORK(&engine->request_work, i915_gem_request_worker);
+
ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE);
if (IS_ERR(ringbuf)) {
ret = PTR_ERR(ringbuf);
@@ -2379,6 +2382,8 @@ void intel_cleanup_engine(struct intel_engine_cs *engine)
dev_priv = engine->i915;
+ cancel_work_sync(&engine->request_work);
+
if (engine->buffer) {
intel_stop_engine(engine);
WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0);
@@ -347,11 +347,15 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
/*
- * This spinlock is used by the fence implementation internally. Note,
- * it can be acquire from interrupt context so all usage must be IRQ
- * safe.
+ * This spinlock is used by the fence implementation internally and by
+ * the i915 driver for operations on the fence_signal_list and on fences
+ * in general. Note that it can be acquired from interrupt context, so all
+ * usage must be IRQ safe.
*/
spinlock_t fence_lock;
+ /* List of request fences awaiting signalling, in submission order */
+ struct list_head fence_signal_list;
+
+ /* Worker for processing completed requests outside interrupt context */
+ struct work_struct request_work;
};
static inline bool