@@ -709,11 +709,12 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
task = NULL;
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
- seq_printf(m, " %x @ %d: %s [%d]\n",
+ seq_printf(m, " %x @ %d: %s [%d], fence = %x:%x\n",
req->seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
- task ? task->pid : -1);
+ task ? task->pid : -1,
+ req->fence.context, req->fence.seqno);
rcu_read_unlock();
}
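For reference, each request line in the i915_gem_request_info debugfs dump now also carries the fence identity as context:seqno. With illustrative (made-up) values the new output looks like:

    1c73 @ 4000: Xorg [1034], fence = 1:1c73

The context names the ring's timeline, and the fence seqno mirrors the legacy request seqno at allocation time (see the fence_init() call further down).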
@@ -53,6 +53,7 @@
#include <linux/kref.h>
#include <linux/pm_qos.h>
#include "intel_guc.h"
+#include <linux/fence.h>
/* General customization:
*/
@@ -2197,7 +2198,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
* initial reference taken using kref_init
*/
struct drm_i915_gem_request {
- struct kref ref;
+ /**
+ * Underlying object for implementing the signal/wait infrastructure.
+ * NB: Never call fence_later() or return this fence object to
+ * userland! Due to lazy allocation, scheduler re-ordering, pre-emption,
+ * etc., there is no guarantee at all about the validity or
+ * sequentiality of the fence's seqno! It is also unsafe to let
+ * anything outside of the i915 driver get hold of the fence object,
+ * as the cleanup when decrementing the reference count requires
+ * holding the driver mutex lock.
+ */
+ struct fence fence;
/** On which ring this request was generated */
struct drm_i915_private *i915;
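With the kref gone, the request's lifetime is managed by the refcount embedded in struct fence, and the request is recovered from its fence with container_of(). A minimal sketch of the pattern the new fence_ops callbacks rely on (to_i915_request() is a hypothetical name, not part of this patch):

    static inline struct drm_i915_gem_request *
    to_i915_request(struct fence *req_fence)
    {
            /* Safe because 'fence' is embedded inside the request. */
            return container_of(req_fence, struct drm_i915_gem_request, fence);
    }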
@@ -2283,7 +2294,13 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
struct intel_context *ctx,
struct drm_i915_gem_request **req_out);
void i915_gem_request_cancel(struct drm_i915_gem_request *req);
-void i915_gem_request_free(struct kref *req_ref);
+
+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
+ bool lazy_coherency)
+{
+ return fence_is_signaled(&req->fence);
+}
+
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file);
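The lazy_coherency parameter is retained purely so that existing callers do not have to change; it is now ignored, as fence_is_signaled() always ends up in the fence's ->signaled hook, which reads the seqno with full coherency. Roughly, the generic helper of this era behaves as below (a simplified paraphrase, not the exact upstream source); once a fence signals, the result is latched in fence->flags:

    static inline bool fence_is_signaled(struct fence *fence)
    {
            if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                    return true;            /* already latched */

            if (fence->ops->signaled && fence->ops->signaled(fence)) {
                    fence_signal(fence);    /* latch and run callbacks */
                    return true;
            }

            return false;
    }

This is also why the busy-spin path later in the patch open-codes its seqno checks: it still wants lazy coherency inside the hot loop, which the fence interface does not expose.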
@@ -2303,7 +2320,7 @@ static inline struct drm_i915_gem_request *
i915_gem_request_reference(struct drm_i915_gem_request *req)
{
if (req)
- kref_get(&req->ref);
+ fence_get(&req->fence);
return req;
}
@@ -2311,7 +2328,7 @@ static inline void
i915_gem_request_unreference(struct drm_i915_gem_request *req)
{
WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
- kref_put(&req->ref, i915_gem_request_free);
+ fence_put(&req->fence);
}
static inline void
@@ -2323,7 +2340,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
return;
dev = req->ring->dev;
- if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
+ if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex))
mutex_unlock(&dev->struct_mutex);
}
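These conversions work because struct fence embeds its kref as 'refcount' and fence_release() is the standard release callback; the generic helpers expand to roughly:

    static inline struct fence *fence_get(struct fence *fence)
    {
            if (fence)
                    kref_get(&fence->refcount);
            return fence;
    }

    static inline void fence_put(struct fence *fence)
    {
            if (fence)
                    kref_put(&fence->refcount, fence_release);
    }

The unlocked variant has to open-code kref_put_mutex() on fence.refcount instead of calling fence_put(), because the release path (i915_gem_request_free() below) requires struct_mutex to be held.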
@@ -2340,12 +2357,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
}
/*
- * XXX: i915_gem_request_completed should be here but currently needs the
- * definition of i915_seqno_passed() which is below. It will be moved in
- * a later patch when the call to i915_seqno_passed() is obsoleted...
- */
-
-/*
* A command that requires special handling by the command parser.
*/
struct drm_i915_cmd_descriptor {
@@ -2966,20 +2977,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq1 - seq2) >= 0;
}
-static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
- bool lazy_coherency)
-{
- u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
- return i915_seqno_passed(seqno, req->previous_seqno);
-}
-
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
- bool lazy_coherency)
-{
- u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
- return i915_seqno_passed(seqno, req->seqno);
-}
-
int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
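i915_seqno_passed() survives the clean-up because the fence's ->signaled hook and the spin-wait path still depend on its wraparound-safe comparison. A self-contained illustration of why the signed cast matters (plain userspace C, values made up):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static inline bool i915_seqno_passed(uint32_t seq1, uint32_t seq2)
    {
            return (int32_t)(seq1 - seq2) >= 0;
    }

    int main(void)
    {
            /* Shortly after the 32-bit seqno wraps: 2 really is "after"
             * 0xfffffffe, which a plain unsigned compare would get wrong.
             */
            assert(i915_seqno_passed(2, 0xfffffffeu));
            assert(!i915_seqno_passed(0xfffffffeu, 2));
            return 0;
    }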
@@ -1192,6 +1192,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
unsigned long timeout;
unsigned cpu;
+ uint32_t seqno;
/* When waiting for high frequency requests, e.g. during synchronous
* rendering split between the CPU and GPU, the finite amount of time
@@ -1207,12 +1208,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
return -EBUSY;
/* Only spin if we know the GPU is processing this request */
- if (!i915_gem_request_started(req, true))
+ seqno = req->ring->get_seqno(req->ring, true);
+ if (!i915_seqno_passed(seqno, req->previous_seqno))
return -EAGAIN;
timeout = local_clock_us(&cpu) + 5;
while (!need_resched()) {
- if (i915_gem_request_completed(req, true))
+ seqno = req->ring->get_seqno(req->ring, true);
+ if (i915_seqno_passed(seqno, req->seqno))
return 0;
if (signal_pending_state(state, current))
@@ -1224,7 +1227,8 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
cpu_relax_lowlatency();
}
- if (i915_gem_request_completed(req, false))
+ seqno = req->ring->get_seqno(req->ring, false);
+ if (i915_seqno_passed(seqno, req->seqno))
return 0;
return -EAGAIN;
@@ -2679,12 +2683,14 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
}
}
-void i915_gem_request_free(struct kref *req_ref)
+static void i915_gem_request_free(struct fence *req_fence)
{
- struct drm_i915_gem_request *req = container_of(req_ref,
- typeof(*req), ref);
+ struct drm_i915_gem_request *req = container_of(req_fence,
+ typeof(*req), fence);
struct intel_context *ctx = req->ctx;
+ WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
+
if (req->file_priv)
i915_gem_request_remove_from_client(req);
@@ -2700,6 +2706,45 @@ void i915_gem_request_free(struct kref *req_ref)
kmem_cache_free(req->i915->requests, req);
}
+static bool i915_gem_request_enable_signaling(struct fence *req_fence)
+{
+ /* Interrupt-driven fences are not implemented yet. */
+ WARN(true, "This should not be called!");
+ return true;
+}
+
+static bool i915_gem_request_is_completed(struct fence *req_fence)
+{
+ struct drm_i915_gem_request *req = container_of(req_fence,
+ typeof(*req), fence);
+ u32 seqno;
+
+ seqno = req->ring->get_seqno(req->ring, false /* lazy_coherency */);
+
+ return i915_seqno_passed(seqno, req->seqno);
+}
+
+static const char *i915_gem_request_get_driver_name(struct fence *req_fence)
+{
+ return "i915";
+}
+
+static const char *i915_gem_request_get_timeline_name(struct fence *req_fence)
+{
+ struct drm_i915_gem_request *req = container_of(req_fence,
+ typeof(*req), fence);
+ return req->ring->name;
+}
+
+static const struct fence_ops i915_gem_request_fops = {
+ .enable_signaling = i915_gem_request_enable_signaling,
+ .signaled = i915_gem_request_is_completed,
+ .wait = fence_default_wait,
+ .release = i915_gem_request_free,
+ .get_driver_name = i915_gem_request_get_driver_name,
+ .get_timeline_name = i915_gem_request_get_timeline_name,
+};
+
int i915_gem_request_alloc(struct intel_engine_cs *ring,
struct intel_context *ctx,
struct drm_i915_gem_request **req_out)
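To summarise the ops table: .signaled polls the ring's seqno with full coherency, .wait falls back to the generic fence_default_wait(), .release routes the final reference drop into i915_gem_request_free(), and .enable_signaling is deliberately a warning stub because interrupt-driven signalling is left to a later patch. The only generic entry point the driver genuinely relies on at this stage is therefore the polling one (request_done() below is a hypothetical illustration, not part of the patch):

    /* Poll a request through the generic fence interface; this lands in
     * i915_gem_request_is_completed() via the .signaled hook. Avoid
     * fence_wait() for now: fence_default_wait() would call the
     * .enable_signaling stub above and trigger its WARN.
     */
    static bool request_done(struct drm_i915_gem_request *req)
    {
            return fence_is_signaled(&req->fence);
    }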
@@ -2721,7 +2766,6 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
if (ret)
goto err;
- kref_init(&req->ref);
req->i915 = dev_priv;
req->ring = ring;
req->ctx = ctx;
@@ -2736,6 +2780,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
goto err;
}
+ fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
+ ring->fence_context, req->seqno);
+
/*
* Reserve space in the ring buffer for all the commands required to
* eventually emit this request. This is to guarantee that the
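For reference, the fence API of this era declares the initialiser as:

    void fence_init(struct fence *fence, const struct fence_ops *ops,
                    spinlock_t *lock, unsigned context, unsigned seqno);

The lock argument is the per-ring fence_lock added later in this patch (it protects the fence's flags and callback list), while context and seqno place the fence on the ring's timeline. The fence seqno is seeded from the legacy request seqno, so the two match at allocation time even though, as the structure comment warns, no ordering guarantee is attached to it.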
@@ -4810,7 +4857,7 @@ i915_gem_init_hw(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
- int ret, i, j;
+ int ret, i, j, fence_base;
if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
return -EIO;
@@ -4880,12 +4927,16 @@ i915_gem_init_hw(struct drm_device *dev)
if (ret)
goto out;
+ fence_base = fence_context_alloc(I915_NUM_RINGS);
+
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
struct drm_i915_gem_request *req;
WARN_ON(!ring->default_context);
+ ring->fence_context = fence_base + i;
+
ret = i915_gem_request_alloc(ring, ring->default_context, &req);
if (ret) {
i915_gem_cleanup_ringbuffer(dev);
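fence_context_alloc() hands out blocks of globally unique timeline ids; upstream it is little more than an atomic counter, roughly (a paraphrase of the generic fence code):

    unsigned fence_context_alloc(unsigned num)
    {
            /* Reserve 'num' consecutive context ids, return the first. */
            return atomic_add_return(num, &fence_context_counter) - num;
    }

Allocating I915_NUM_RINGS contexts in one go and handing each ring fence_base + i gives every ring its own timeline, which is what lets a context:seqno pair identify a request.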
@@ -2013,6 +2013,7 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
+ spin_lock_init(&ring->fence_lock);
i915_gem_batch_pool_init(dev, &ring->batch_pool);
init_waitqueue_head(&ring->irq_queue);
@@ -2159,6 +2159,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
INIT_LIST_HEAD(&ring->request_list);
INIT_LIST_HEAD(&ring->execlist_queue);
INIT_LIST_HEAD(&ring->buffers);
+ spin_lock_init(&ring->fence_lock);
i915_gem_batch_pool_init(dev, &ring->batch_pool);
memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -352,6 +352,9 @@ struct intel_engine_cs {
* to encode the command length in the header).
*/
u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+ unsigned fence_context;
+ spinlock_t fence_lock;
};
static inline bool