diff mbox

[3/3] drm/i915: Derive GEM requests from dma-fence

Message ID 1449934473-24902-3-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Dec. 12, 2015, 3:34 p.m. UTC
dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
 drivers/gpu/drm/i915/i915_gem.c            |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 128 ++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.h    |  25 ++----
 drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c           |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  14 ++--
 9 files changed, 140 insertions(+), 40 deletions(-)

Comments

Dave Gordon Jan. 4, 2016, 12:17 p.m. UTC | #1
On 12/12/15 15:34, Chris Wilson wrote:
> dma-buf provides a generic fence class for interoperation between
> drivers. Internally we use the request structure as a fence, and so with
> only a little bit of interfacing we can rebase those requests on top of
> dma-buf fences. This will allow us, in the future, to pass those fences
> back to userspace or between drivers.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
>   drivers/gpu/drm/i915/i915_gem.c            |   2 +-
>   drivers/gpu/drm/i915/i915_gem_request.c    | 128 ++++++++++++++++++++++++++---
>   drivers/gpu/drm/i915/i915_gem_request.h    |  25 ++----
>   drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
>   drivers/gpu/drm/i915/i915_guc_submission.c |   2 +-
>   drivers/gpu/drm/i915/intel_breadcrumbs.c   |   3 +-
>   drivers/gpu/drm/i915/intel_lrc.c           |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c    |  14 ++--
>   9 files changed, 140 insertions(+), 40 deletions(-)

This seems to be doing the same thing as John Harrison's patch

[PATCH 05/13] drm/i915: Convert requests to use struct fence

Likewise you both have a patch to remove lazy coherency, and various 
other duplications:

[PATCH 06/13] drm/i915: Removed now redundant parameter to 
i915_gem_request_completed()
[PATCH 08/13] drm/i915: Delay the freeing of requests until retire time

These will have to be unified; we just have to pick exactly how to make 
each of these improvements.

.Dave.
Chris Wilson Jan. 4, 2016, 12:22 p.m. UTC | #2
On Mon, Jan 04, 2016 at 12:17:47PM +0000, Dave Gordon wrote:
> On 12/12/15 15:34, Chris Wilson wrote:
> >dma-buf provides a generic fence class for interoperation between
> >drivers. Internally we use the request structure as a fence, and so with
> >only a little bit of interfacing we can rebase those requests on top of
> >dma-buf fences. This will allow us, in the future, to pass those fences
> >back to userspace or between drivers.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> >Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
> >  drivers/gpu/drm/i915/i915_gem.c            |   2 +-
> >  drivers/gpu/drm/i915/i915_gem_request.c    | 128 ++++++++++++++++++++++++++---
> >  drivers/gpu/drm/i915/i915_gem_request.h    |  25 ++----
> >  drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
> >  drivers/gpu/drm/i915/i915_guc_submission.c |   2 +-
> >  drivers/gpu/drm/i915/intel_breadcrumbs.c   |   3 +-
> >  drivers/gpu/drm/i915/intel_lrc.c           |   2 +-
> >  drivers/gpu/drm/i915/intel_ringbuffer.c    |  14 ++--
> >  9 files changed, 140 insertions(+), 40 deletions(-)
> 
> This seems to be doing the same thing as John Harrison's patch
> 
> [PATCH 05/13] drm/i915: Convert requests to use struct fence
> 
> Likewise you both have a patch to remove lazy coherency, and various
> other duplications:
> 
> [PATCH 06/13] drm/i915: Removed now redundant parameter to
> i915_gem_request_completed()
> [PATCH 08/13] drm/i915: Delay the freeing of requests until retire time
> 
> These will have to be unified; we just have to pick exactly how to
> make each of these improvements.

Indeed. This is my rebuttal of those patches since they did not
understand the issues involved.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 95fa6f7c8e00..d182c299d521 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -710,7 +710,7 @@  static int i915_gem_request_info(struct seq_file *m, void *data)
 			if (req->pid)
 				task = pid_task(req->pid, PIDTYPE_PID);
 			seq_printf(m, "    %x @ %d: %s [%d]\n",
-				   req->seqno,
+				   req->fence.seqno,
 				   (int) (jiffies - req->emitted_jiffies),
 				   task ? task->comm : "<unknown>",
 				   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d92950b56f2d..8efc04547e23 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2264,7 +2264,7 @@  void __i915_add_request(struct drm_i915_gem_request *request,
 
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = ring->last_submitted_seqno;
-	ring->last_submitted_seqno = request->seqno;
+	ring->last_submitted_seqno = request->fence.seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
 	trace_i915_gem_request_add(request);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bb88146dd90c..eec6b920e646 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,115 @@ 
 
 #include "i915_drv.h"
 
+static inline struct drm_i915_gem_request *
+to_i915_request(struct fence *fence)
+{
+	return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+	return to_i915_request(fence)->ring->name;
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+	struct drm_i915_gem_request *request = to_i915_request(fence);
+
+	if (i915_gem_request_completed(request))
+		return true;
+
+	if (request->reset_counter != i915_reset_counter(&request->i915->gpu_error))
+		return true;
+
+	return false;
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+	if (i915_fence_signaled(fence))
+		return false;
+
+	return intel_engine_add_signal(to_i915_request(fence)) == 0;
+}
+
+#define NO_WAITBOOST NULL /* FIXME! */
+
+static signed long i915_fence_wait(struct fence *fence,
+				   bool interruptible,
+				   signed long timeout_jiffies)
+{
+	s64 timeout_ns, *timeout;
+	int ret;
+
+	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+		timeout = &timeout_ns;
+	} else
+		timeout = NULL;
+
+	ret = __i915_wait_request(to_i915_request(fence),
+				  interruptible, timeout,
+				  NO_WAITBOOST);
+	if (ret == -ETIME)
+		return 0;
+
+	if (ret < 0)
+		return ret;
+
+	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+		timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+	return timeout_jiffies;
+}
+
+static int i915_fence_fill_driver_data(struct fence *fence, void *data,
+				      int size)
+{
+	struct drm_i915_gem_request *request = to_i915_request(fence);
+
+	if (size < sizeof(request->fence.seqno))
+		return -ENOMEM;
+
+	memcpy(data, &request->fence.seqno, sizeof(request->fence.seqno));
+	return sizeof(request->fence.seqno);
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+					  int size)
+{
+	snprintf(str, size, "%u",
+		 intel_ring_get_seqno(to_i915_request(fence)->ring));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	kmem_cache_free(req->i915->requests, req);
+}
+
+static const struct fence_ops i915_fence_ops = {
+	.get_driver_name = i915_fence_get_driver_name,
+	.get_timeline_name = i915_fence_get_timeline_name,
+	.enable_signaling = i915_fence_enable_signaling,
+	.signaled = i915_fence_signaled,
+	.wait = i915_fence_wait,
+	.release = i915_fence_release,
+	.fill_driver_data = i915_fence_fill_driver_data,
+	.fence_value_str = i915_fence_value_str,
+	.timeline_value_str = i915_fence_timeline_value_str,
+};
+
 static int
 i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 {
@@ -49,6 +158,7 @@  int i915_gem_request_alloc(struct intel_engine_cs *ring,
 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
 	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
+	u32 seqno;
 	int ret;
 
 	if (!req_out)
@@ -68,11 +178,13 @@  int i915_gem_request_alloc(struct intel_engine_cs *ring,
 	if (req == NULL)
 		return -ENOMEM;
 
-	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
+	ret = i915_gem_get_seqno(ring->dev, &seqno);
 	if (ret)
 		goto err;
 
-	kref_init(&req->ref);
+	spin_lock_init(&req->lock);
+	fence_init(&req->fence, &i915_fence_ops, &req->lock, ring->id, seqno);
+
 	INIT_LIST_HEAD(&req->list);
 	req->i915 = dev_priv;
 	req->ring = ring;
@@ -362,7 +474,7 @@  int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (INTEL_INFO(req->i915)->gen >= 6)
 		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
-	intel_wait_init(&wait, req->seqno);
+	intel_wait_init(&wait, req->fence.seqno);
 	set_task_state(wait.task, state);
 
 	/* Optimistic spin for the next ~jiffie before touching IRQs */
@@ -453,7 +565,8 @@  out:
 			*timeout = 0;
 	}
 
-	if (ret == 0 && rps && req->seqno == req->ring->last_submitted_seqno) {
+	if (ret == 0 && rps &&
+	    req->fence.seqno == req->ring->last_submitted_seqno) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -500,10 +613,3 @@  i915_wait_request(struct drm_i915_gem_request *req)
 	i915_gem_request_retire__upto(req);
 	return 0;
 }
-
-void i915_gem_request_free(struct kref *req_ref)
-{
-	struct drm_i915_gem_request *req =
-		container_of(req_ref, typeof(*req), ref);
-	kmem_cache_free(req->i915->requests, req);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index b19160a1511a..4198cd97e642 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -25,6 +25,8 @@ 
 #ifndef I915_GEM_REQUEST_H
 #define I915_GEM_REQUEST_H
 
+#include <linux/fence.h>
+
 /**
  * Request queue structure.
  *
@@ -36,11 +38,11 @@ 
  * emission time to be associated with the request for tracking how far ahead
  * of the GPU the submission is.
  *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
+ * The requests are reference counted.
  */
 struct drm_i915_gem_request {
-	struct kref ref;
+	struct fence fence;
+	spinlock_t lock;
 
 	/** On Which ring this request was generated */
 	struct drm_i915_private *i915;
@@ -53,12 +55,6 @@  struct drm_i915_gem_request {
 	  */
 	u32 previous_seqno;
 
-	 /** GEM sequence number associated with this request,
-	  * when the HWS breadcrumb is equal or greater than this the GPU
-	  * has finished processing this request.
-	  */
-	u32 seqno;
-
 	/** Position in the ringbuffer of the start of the request */
 	u32 head;
 
@@ -127,7 +123,6 @@  int i915_gem_request_alloc(struct intel_engine_cs *ring,
 			   struct intel_context *ctx,
 			   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
-void i915_gem_request_free(struct kref *req_ref);
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 				   struct drm_file *file);
 void i915_gem_request_retire__upto(struct drm_i915_gem_request *req);
@@ -135,7 +130,7 @@  void i915_gem_request_retire__upto(struct drm_i915_gem_request *req);
 static inline uint32_t
 i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
 {
-	return req ? req->seqno : 0;
+	return req ? req->fence.seqno : 0;
 }
 
 static inline struct intel_engine_cs *
@@ -147,15 +142,13 @@  i915_gem_request_get_ring(struct drm_i915_gem_request *req)
 static inline struct drm_i915_gem_request *
 i915_gem_request_reference(struct drm_i915_gem_request *req)
 {
-	if (req)
-		kref_get(&req->ref);
-	return req;
+	return container_of(fence_get(&req->fence), typeof(*req), fence);
 }
 
 static inline void
 i915_gem_request_unreference(struct drm_i915_gem_request *req)
 {
-	kref_put(&req->ref, i915_gem_request_free);
+	fence_put(&req->fence);
 }
 
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
@@ -188,7 +181,7 @@  static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
 	return i915_seqno_passed(intel_ring_get_seqno(req->ring),
-				 req->seqno);
+				 req->fence.seqno);
 }
 
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6db6d7e02aea..70ac9badb950 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1094,7 +1094,7 @@  static void i915_gem_record_rings(struct drm_device *dev,
 			}
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
+			erq->seqno = request->fence.seqno;
 			erq->jiffies = request->emitted_jiffies;
 			erq->tail = request->postfix;
 		}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0d23785ba818..398c7c8c2815 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -609,7 +609,7 @@  int i915_guc_submit(struct i915_guc_client *client,
 		client->retcode = 0;
 	}
 	guc->submissions[ring_id] += 1;
-	guc->last_seqno[ring_id] = rq->seqno;
+	guc->last_seqno[ring_id] = rq->fence.seqno;
 
 	return q_ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index d916e1325bd4..fa58a3f3b8bc 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -324,6 +324,7 @@  static int intel_breadcrumbs_signaller(void *arg)
 		if (signal_complete(signal)) {
 			intel_engine_remove_wait(engine, &signal->wait);
 
+			fence_signal(&signal->request->fence);
 			i915_gem_request_unreference(signal->request);
 
 			spin_lock(&engine->breadcrumbs.lock);
@@ -372,7 +373,7 @@  int intel_engine_add_signal(struct drm_i915_gem_request *request)
 	}
 
 	signal->wait.task = task;
-	signal->wait.seqno = request->seqno;
+	signal->wait.seqno = request->fence.seqno;
 
 	signal->request = i915_gem_request_reference(request);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 009ab1321205..40645479dadd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1768,7 +1768,7 @@  static int gen8_emit_request(struct drm_i915_gem_request *request)
 				(ring->status_page.gfx_addr +
 				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
 	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, request->seqno);
+	intel_logical_ring_emit(ringbuf, request->fence.seqno);
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance_and_submit(request);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8ed1cac3a4f2..f68319a8a4cd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1218,7 +1218,7 @@  static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
 					   PIPE_CONTROL_FLUSH_ENABLE);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
+		intel_ring_emit(signaller, signaller_req->fence.seqno);
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
@@ -1256,7 +1256,7 @@  static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
+		intel_ring_emit(signaller, signaller_req->fence.seqno);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
 		intel_ring_emit(signaller, 0);
@@ -1289,7 +1289,7 @@  static int gen6_signal(struct drm_i915_gem_request *signaller_req,
 		if (i915_mmio_reg_valid(mbox_reg)) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit_reg(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller_req->seqno);
+			intel_ring_emit(signaller, signaller_req->fence.seqno);
 		}
 	}
 
@@ -1324,7 +1324,7 @@  gen6_add_request(struct drm_i915_gem_request *req)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, req->seqno);
+	intel_ring_emit(ring, req->fence.seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1448,7 +1448,7 @@  pc_render_add_request(struct drm_i915_gem_request *req)
 			PIPE_CONTROL_QW_WRITE |
 			PIPE_CONTROL_WRITE_FLUSH);
 	intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, req->seqno);
+	intel_ring_emit(ring, req->fence.seqno);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -1467,7 +1467,7 @@  pc_render_add_request(struct drm_i915_gem_request *req)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, req->seqno);
+	intel_ring_emit(ring, req->fence.seqno);
 	intel_ring_emit(ring, 0);
 	__intel_ring_advance(ring);
 
@@ -1577,7 +1577,7 @@  i9xx_add_request(struct drm_i915_gem_request *req)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, req->seqno);
+	intel_ring_emit(ring, req->fence.seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);