[13/62] drm/i915: Derive GEM requests from dma-fence

Message ID 1464971847-15809-14-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 3, 2016, 4:36 p.m. UTC
dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.

v2: The fence_context needs to be globally unique, not just unique to
this device.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 116 ++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.h    |  33 ++++----
 drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/i915_trace.h          |  10 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c           |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  11 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   1 +
 10 files changed, 143 insertions(+), 46 deletions(-)
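
For context on the v2 note: fence_context_alloc() draws fence context IDs
from a single kernel-wide counter, which is what makes a context unique
across devices rather than merely within one. A rough sketch of the
allocator (approximating the linux/fence.h implementation of this era,
not quoted from it):

	/* One counter for the whole kernel: IDs handed out here can
	 * never collide between drivers or devices.
	 */
	static atomic_t fence_context_counter = ATOMIC_INIT(0);

	/* Reserve 'num' consecutive fence context IDs, returning the
	 * first of the range.
	 */
	unsigned fence_context_alloc(unsigned num)
	{
		BUG_ON(!num);
		return atomic_add_return(num, &fence_context_counter) - num;
	}

Each engine calls fence_context_alloc(1) once at init (see the
intel_lrc.c and intel_ringbuffer.c hunks below) and then stamps every
request's fence with that context via fence_init().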

Comments

Daniel Vetter June 8, 2016, 9:14 a.m. UTC | #1
On Fri, Jun 03, 2016 at 05:36:38PM +0100, Chris Wilson wrote:
> dma-buf provides a generic fence class for interoperation between
> drivers. Internally we use the request structure as a fence, and so with
> only a little bit of interfacing we can rebase those requests on top of
> dma-buf fences. This will allow us, in the future, to pass those fences
> back to userspace or between drivers.
> 
> v2: The fence_context needs to be globally unique, not just unique to
> this device.
> 
> [snip]
> 
>  static inline struct drm_i915_gem_request *
> +to_request(struct fence *fence)
> +{
> +	/* We assume that NULL fence/request are interoperable */
> +	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
> +	return container_of(fence, struct drm_i915_gem_request, fence);

For future-proofing to make sure we don't accidentally call this on a
foreign fence:

	BUG_ON(fence->ops != &i915_fence_ops);

BUG_ON since I don't want to splatter all callers with handlers for this.
And if we ever get this wrong, debugging it with just some random
corruption would be serious pain, so I think the overhead is justified.
-Daniel

Chris Wilson June 8, 2016, 10:33 a.m. UTC | #2
On Wed, Jun 08, 2016 at 11:14:23AM +0200, Daniel Vetter wrote:
> On Fri, Jun 03, 2016 at 05:36:38PM +0100, Chris Wilson wrote:
> >  static inline struct drm_i915_gem_request *
> > +to_request(struct fence *fence)
> > +{
> > +	/* We assume that NULL fence/request are interoperable */
> > +	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
> > +	return container_of(fence, struct drm_i915_gem_request, fence);
> 
> For future-proofing to make sure we don't accidentally call this on a
> foreign fence:
> 
> 	BUG_ON(fence->ops != &i915_fence_ops);
> 
> BUG_ON since I don't want to splatter all callers with handlers for this.
> And if we ever get this wrong, debugging it with just some random
> corruption would be serious pain, so I think the overhead is justified.

How about I just remove the function? It is only used on known requests
today. Or call it __to_request_from_fence()?
-Chris
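
As a footnote to the exchange above, the guarded helper Daniel suggests
would slot in roughly as sketched below. This is for illustration only:
note the address-of (i915_fence_ops is the ops struct itself, not a
pointer), and note that i915_fence_ops is static to i915_gem_request.c
in this patch, so a check in the header would also require exporting
that symbol.

	static inline struct drm_i915_gem_request *
	to_request(struct fence *fence)
	{
		/* Catch a foreign fence here, loudly, rather than as
		 * random memory corruption much later.
		 */
		BUG_ON(fence->ops != &i915_fence_ops);

		/* We assume that NULL fence/request are interoperable */
		BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
		return container_of(fence, struct drm_i915_gem_request, fence);
	}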

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8f576b443ff6..8e37315443f3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 			if (req->pid)
 				task = pid_task(req->pid, PIDTYPE_PID);
 			seq_printf(m, "    %x @ %d: %s [%d]\n",
-				   req->seqno,
+				   req->fence.seqno,
 				   (int) (jiffies - req->emitted_jiffies),
 				   task ? task->comm : "<unknown>",
 				   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 34b2f151cdfc..512b15153ac6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,98 @@
 
 #include "i915_drv.h"
 
+static inline struct drm_i915_gem_request *
+to_i915_request(struct fence *fence)
+{
+	return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+	/* Timelines are bound by eviction to a VM. However, since
+	 * we only have a global seqno at the moment, we only have
+	 * a single timeline. Note that each timeline will have
+	 * multiple execution contexts (fence contexts) as we allow
+	 * engines within a single timeline to execute in parallel.
+	 */
+	return "global";
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+	return i915_gem_request_completed(to_i915_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+	if (i915_fence_signaled(fence))
+		return false;
+
+	return intel_engine_enable_signaling(to_i915_request(fence)) == 0;
+}
+
+static signed long i915_fence_wait(struct fence *fence,
+				   bool interruptible,
+				   signed long timeout_jiffies)
+{
+	s64 timeout_ns, *timeout;
+	int ret;
+
+	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+		timeout = &timeout_ns;
+	} else
+		timeout = NULL;
+
+	ret = __i915_wait_request(to_i915_request(fence),
+				  interruptible, timeout,
+				  NULL);
+	if (ret == -ETIME)
+		return 0;
+
+	if (ret < 0)
+		return ret;
+
+	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+		timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+	return timeout_jiffies;
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+					  int size)
+{
+	snprintf(str, size, "%u",
+		 intel_engine_get_seqno(to_i915_request(fence)->engine));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	kmem_cache_free(req->i915->requests, req);
+}
+
+static const struct fence_ops i915_fence_ops = {
+	.get_driver_name = i915_fence_get_driver_name,
+	.get_timeline_name = i915_fence_get_timeline_name,
+	.enable_signaling = i915_fence_enable_signaling,
+	.signaled = i915_fence_signaled,
+	.wait = i915_fence_wait,
+	.release = i915_fence_release,
+	.fence_value_str = i915_fence_value_str,
+	.timeline_value_str = i915_fence_timeline_value_str,
+};
+
 static int i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 {
 	if (__i915_terminally_wedged(reset_counter))
@@ -117,6 +209,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
+	u32 seqno;
 	int ret;
 
 	if (!req_out)
@@ -136,11 +229,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (req == NULL)
 		return -ENOMEM;
 
-	ret = i915_gem_get_seqno(dev_priv, &req->seqno);
+	ret = i915_gem_get_seqno(dev_priv, &seqno);
 	if (ret)
 		goto err;
 
-	kref_init(&req->ref);
+	spin_lock_init(&req->lock);
+	fence_init(&req->fence,
+		   &i915_fence_ops,
+		   &req->lock,
+		   engine->fence_context,
+		   seqno);
+
 	req->i915 = dev_priv;
 	req->engine = engine;
 	req->reset_counter = reset_counter;
@@ -376,7 +475,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
-	smp_store_mb(engine->last_submitted_seqno, request->seqno);
+	smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
 	list_add_tail(&request->list, &engine->request_list);
 
 	/* Record the position of the start of the request so that
@@ -543,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (i915_spin_request(req, state, 5))
 		goto complete;
 
-	intel_wait_init(&wait, req->seqno);
+	intel_wait_init(&wait, req->fence.seqno);
 	set_current_state(state);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
@@ -609,7 +708,7 @@ complete:
 			*timeout = 0;
 	}
 
-	if (rps && req->seqno == req->engine->last_submitted_seqno) {
+	if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -650,10 +749,3 @@ int i915_wait_request(struct drm_i915_gem_request *req)
 
 	return 0;
 }
-
-void i915_gem_request_free(struct kref *req_ref)
-{
-	struct drm_i915_gem_request *req =
-	       	container_of(req_ref, typeof(*req), ref);
-	kmem_cache_free(req->i915->requests, req);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 166e0733d2d8..248aec2c09b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -25,6 +25,8 @@
 #ifndef I915_GEM_REQUEST_H
 #define I915_GEM_REQUEST_H
 
+#include <linux/fence.h>
+
 /**
  * Request queue structure.
  *
@@ -36,11 +38,11 @@
  * emission time to be associated with the request for tracking how far ahead
  * of the GPU the submission is.
  *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
+ * The requests are reference counted.
  */
 struct drm_i915_gem_request {
-	struct kref ref;
+	struct fence fence;
+	spinlock_t lock;
 
 	/** On Which ring this request was generated */
 	struct drm_i915_private *i915;
@@ -68,12 +70,6 @@ struct drm_i915_gem_request {
 	 */
 	u32 previous_seqno;
 
-	/** GEM sequence number associated with this request,
-	 * when the HWS breadcrumb is equal or greater than this the GPU
-	 * has finished processing this request.
-	 */
-	u32 seqno;
-
 	/** Position in the ringbuffer of the start of the request */
 	u32 head;
 
@@ -152,7 +148,6 @@ __request_to_i915(const struct drm_i915_gem_request *request)
 struct drm_i915_gem_request * __must_check
 i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx);
-void i915_gem_request_free(struct kref *req_ref);
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
 				   struct drm_file *file);
 void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
@@ -160,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
 static inline uint32_t
 i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
 {
-	return req ? req->seqno : 0;
+	return req ? req->fence.seqno : 0;
 }
 
 static inline struct intel_engine_cs *
@@ -170,17 +165,23 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req)
 }
 
 static inline struct drm_i915_gem_request *
+to_request(struct fence *fence)
+{
+	/* We assume that NULL fence/request are interoperable */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
+	return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static inline struct drm_i915_gem_request *
 i915_gem_request_reference(struct drm_i915_gem_request *req)
 {
-	if (req)
-		kref_get(&req->ref);
-	return req;
+	return to_request(fence_get(&req->fence));
 }
 
 static inline void
 i915_gem_request_unreference(struct drm_i915_gem_request *req)
 {
-	kref_put(&req->ref, i915_gem_request_free);
+	fence_put(&req->fence);
 }
 
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
@@ -230,7 +231,7 @@ static inline bool i915_gem_request_started(const struct drm_i915_gem_request *r
 static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->seqno);
+				 req->fence.seqno);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3ba5302ce19f..5332bd32c555 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1181,7 +1181,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			}
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
+			erq->seqno = request->fence.seqno;
 			erq->jiffies = request->emitted_jiffies;
 			erq->tail = request->postfix;
 		}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index ac72451c571c..629111d42ce0 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -538,7 +538,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 							     rq->engine);
 
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
-	wqi->fence_id = rq->seqno;
+	wqi->fence_id = rq->fence.seqno;
 
 	kunmap_atomic(base);
 }
@@ -578,7 +578,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
 		client->b_fail += 1;
 
 	guc->submissions[engine_id] += 1;
-	guc->last_seqno[engine_id] = rq->seqno;
+	guc->last_seqno[engine_id] = rq->fence.seqno;
 
 	return b_ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index f59cf07184ae..0296a77b586a 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
 			   __entry->dev = from->i915->dev->primary->index;
 			   __entry->sync_from = from->id;
 			   __entry->sync_to = to_req->engine->id;
-			   __entry->seqno = i915_gem_request_get_seqno(req);
+			   __entry->seqno = req->fence.seqno;
 			   ),
 
 	    TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->dev->primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->seqno;
+			   __entry->seqno = req->fence.seqno;
 			   __entry->flags = flags;
-			   intel_engine_enable_signaling(req);
+			   fence_enable_sw_signaling(&req->fence);
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
@@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->dev->primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->seqno;
+			   __entry->seqno = req->fence.seqno;
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->dev->primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->seqno;
+			   __entry->seqno = req->fence.seqno;
 			   __entry->blocking =
 				     mutex_is_locked(&req->i915->dev->struct_mutex);
 			   ),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index dc65a007fa20..05f62f706897 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -396,6 +396,7 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine,
 						 &request->signaling.wait);
+			fence_signal(&request->fence);
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
@@ -444,7 +445,7 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	}
 
 	request->signaling.wait.task = b->signaler;
-	request->signaling.wait.seqno = request->seqno;
+	request->signaling.wait.seqno = request->fence.seqno;
 	i915_gem_request_reference(request);
 
 	/* First add ourselves into the list of waiters, but register our
@@ -466,8 +467,8 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	p = &b->signals.rb_node;
 	while (*p) {
 		parent = *p;
-		if (i915_seqno_passed(request->seqno,
-				      to_signal(parent)->seqno)) {
+		if (i915_seqno_passed(request->fence.seqno,
+				      to_signal(parent)->fence.seqno)) {
 			p = &parent->rb_right;
 			first = false;
 		} else
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0742a849acce..c7a9ebdb0811 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1731,7 +1731,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
 				intel_hws_seqno_address(request->engine) |
 				MI_FLUSH_DW_USE_GTT);
 	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, request->seqno);
+	intel_logical_ring_emit(ringbuf, request->fence.seqno);
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	return intel_logical_ring_advance_and_submit(request);
@@ -1964,6 +1964,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
 	engine->exec_id = info->exec_id;
 	engine->guc_id = info->guc_id;
 	engine->mmio_base = info->mmio_base;
+	engine->fence_context = fence_context_alloc(1);
 
 	engine->i915 = dev_priv;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 327ad7fdf118..c3d6345aa2c1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1266,7 +1266,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
 					   PIPE_CONTROL_CS_STALL);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
+		intel_ring_emit(signaller, signaller_req->fence.seqno);
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1304,7 +1304,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller_req->seqno);
+		intel_ring_emit(signaller, signaller_req->fence.seqno);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->hw_id));
 		intel_ring_emit(signaller, 0);
@@ -1337,7 +1337,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
 		if (i915_mmio_reg_valid(mbox_reg)) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit_reg(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller_req->seqno);
+			intel_ring_emit(signaller, signaller_req->fence.seqno);
 		}
 	}
 
@@ -1373,7 +1373,7 @@ gen6_add_request(struct drm_i915_gem_request *req)
 	intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(engine,
 			I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(engine, req->seqno);
+	intel_ring_emit(engine, req->fence.seqno);
 	intel_ring_emit(engine, MI_USER_INTERRUPT);
 	__intel_ring_advance(engine);
 
@@ -1623,7 +1623,7 @@ i9xx_add_request(struct drm_i915_gem_request *req)
 	intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(engine,
 		       	I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(engine, req->seqno);
+	intel_ring_emit(engine, req->fence.seqno);
 	intel_ring_emit(engine, MI_USER_INTERRUPT);
 	__intel_ring_advance(engine);
 
@@ -2092,6 +2092,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	WARN_ON(engine->buffer);
 
 	engine->i915 = dev_priv;
+	engine->fence_context = fence_context_alloc(1);
 	INIT_LIST_HEAD(&engine->active_list);
 	INIT_LIST_HEAD(&engine->request_list);
 	INIT_LIST_HEAD(&engine->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6017367e94fb..b041fb6a6d01 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -158,6 +158,7 @@ struct intel_engine_cs {
 	unsigned int exec_id;
 	unsigned int hw_id;
 	unsigned int guc_id; /* XXX same as hw_id? */
+	unsigned fence_context;
 	u32		mmio_base;
 	struct intel_ringbuffer *buffer;
 	struct list_head buffers;