Message ID | 20180627113843.7257-3-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
On 27/06/2018 12:38, Chris Wilson wrote:
> In the next patch, we will process the CSB events directly from the
> submission path, rather than only after a CS interrupt. Hence, we will
> no longer have the need for a loop until the has-interrupt bit is clear,
> and in the meantime can remove that small optimisation.
>
> v2: Tvrtko pointed out it was safer to unconditionally kick the tasklet
> after each irq, when assuming that the tasklet is called for each irq.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v1

Okay I can swallow it.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

> ---
> drivers/gpu/drm/i915/i915_irq.c | 7 +-
> drivers/gpu/drm/i915/intel_lrc.c | 278 +++++++++++++++----------------
> 2 files changed, 141 insertions(+), 144 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 46aaef5c1851..d02f30591c0b 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1473,9 +1473,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> bool tasklet = false;
>
> if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> - if (READ_ONCE(engine->execlists.active))
> - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
> - &engine->irq_posted);
> + if (READ_ONCE(engine->execlists.active)) {
> + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> + tasklet = true;
> + }
> }
>
> if (iir & GT_RENDER_USER_INTERRUPT) {
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4b31e8f42aeb..91656eb2f2db 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -954,166 +954,162 @@ static void process_csb(struct intel_engine_cs *engine)
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct execlist_port *port = execlists->port;
> struct drm_i915_private *i915 = engine->i915;
> +
> + /* The HWSP contains a (cacheable) mirror of the CSB */
> + const u32 *buf =
> + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> + unsigned int head, tail;
> bool fw = false;
>
> - do {
> - /* The HWSP contains a (cacheable) mirror of the CSB */
> - const u32 *buf =
> - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> - unsigned int head, tail;
> -
> - /* Clear before reading to catch new interrupts */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - smp_mb__after_atomic();
> -
> - if (unlikely(execlists->csb_use_mmio)) {
> - if (!fw) {
> - intel_uncore_forcewake_get(i915, execlists->fw_domains);
> - fw = true;
> - }
> + /* Clear before reading to catch new interrupts */
> + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> + smp_mb__after_atomic();
>
> - buf = (u32 * __force)
> - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> + if (unlikely(execlists->csb_use_mmio)) {
> + intel_uncore_forcewake_get(i915, execlists->fw_domains);
> + fw = true;
>
> - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> - tail = GEN8_CSB_WRITE_PTR(head);
> - head = GEN8_CSB_READ_PTR(head);
> - execlists->csb_head = head;
> - } else {
> - const int write_idx =
> - intel_hws_csb_write_index(i915) -
> - I915_HWS_CSB_BUF0_INDEX;
> + buf = (u32 * __force)
> + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
>
> - head = execlists->csb_head;
> - tail = READ_ONCE(buf[write_idx]);
> - rmb(); /* Hopefully paired with a wmb() in HW */
> - }
> - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> - engine->name,
> - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + tail = GEN8_CSB_WRITE_PTR(head);
> + head = GEN8_CSB_READ_PTR(head);
> + execlists->csb_head = head;
> + } else {
> + const int write_idx =
> + intel_hws_csb_write_index(i915) -
> + I915_HWS_CSB_BUF0_INDEX;
>
> - while (head != tail) {
> - struct i915_request *rq;
> - unsigned int status;
> - unsigned int count;
> + head = execlists->csb_head;
> + tail = READ_ONCE(buf[write_idx]);
> + rmb(); /* Hopefully paired with a wmb() in HW */
> + }
> + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> + engine->name,
> + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
>
> - if (++head == GEN8_CSB_ENTRIES)
> - head = 0;
> + while (head != tail) {
> + struct i915_request *rq;
> + unsigned int status;
> + unsigned int count;
>
> - /*
> - * We are flying near dragons again.
> - *
> - * We hold a reference to the request in execlist_port[]
> - * but no more than that. We are operating in softirq
> - * context and so cannot hold any mutex or sleep. That
> - * prevents us stopping the requests we are processing
> - * in port[] from being retired simultaneously (the
> - * breadcrumb will be complete before we see the
> - * context-switch). As we only hold the reference to the
> - * request, any pointer chasing underneath the request
> - * is subject to a potential use-after-free. Thus we
> - * store all of the bookkeeping within port[] as
> - * required, and avoid using unguarded pointers beneath
> - * request itself. The same applies to the atomic
> - * status notifier.
> - */
> + if (++head == GEN8_CSB_ENTRIES)
> + head = 0;
>
> - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> - engine->name, head,
> - status, buf[2*head + 1],
> - execlists->active);
> -
> - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> - GEN8_CTX_STATUS_PREEMPTED))
> - execlists_set_active(execlists,
> - EXECLISTS_ACTIVE_HWACK);
> - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> - execlists_clear_active(execlists,
> - EXECLISTS_ACTIVE_HWACK);
> -
> - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> - continue;
> + /*
> + * We are flying near dragons again.
> + *
> + * We hold a reference to the request in execlist_port[]
> + * but no more than that. We are operating in softirq
> + * context and so cannot hold any mutex or sleep. That
> + * prevents us stopping the requests we are processing
> + * in port[] from being retired simultaneously (the
> + * breadcrumb will be complete before we see the
> + * context-switch). As we only hold the reference to the
> + * request, any pointer chasing underneath the request
> + * is subject to a potential use-after-free. Thus we
> + * store all of the bookkeeping within port[] as
> + * required, and avoid using unguarded pointers beneath
> + * request itself. The same applies to the atomic
> + * status notifier.
> + */
>
> - /* We should never get a COMPLETED | IDLE_ACTIVE! */
> - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> + engine->name, head,
> + status, buf[2*head + 1],
> + execlists->active);
> +
> + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> + GEN8_CTX_STATUS_PREEMPTED))
> + execlists_set_active(execlists,
> + EXECLISTS_ACTIVE_HWACK);
> + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> + execlists_clear_active(execlists,
> + EXECLISTS_ACTIVE_HWACK);
> +
> + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> + continue;
> - if (status & GEN8_CTX_STATUS_COMPLETE &&
> - buf[2*head + 1] == execlists->preempt_complete_status) {
> - GEM_TRACE("%s preempt-idle\n", engine->name);
> - complete_preempt_context(execlists);
> - continue;
> - }
> + /* We should never get a COMPLETED | IDLE_ACTIVE! */
> + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> - if (status & GEN8_CTX_STATUS_PREEMPTED &&
> - execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT))
> - continue;
> + if (status & GEN8_CTX_STATUS_COMPLETE &&
> + buf[2*head + 1] == execlists->preempt_complete_status) {
> + GEM_TRACE("%s preempt-idle\n", engine->name);
> + complete_preempt_context(execlists);
> + continue;
> + }
> - GEM_BUG_ON(!execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_USER));
> + if (status & GEN8_CTX_STATUS_PREEMPTED &&
> + execlists_is_active(execlists,
> + EXECLISTS_ACTIVE_PREEMPT))
> + continue;
> - rq = port_unpack(port, &count);
> - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
> - engine->name,
> - port->context_id, count,
> - rq ? rq->global_seqno : 0,
> - rq ? rq->fence.context : 0,
> - rq ? rq->fence.seqno : 0,
> - intel_engine_get_seqno(engine),
> - rq ? rq_prio(rq) : 0);
> + GEM_BUG_ON(!execlists_is_active(execlists,
> + EXECLISTS_ACTIVE_USER));
> - /* Check the context/desc id for this event matches */
> - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> + rq = port_unpack(port, &count);
> + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
> + engine->name,
> + port->context_id, count,
> + rq ? rq->global_seqno : 0,
> + rq ? rq->fence.context : 0,
> + rq ? rq->fence.seqno : 0,
> + intel_engine_get_seqno(engine),
> + rq ? rq_prio(rq) : 0);
> +
> + /* Check the context/desc id for this event matches */
> + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> +
> + GEM_BUG_ON(count == 0);
> + if (--count == 0) {
> + /*
> + * On the final event corresponding to the
> + * submission of this context, we expect either
> + * an element-switch event or a completion
> + * event (and on completion, the active-idle
> + * marker). No more preemptions, lite-restore
> + * or otherwise.
> + */
> + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> + GEM_BUG_ON(port_isset(&port[1]) &&
> + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> + GEM_BUG_ON(!port_isset(&port[1]) &&
> + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> - GEM_BUG_ON(count == 0);
> - if (--count == 0) {
> - /*
> - * On the final event corresponding to the
> - * submission of this context, we expect either
> - * an element-switch event or a completion
> - * event (and on completion, the active-idle
> - * marker). No more preemptions, lite-restore
> - * or otherwise.
> - */
> - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> - GEM_BUG_ON(port_isset(&port[1]) &&
> - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> - GEM_BUG_ON(!port_isset(&port[1]) &&
> - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> + /*
> + * We rely on the hardware being strongly
> + * ordered, that the breadcrumb write is
> + * coherent (visible from the CPU) before the
> + * user interrupt and CSB is processed.
> + */
> + GEM_BUG_ON(!i915_request_completed(rq));
> - /*
> - * We rely on the hardware being strongly
> - * ordered, that the breadcrumb write is
> - * coherent (visible from the CPU) before the
> - * user interrupt and CSB is processed.
> - */
> - GEM_BUG_ON(!i915_request_completed(rq));
> -
> - execlists_context_schedule_out(rq,
> - INTEL_CONTEXT_SCHEDULE_OUT);
> - i915_request_put(rq);
> -
> - GEM_TRACE("%s completed ctx=%d\n",
> - engine->name, port->context_id);
> -
> - port = execlists_port_complete(execlists, port);
> - if (port_isset(port))
> - execlists_user_begin(execlists, port);
> - else
> - execlists_user_end(execlists);
> - } else {
> - port_set(port, port_pack(rq, count));
> - }
> - }
> + execlists_context_schedule_out(rq,
> + INTEL_CONTEXT_SCHEDULE_OUT);
> + i915_request_put(rq);
>
> - if (head != execlists->csb_head) {
> - execlists->csb_head = head;
> - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + GEM_TRACE("%s completed ctx=%d\n",
> + engine->name, port->context_id);
> +
> + port = execlists_port_complete(execlists, port);
> + if (port_isset(port))
> + execlists_user_begin(execlists, port);
> + else
> + execlists_user_end(execlists);
> + } else {
> + port_set(port, port_pack(rq, count));
> }
> - } while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> + }
> +
> + if (head != execlists->csb_head) {
> + execlists->csb_head = head;
> + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + }
>
> if (unlikely(fw))
> intel_uncore_forcewake_put(i915, execlists->fw_domains);
>
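To make the control-flow change described in the commit message easier to follow outside the diff, here is a minimal user-space sketch of the irq-posted handshake. It is an illustration only: irq_posted, process_events() and kick_tasklet() merely echo the driver's names, C11 atomics stand in for the kernel's bit operations and tasklet machinery, and none of it is the actual i915 code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool irq_posted;

/* Stand-in for process_csb(): drain whatever events are pending. */
static void process_events(void)
{
        /* Clear before reading so that a later event re-arms us. */
        atomic_store(&irq_posted, false);
        puts("  drained CSB mirror");
}

static void kick_tasklet(void)
{
        puts("  tasklet scheduled");
}

/* v1 scheme: schedule the tasklet only when the bit was previously clear,
 * so the bottom half has to loop until the bit stays clear. */
static void cs_irq_old(void)
{
        if (!atomic_exchange(&irq_posted, true))
                kick_tasklet();
}

static void bottom_half_old(void)
{
        do {
                process_events();
        } while (atomic_load(&irq_posted));
}

/* This patch: every interrupt sets the bit and kicks the tasklet, so a
 * single pass of the bottom half per kick is enough. */
static void cs_irq_new(void)
{
        atomic_store(&irq_posted, true);
        kick_tasklet();
}

static void bottom_half_new(void)
{
        process_events();
}

int main(void)
{
        puts("old:"); cs_irq_old(); bottom_half_old();
        puts("new:"); cs_irq_new(); bottom_half_new();
        return 0;
}

The v2 note is visible in cs_irq_new(): once the loop in the bottom half is gone, a single pass is only safe if the tasklet really is kicked for every interrupt, hence the unconditional set_bit()/tasklet = true pair in gen8_cs_irq_handler().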
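The heart of the rewritten process_csb() is a single walk over a small circular buffer of (status, context-id) pairs: advance and wrap the read pointer, consume the entry, then write the pointer back once. A hypothetical stand-alone sketch of that consumption pattern, with a 6-entry buffer standing in for GEN8_CSB_ENTRIES and made-up entry values:

#include <stdio.h>

#define CSB_ENTRIES 6   /* stands in for GEN8_CSB_ENTRIES */

struct csb {
        unsigned int buf[2 * CSB_ENTRIES];      /* status, context-id pairs */
        unsigned int head, tail;                /* read and write pointers */
};

static void drain(struct csb *csb)
{
        unsigned int head = csb->head;

        while (head != csb->tail) {
                /* Advance (and wrap) before reading: the entry at the old
                 * head was consumed on a previous pass. */
                if (++head == CSB_ENTRIES)
                        head = 0;

                printf("csb[%u]: status=0x%08x, id=0x%08x\n",
                       head, csb->buf[2 * head], csb->buf[2 * head + 1]);
        }

        csb->head = head;       /* remember how far we have read */
}

int main(void)
{
        struct csb csb = {
                .buf  = { 0x8002, 0x1, 0x18, 0x1, 0x8002, 0x2 },
                .head = 5,      /* last entry consumed previously */
                .tail = 2,      /* producer has written entries 0..2 */
        };

        drain(&csb);
        return 0;
}

In the driver the same walk also decides, per event, whether to acknowledge a preemption, drop the reference on a completed request, or rotate the ports; the sketch keeps only the pointer arithmetic, which the removal of the do/while loop leaves untouched.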