diff mbox

[7/8] drm/i915: Keep track of reserved execlist ports

Message ID 20170920143705.11277-8-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Sept. 20, 2017, 2:37 p.m. UTC
To further enhance port processing, keep track of
reserved ports. This way we can iterate only the used subset
of port space. Note that we lift the responsibility of
execlists_submit_request() to inspect hw availability and
always do dequeuing. This is to ensure that only the irq
handler will be responsible for keeping track of available ports.

v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 51 +++++++++--------
 drivers/gpu/drm/i915/i915_irq.c            |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |  7 ++-
 drivers/gpu/drm/i915/intel_lrc.c           | 90 ++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 55 +++++++++++++-----
 5 files changed, 129 insertions(+), 76 deletions(-)

Comments

Mika Kuoppala Sept. 21, 2017, 12:08 p.m. UTC | #1
Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> To further enhance port processing, keep track of
> reserved ports. This way we can iterate only the used subset
> of port space. Note that we lift the responsibility of
> execlists_submit_request() to inspect hw availability and

s/execlists_submit_request/insert_request/

-Mika

> always do dequeuing. This is to ensure that only the irq
> handler will be responsible for keeping track of available ports.
>
> v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 51 +++++++++--------
>  drivers/gpu/drm/i915/i915_irq.c            |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  7 ++-
>  drivers/gpu/drm/i915/intel_lrc.c           | 90 ++++++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 55 +++++++++++++-----
>  5 files changed, 129 insertions(+), 76 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 25c9bac94c39..359f57a59cba 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client)
>   * @engine: engine associated with the commands
>   *
>   * The only error here arises if the doorbell hardware isn't functioning
> - * as expected, which really shouln't happen.
> + * as expected, which really shouldn't happen.
>   */
>  static void i915_guc_submit(struct intel_engine_cs *engine)
>  {
> @@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
>  	struct intel_guc *guc = &dev_priv->guc;
>  	struct i915_guc_client *client = guc->execbuf_client;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = el->port;
>  	const unsigned int engine_id = engine->id;
>  	unsigned int n;
>  
> -	for (n = 0; n < ARRAY_SIZE(el->port); n++) {
> +	for (n = 0; n < execlist_active_ports(el); n++) {
> +		struct execlist_port *port;
>  		struct drm_i915_gem_request *rq;
>  		unsigned int count;
>  
> -		rq = port_unpack(&port[n], &count);
> +		port = execlist_port_index(el, n);
> +
> +		rq = port_unpack(port, &count);
>  		if (rq && count == 0) {
> -			port_set(&port[n], port_pack(rq, ++count));
> +			port_set(port, port_pack(rq, ++count));
>  
>  			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
>  				POSTING_READ_FW(GUC_STATUS);
> @@ -560,25 +562,27 @@ static void port_assign(struct execlist_port *port,
>  static void i915_guc_dequeue(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = el->port;
> +	struct execlist_port *port;
>  	struct drm_i915_gem_request *last = NULL;
> -	const struct execlist_port * const last_port = execlist_port_tail(el);
>  	bool submit = false;
>  	struct rb_node *rb;
>  
> -	if (port_isset(port))
> -		port++;
> -
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = el->first;
>  	GEM_BUG_ON(rb_first(&el->queue) != rb);
> -	while (rb) {
> +
> +	if (unlikely(!rb))
> +		goto done;
> +
> +	port = execlist_request_port(el);
> +
> +	do {
>  		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>  		struct drm_i915_gem_request *rq, *rn;
>  
>  		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>  			if (last && rq->ctx != last->ctx) {
> -				if (port == last_port) {
> +				if (!execlist_inactive_ports(el)) {
>  					__list_del_many(&p->requests,
>  							&rq->priotree.link);
>  					goto done;
> @@ -587,7 +591,8 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
>  				if (submit)
>  					port_assign(port, last);
>  
> -				port = execlist_port_next(el, port);
> +				port = execlist_request_port(el);
> +				GEM_BUG_ON(port_isset(port));
>  			}
>  
>  			INIT_LIST_HEAD(&rq->priotree.link);
> @@ -604,7 +609,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
>  		INIT_LIST_HEAD(&p->requests);
>  		if (p->priority != I915_PRIORITY_NORMAL)
>  			kmem_cache_free(engine->i915->priorities, p);
> -	}
> +	} while (rb);
>  done:
>  	el->first = rb;
>  	if (submit) {
> @@ -618,21 +623,21 @@ static void i915_guc_irq_handler(unsigned long data)
>  {
>  	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
> -	const struct execlist_port * const last_port = execlist_port_tail(el);
> -	struct drm_i915_gem_request *rq;
>  
> -	rq = port_request(port);
> -	while (rq && i915_gem_request_completed(rq)) {
> +	while (execlist_active_ports(el)) {
> +		struct execlist_port *port = execlist_port_head(el);
> +		struct drm_i915_gem_request *rq = port_request(port);
> +
> +		if (!i915_gem_request_completed(rq))
> +			break;
> +
>  		trace_i915_gem_request_out(rq);
>  		i915_gem_request_put(rq);
>  
> -		port = execlist_port_complete(el, port);
> -
> -		rq = port_request(port);
> +		execlist_release_port(el, port);
>  	}
>  
> -	if (!port_isset(last_port))
> +	if (execlist_inactive_ports(el))
>  		i915_guc_dequeue(engine);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index ac5a95439393..a9d888b726c4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1342,7 +1342,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  	bool tasklet = false;
>  
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
> -		if (port_count(execlist_port_head(el))) {
> +		if (READ_ONCE(el->port_count)) {
>  			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>  			tasklet = true;
>  		}
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index b0d702063a50..29b170fdd6ef 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -407,6 +407,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>  	BUILD_BUG_ON_NOT_POWER_OF_2(execlist_num_ports(&engine->execlist));
>  	GEM_BUG_ON(execlist_num_ports(&engine->execlist) > EXECLIST_MAX_PORTS);
>  
> +	el->port_head = 0;
> +	el->port_count = 0;
> +
>  	el->queue = RB_ROOT;
>  	el->first = NULL;
>  }
> @@ -1501,8 +1504,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
>  	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
>  		return false;
>  
> -	/* Both ports drained, no more ELSP submission? */
> -	if (port_request(execlist_port_head(&engine->execlist)))
> +	/* All ports drained, no more ELSP submission? */
> +	if (execlist_active_ports(&engine->execlist))
>  		return false;
>  
>  	/* ELSP is empty, but there are ready requests? */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8550cd6635c9..bea10620bed2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -399,26 +399,29 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
>  	unsigned int n;
>  
> -	for (n = execlist_num_ports(el); n--; ) {
> -		struct execlist_port *port;
> +	for (n = 0; n < execlist_inactive_ports(el); n++) {
> +		writel(0, elsp);
> +		writel(0, elsp);
> +	}
> +
> +	for (n = execlist_active_ports(el); n--; ) {
>  		struct drm_i915_gem_request *rq;
> +		struct execlist_port *port;
>  		unsigned int count;
>  		u64 desc;
>  
>  		port = execlist_port_index(el, n);
> -
>  		rq = port_unpack(port, &count);
> -		if (rq) {
> -			GEM_BUG_ON(count > !n);
> -			if (!count++)
> -				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> -			port_set(port, port_pack(rq, count));
> -			desc = execlists_update_context(rq);
> -			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
> -		} else {
> -			GEM_BUG_ON(!n);
> -			desc = 0;
> -		}
> +
> +		GEM_BUG_ON(!rq);
> +		GEM_BUG_ON(count > !n);
> +
> +		if (!count++)
> +			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> +
> +		port_set(port, port_pack(rq, count));
> +		desc = execlists_update_context(rq);
> +		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
>  
>  		writel(upper_32_bits(desc), elsp);
>  		writel(lower_32_bits(desc), elsp);
> @@ -456,15 +459,23 @@ static void port_assign(struct execlist_port *port,
>  
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_request *last;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
> -	const struct execlist_port * const last_port = execlist_port_tail(el);
> +	struct execlist_port *port;
> +	struct drm_i915_gem_request *last;
>  	struct rb_node *rb;
>  	bool submit = false;
>  
> -	last = port_request(port);
> -	if (last)
> +	spin_lock_irq(&engine->timeline->lock);
> +	rb = el->first;
> +	GEM_BUG_ON(rb_first(&el->queue) != rb);
> +
> +	if (unlikely(!rb))
> +		goto done;
> +
> +	if (execlist_active_ports(el)) {
> +		port = execlist_port_tail(el);
> +		last = port_request(port);
> +
>  		/* WaIdleLiteRestore:bdw,skl
>  		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
>  		 * as we resubmit the request. See gen8_emit_breadcrumb()
> @@ -472,6 +483,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		 * request.
>  		 */
>  		last->tail = last->wa_tail;
> +	} else {
> +		/* Allocate first port to coalesce into */
> +		port = execlist_request_port(el);
> +		last = NULL;
> +	}
>  
>  	/* Hardware submission is through 2 ports. Conceptually each port
>  	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> @@ -493,11 +509,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  	 * sequence of requests as being the most optimal (fewest wake ups
>  	 * and context switches) submission.
>  	 */
> -
> -	spin_lock_irq(&engine->timeline->lock);
> -	rb = el->first;
> -	GEM_BUG_ON(rb_first(&el->queue) != rb);
> -	while (rb) {
> +	do {
>  		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>  		struct drm_i915_gem_request *rq, *rn;
>  
> @@ -515,11 +527,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			 */
>  			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
>  				/*
> -				 * If we are on the second port and cannot
> +				 * If we are on the last port and cannot
>  				 * combine this request with the last, then we
>  				 * are done.
>  				 */
> -				if (port == last_port) {
> +				if (!execlist_inactive_ports(el)) {
>  					__list_del_many(&p->requests,
>  							&rq->priotree.link);
>  					goto done;
> @@ -544,8 +556,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  				if (submit)
>  					port_assign(port, last);
>  
> -				port = execlist_port_next(el, port);
> -
> +				port = execlist_request_port(el);
>  				GEM_BUG_ON(port_isset(port));
>  			}
>  
> @@ -563,7 +574,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		INIT_LIST_HEAD(&p->requests);
>  		if (p->priority != I915_PRIORITY_NORMAL)
>  			kmem_cache_free(engine->i915->priorities, p);
> -	}
> +	} while (rb);
> +
>  done:
>  	el->first = rb;
>  	if (submit)
> @@ -582,6 +594,9 @@ static void execlist_cancel_port_requests(struct intel_engine_execlist *el)
>  		i915_gem_request_put(port_request(&el->port[i]));
>  
>  	memset(el->port, 0, sizeof(el->port));
> +
> +	el->port_count = 0;
> +	el->port_head = 0;
>  }
>  
>  static void execlists_cancel_requests(struct intel_engine_cs *engine)
> @@ -643,10 +658,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  
>  static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
>  {
> -	struct execlist_port * const port0 = execlist_port_head(el);
> -	struct execlist_port * const port1 = execlist_port_next(el, port0);
> +	const unsigned int active = execlist_active_ports(el);
> +
> +	if (!active)
> +		return true;
>  
> -	return port_count(port0) + port_count(port1) < 2;
> +	return port_count(execlist_port_tail(el)) + active < 2;
>  }
>  
>  /*
> @@ -657,7 +674,6 @@ static void intel_lrc_irq_handler(unsigned long data)
>  {
>  	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
>  	struct drm_i915_private *dev_priv = engine->i915;
>  
>  	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -714,6 +730,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>  		}
>  
>  		while (head != tail) {
> +			struct execlist_port *port;
>  			struct drm_i915_gem_request *rq;
>  			unsigned int status;
>  			unsigned int count;
> @@ -742,6 +759,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>  			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>  				continue;
>  
> +			port = execlist_port_head(el);
>  			/* Check the context/desc id for this event matches */
>  			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
>  
> @@ -755,13 +773,13 @@ static void intel_lrc_irq_handler(unsigned long data)
>  				trace_i915_gem_request_out(rq);
>  				i915_gem_request_put(rq);
>  
> -				port = execlist_port_complete(el, port);
> +				execlist_release_port(el, port);
>  			} else {
>  				port_set(port, port_pack(rq, count));
>  			}
>  
>  			/* After the final element, the hw should be idle */
> -			GEM_BUG_ON(port_count(port) == 0 &&
> +			GEM_BUG_ON(execlist_active_ports(el) == 0 &&
>  				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>  		}
>  
> @@ -786,7 +804,7 @@ static void insert_request(struct intel_engine_cs *engine,
>  	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
>  
>  	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> -	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(el))
> +	if (ptr_unmask_bits(p, 1))
>  		tasklet_hi_schedule(&el->irq_tasklet);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 991f6c0bd6c2..efa5a8ea1ecb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -249,6 +249,11 @@ struct intel_engine_execlist {
>  	unsigned int port_head;
>  
>  	/**
> +	 * @port_count: reserved ports
> +	 */
> +	unsigned int port_count;
> +
> +	/**
>  	 * @queue: queue of requests, in priority lists
>  	 */
>  	struct rb_root queue;
> @@ -529,14 +534,20 @@ execlist_num_ports(const struct intel_engine_execlist * const el)
>  	return el->port_mask + 1;
>  }
>  
> -#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
> +static inline unsigned int
> +execlist_active_ports(const struct intel_engine_execlist * const el)
> +{
> +	return el->port_count;
> +}
>  
> -static inline struct execlist_port *
> -execlist_port_head(struct intel_engine_execlist * const el)
> +static inline unsigned int
> +execlist_inactive_ports(const struct intel_engine_execlist * const el)
>  {
> -	return &el->port[el->port_head];
> +	return execlist_num_ports(el) - execlist_active_ports(el);
>  }
>  
> +#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
> +
>  /* Index starting from port_head */
>  static inline struct execlist_port *
>  execlist_port_index(struct intel_engine_execlist * const el,
> @@ -546,30 +557,46 @@ execlist_port_index(struct intel_engine_execlist * const el,
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_tail(struct intel_engine_execlist * const el)
> +execlist_port_head(struct intel_engine_execlist * const el)
>  {
> -	return &el->port[__port_idx(el->port_head, -1, el->port_mask)];
> +	GEM_BUG_ON(!el->port_count);
> +
> +	return execlist_port_index(el, 0);
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_next(struct intel_engine_execlist * const el,
> -		   const struct execlist_port * const port)
> +execlist_port_tail(struct intel_engine_execlist * const el)
>  {
> -	const unsigned int i = port_index(port, el);
> +	GEM_BUG_ON(!el->port_count);
>  
> -	return &el->port[__port_idx(i, 1, el->port_mask)];
> +	return execlist_port_index(el, el->port_count - 1);
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_complete(struct intel_engine_execlist * const el,
> -		       struct execlist_port * const port)
> +execlist_request_port(struct intel_engine_execlist * const el)
>  {
> +	GEM_BUG_ON(el->port_count == el->port_mask + 1);
> +
> +	el->port_count++;
> +
> +	GEM_BUG_ON(port_isset(execlist_port_tail(el)));
> +
> +	return execlist_port_tail(el);
> +}
> +
> +static inline void
> +execlist_release_port(struct intel_engine_execlist * const el,
> +		      struct execlist_port * const port)
> +{
> +
>  	GEM_BUG_ON(port_index(port, el) != el->port_head);
> +	GEM_BUG_ON(!port_isset(port));
> +	GEM_BUG_ON(!el->port_count);
>  
>  	memset(port, 0, sizeof(struct execlist_port));
> -	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
>  
> -	return execlist_port_head(el);
> +	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
> +	el->port_count--;
>  }
>  
>  static inline unsigned int
> -- 
> 2.11.0
Chris Wilson Sept. 21, 2017, 12:30 p.m. UTC | #2
Quoting Mika Kuoppala (2017-09-20 15:37:04)
> To further enhance port processing, keep track of
> reserved ports. This way we can iterate only the used subset
> of port space. Note that we lift the responsibility of
> execlists_submit_request() to inspect hw availability and
> always do dequeuing. This is to ensure that only the irq
> handler will be responsible for keeping track of available ports.
> 
> v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>

Ok, doesn't look hideous. I need to look at it with a clear head, but for
now, could you check scripts/bloat-o-meter for my usual quick guide on
how much gcc likes it?
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 25c9bac94c39..359f57a59cba 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -487,7 +487,7 @@  static void guc_ring_doorbell(struct i915_guc_client *client)
  * @engine: engine associated with the commands
  *
  * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
+ * as expected, which really shouldn't happen.
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
@@ -495,17 +495,19 @@  static void i915_guc_submit(struct intel_engine_cs *engine)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = el->port;
 	const unsigned int engine_id = engine->id;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(el->port); n++) {
+	for (n = 0; n < execlist_active_ports(el); n++) {
+		struct execlist_port *port;
 		struct drm_i915_gem_request *rq;
 		unsigned int count;
 
-		rq = port_unpack(&port[n], &count);
+		port = execlist_port_index(el, n);
+
+		rq = port_unpack(port, &count);
 		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+			port_set(port, port_pack(rq, ++count));
 
 			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 				POSTING_READ_FW(GUC_STATUS);
@@ -560,25 +562,27 @@  static void port_assign(struct execlist_port *port,
 static void i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = el->port;
+	struct execlist_port *port;
 	struct drm_i915_gem_request *last = NULL;
-	const struct execlist_port * const last_port = execlist_port_tail(el);
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = el->first;
 	GEM_BUG_ON(rb_first(&el->queue) != rb);
-	while (rb) {
+
+	if (unlikely(!rb))
+		goto done;
+
+	port = execlist_request_port(el);
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			if (last && rq->ctx != last->ctx) {
-				if (port == last_port) {
+				if (!execlist_inactive_ports(el)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -587,7 +591,8 @@  static void i915_guc_dequeue(struct intel_engine_cs *engine)
 				if (submit)
 					port_assign(port, last);
 
-				port = execlist_port_next(el, port);
+				port = execlist_request_port(el);
+				GEM_BUG_ON(port_isset(port));
 			}
 
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -604,7 +609,7 @@  static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	el->first = rb;
 	if (submit) {
@@ -618,21 +623,21 @@  static void i915_guc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
-	struct drm_i915_gem_request *rq;
 
-	rq = port_request(port);
-	while (rq && i915_gem_request_completed(rq)) {
+	while (execlist_active_ports(el)) {
+		struct execlist_port *port = execlist_port_head(el);
+		struct drm_i915_gem_request *rq = port_request(port);
+
+		if (!i915_gem_request_completed(rq))
+			break;
+
 		trace_i915_gem_request_out(rq);
 		i915_gem_request_put(rq);
 
-		port = execlist_port_complete(el, port);
-
-		rq = port_request(port);
+		execlist_release_port(el, port);
 	}
 
-	if (!port_isset(last_port))
+	if (execlist_inactive_ports(el))
 		i915_guc_dequeue(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ac5a95439393..a9d888b726c4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1342,7 +1342,7 @@  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (port_count(execlist_port_head(el))) {
+		if (READ_ONCE(el->port_count)) {
 			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index b0d702063a50..29b170fdd6ef 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -407,6 +407,9 @@  static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlist_num_ports(&engine->execlist));
 	GEM_BUG_ON(execlist_num_ports(&engine->execlist) > EXECLIST_MAX_PORTS);
 
+	el->port_head = 0;
+	el->port_count = 0;
+
 	el->queue = RB_ROOT;
 	el->first = NULL;
 }
@@ -1501,8 +1504,8 @@  bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
 
-	/* Both ports drained, no more ELSP submission? */
-	if (port_request(execlist_port_head(&engine->execlist)))
+	/* All ports drained, no more ELSP submission? */
+	if (execlist_active_ports(&engine->execlist))
 		return false;
 
 	/* ELSP is empty, but there are ready requests? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8550cd6635c9..bea10620bed2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -399,26 +399,29 @@  static void execlists_submit_ports(struct intel_engine_cs *engine)
 		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
-	for (n = execlist_num_ports(el); n--; ) {
-		struct execlist_port *port;
+	for (n = 0; n < execlist_inactive_ports(el); n++) {
+		writel(0, elsp);
+		writel(0, elsp);
+	}
+
+	for (n = execlist_active_ports(el); n--; ) {
 		struct drm_i915_gem_request *rq;
+		struct execlist_port *port;
 		unsigned int count;
 		u64 desc;
 
 		port = execlist_port_index(el, n);
-
 		rq = port_unpack(port, &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-			port_set(port, port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
+
+		GEM_BUG_ON(!rq);
+		GEM_BUG_ON(count > !n);
+
+		if (!count++)
+			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		port_set(port, port_pack(rq, count));
+		desc = execlists_update_context(rq);
+		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
 
 		writel(upper_32_bits(desc), elsp);
 		writel(lower_32_bits(desc), elsp);
@@ -456,15 +459,23 @@  static void port_assign(struct execlist_port *port,
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
+	spin_lock_irq(&engine->timeline->lock);
+	rb = el->first;
+	GEM_BUG_ON(rb_first(&el->queue) != rb);
+
+	if (unlikely(!rb))
+		goto done;
+
+	if (execlist_active_ports(el)) {
+		port = execlist_port_tail(el);
+		last = port_request(port);
+
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
 		 * as we resubmit the request. See gen8_emit_breadcrumb()
@@ -472,6 +483,11 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * request.
 		 */
 		last->tail = last->wa_tail;
+	} else {
+		/* Allocate first port to coalesce into */
+		port = execlist_request_port(el);
+		last = NULL;
+	}
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -493,11 +509,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * sequence of requests as being the most optimal (fewest wake ups
 	 * and context switches) submission.
 	 */
-
-	spin_lock_irq(&engine->timeline->lock);
-	rb = el->first;
-	GEM_BUG_ON(rb_first(&el->queue) != rb);
-	while (rb) {
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -515,11 +527,11 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
 				/*
-				 * If we are on the second port and cannot
+				 * If we are on the last port and cannot
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
+				if (!execlist_inactive_ports(el)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -544,8 +556,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (submit)
 					port_assign(port, last);
 
-				port = execlist_port_next(el, port);
-
+				port = execlist_request_port(el);
 				GEM_BUG_ON(port_isset(port));
 			}
 
@@ -563,7 +574,8 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
+
 done:
 	el->first = rb;
 	if (submit)
@@ -582,6 +594,9 @@  static void execlist_cancel_port_requests(struct intel_engine_execlist *el)
 		i915_gem_request_put(port_request(&el->port[i]));
 
 	memset(el->port, 0, sizeof(el->port));
+
+	el->port_count = 0;
+	el->port_head = 0;
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -643,10 +658,12 @@  static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
 {
-	struct execlist_port * const port0 = execlist_port_head(el);
-	struct execlist_port * const port1 = execlist_port_next(el, port0);
+	const unsigned int active = execlist_active_ports(el);
+
+	if (!active)
+		return true;
 
-	return port_count(port0) + port_count(port1) < 2;
+	return port_count(execlist_port_tail(el)) + active < 2;
 }
 
 /*
@@ -657,7 +674,6 @@  static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -714,6 +730,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 		}
 
 		while (head != tail) {
+			struct execlist_port *port;
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
 			unsigned int count;
@@ -742,6 +759,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			port = execlist_port_head(el);
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
@@ -755,13 +773,13 @@  static void intel_lrc_irq_handler(unsigned long data)
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
-				port = execlist_port_complete(el, port);
+				execlist_release_port(el, port);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
 
 			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
+			GEM_BUG_ON(execlist_active_ports(el) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -786,7 +804,7 @@  static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(el))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&el->irq_tasklet);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 991f6c0bd6c2..efa5a8ea1ecb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -249,6 +249,11 @@  struct intel_engine_execlist {
 	unsigned int port_head;
 
 	/**
+	 * @port_count: reserved ports
+	 */
+	unsigned int port_count;
+
+	/**
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root queue;
@@ -529,14 +534,20 @@  execlist_num_ports(const struct intel_engine_execlist * const el)
 	return el->port_mask + 1;
 }
 
-#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
+static inline unsigned int
+execlist_active_ports(const struct intel_engine_execlist * const el)
+{
+	return el->port_count;
+}
 
-static inline struct execlist_port *
-execlist_port_head(struct intel_engine_execlist * const el)
+static inline unsigned int
+execlist_inactive_ports(const struct intel_engine_execlist * const el)
 {
-	return &el->port[el->port_head];
+	return execlist_num_ports(el) - execlist_active_ports(el);
 }
 
+#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
+
 /* Index starting from port_head */
 static inline struct execlist_port *
 execlist_port_index(struct intel_engine_execlist * const el,
@@ -546,30 +557,46 @@  execlist_port_index(struct intel_engine_execlist * const el,
 }
 
 static inline struct execlist_port *
-execlist_port_tail(struct intel_engine_execlist * const el)
+execlist_port_head(struct intel_engine_execlist * const el)
 {
-	return &el->port[__port_idx(el->port_head, -1, el->port_mask)];
+	GEM_BUG_ON(!el->port_count);
+
+	return execlist_port_index(el, 0);
 }
 
 static inline struct execlist_port *
-execlist_port_next(struct intel_engine_execlist * const el,
-		   const struct execlist_port * const port)
+execlist_port_tail(struct intel_engine_execlist * const el)
 {
-	const unsigned int i = port_index(port, el);
+	GEM_BUG_ON(!el->port_count);
 
-	return &el->port[__port_idx(i, 1, el->port_mask)];
+	return execlist_port_index(el, el->port_count - 1);
 }
 
 static inline struct execlist_port *
-execlist_port_complete(struct intel_engine_execlist * const el,
-		       struct execlist_port * const port)
+execlist_request_port(struct intel_engine_execlist * const el)
 {
+	GEM_BUG_ON(el->port_count == el->port_mask + 1);
+
+	el->port_count++;
+
+	GEM_BUG_ON(port_isset(execlist_port_tail(el)));
+
+	return execlist_port_tail(el);
+}
+
+static inline void
+execlist_release_port(struct intel_engine_execlist * const el,
+		      struct execlist_port * const port)
+{
+
 	GEM_BUG_ON(port_index(port, el) != el->port_head);
+	GEM_BUG_ON(!port_isset(port));
+	GEM_BUG_ON(!el->port_count);
 
 	memset(port, 0, sizeof(struct execlist_port));
-	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
 
-	return execlist_port_head(el);
+	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
+	el->port_count--;
 }
 
 static inline unsigned int