diff mbox

[v2,2/6] drm/i915: Cache last cmd descriptor when parsing

Message ID 1448016961-25331-3-git-send-email-chris@chris-wilson.co.uk
State New, archived
Headers show

Commit Message

Chris Wilson Nov. 20, 2015, 10:55 a.m. UTC
The cmd parser has the biggest impact on the BLT ring, because it is
relatively verbose composed to the other engines as the vertex data is
inline. It also typically has runs of repeating commands (again since
the vertex data is inline, it typically has sequences of XY_SETUP_BLT,
XY_SCANLINE_BLT..) We can easily reduce the impact of cmdparsing on
benchmarks by then caching the last descriptor and comparing it against
the next command header. To get maximum benefit, we also want to take
advantage of skipping a few validations and length determinations if the
header is unchanged between commands.

ivb i7-3720QM:
x11perf -dot: before 52.3M, after 124M (max 203M)
glxgears: before 7310 fps, after 7550 fps (max 7860 fps)

v2: Fix initial cached cmd descriptor to match MI_NOOP.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 133 +++++++++++++++------------------
 drivers/gpu/drm/i915/i915_drv.h        |  10 +--
 2 files changed, 64 insertions(+), 79 deletions(-)

Comments

Ville Syrjälä Nov. 20, 2015, 3:08 p.m. UTC | #1
On Fri, Nov 20, 2015 at 10:55:57AM +0000, Chris Wilson wrote:
> The cmd parser has the biggest impact on the BLT ring, because it is
> relatively verbose composed to the other engines as the vertex data is
> inline. It also typically has runs of repeating commands (again since
> the vertex data is inline, it typically has sequences of XY_SETUP_BLT,
> XY_SCANLINE_BLT..) We can easily reduce the impact of cmdparsing on
> benchmarks by then caching the last descriptor and comparing it against
> the next command header. To get maximum benefit, we also want to take
> advantage of skipping a few validations and length determinations if the
> header is unchanged between commands.
> 
> ivb i7-3720QM:
> x11perf -dot: before 52.3M, after 124M (max 203M)
> glxgears: before 7310 fps, after 7550 fps (max 7860 fps)
> 
> v2: Fix initial cached cmd descriptor to match MI_NOOP.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_cmd_parser.c | 133 +++++++++++++++------------------
>  drivers/gpu/drm/i915/i915_drv.h        |  10 +--
>  2 files changed, 64 insertions(+), 79 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index db3a04ea036a..c6f6d9f2b2ce 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -794,6 +794,9 @@ void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
>  	fini_hash_table(ring);
>  }
>  
> +/*
> + * Returns a pointer to a descriptor for the command specified by cmd_header.
> + */
>  static const struct drm_i915_cmd_descriptor*
>  find_cmd_in_table(struct intel_engine_cs *ring,
>  		  u32 cmd_header)
> @@ -813,37 +816,6 @@ find_cmd_in_table(struct intel_engine_cs *ring,
>  	return NULL;
>  }
>  
> -/*
> - * Returns a pointer to a descriptor for the command specified by cmd_header.
> - *
> - * The caller must supply space for a default descriptor via the default_desc
> - * parameter. If no descriptor for the specified command exists in the ring's
> - * command parser tables, this function fills in default_desc based on the
> - * ring's default length encoding and returns default_desc.
> - */
> -static const struct drm_i915_cmd_descriptor*
> -find_cmd(struct intel_engine_cs *ring,
> -	 u32 cmd_header,
> -	 struct drm_i915_cmd_descriptor *default_desc)
> -{
> -	const struct drm_i915_cmd_descriptor *desc;
> -	u32 mask;
> -
> -	desc = find_cmd_in_table(ring, cmd_header);
> -	if (desc)
> -		return desc;
> -
> -	mask = ring->get_cmd_length_mask(cmd_header);
> -	if (!mask)
> -		return NULL;
> -
> -	BUG_ON(!default_desc);
> -	default_desc->flags = CMD_DESC_SKIP;
> -	default_desc->length.mask = mask;
> -
> -	return default_desc;
> -}
> -
>  static const struct drm_i915_reg_descriptor *
>  find_reg(const struct drm_i915_reg_descriptor *table,
>  	 int count, u32 addr)
> @@ -886,17 +858,6 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>  		      const bool is_master,
>  		      bool *oacontrol_set)
>  {
> -	if (desc->flags & CMD_DESC_REJECT) {
> -		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> -		return false;
> -	}
> -
> -	if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
> -		DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
> -				 *cmd);
> -		return false;
> -	}
> -
>  	if (desc->flags & CMD_DESC_REGISTER) {
>  		/*
>  		 * Get the distance between individual register offset
> @@ -1027,14 +988,15 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>  		    u32 batch_len,
>  		    bool is_master)
>  {
> -	const struct drm_i915_cmd_descriptor *desc;
> +	struct drm_i915_cmd_descriptor default_desc = { CMD_DESC_SKIP };
> +	const struct drm_i915_cmd_descriptor *desc = &default_desc;
> +	u32 last_cmd_header = 0;
>  	unsigned dst_iter, src_iter;
>  	int needs_clflush = 0;
>  	struct get_page rewind;
>  	void *src, *dst, *tmp;
> -	u32 partial, length;
> +	u32 partial, length = 1;
>  	unsigned in, out;
> -	struct drm_i915_cmd_descriptor default_desc = { 0 };
>  	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
>  	int ret = 0;
>  
> @@ -1100,40 +1062,63 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>  		partial = 0;
>  
>  		do {
> -			if (*cmd == MI_BATCH_BUFFER_END) {
> -				if (oacontrol_set) {
> -					DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
> -					goto unmap;
> +			if (*cmd != last_cmd_header) {
> +				if (*cmd == MI_BATCH_BUFFER_END) {
> +					if (unlikely(oacontrol_set)) {
> +						DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
> +						goto unmap;
> +					}
> +
> +					cmd++; /* copy this cmd to dst */
> +					batch_len = this; /* no more src */
> +					ret = 0;
> +					break;
>  				}
>  
> -				cmd++; /* copy this cmd to dst */
> -				batch_len = this; /* no more src */
> -				ret = 0;
> -				break;
> -			}
> -
> -			desc = find_cmd(ring, *cmd, &default_desc);
> -			if (!desc) {
> -				DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
> -						 *cmd);
> -				goto unmap;
> -			}
> +				desc = find_cmd_in_table(ring, *cmd);
> +				if (desc) {
> +					if (unlikely(desc->flags & CMD_DESC_REJECT)) {
> +						DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> +						goto unmap;
> +					}

Yikes. More indenttion. It's getting really deep here. Any sane way to
reduce it?

> +
> +					if (unlikely((desc->flags & CMD_DESC_MASTER) && !is_master)) {
> +						DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", *cmd);
> +						goto unmap;
> +					}
> +
> +					/*
> +					 * If the batch buffer contains a
> +					 * chained batch, return an error that
> +					 * tells the caller to abort and
> +					 * dispatch the workload as a
> +					 * non-secure batch.
> +					 */
> +					if (unlikely(desc->cmd.value == MI_BATCH_BUFFER_START)) {
> +						ret = -EACCES;
> +						goto unmap;
> +					}
> +
> +					if (desc->flags & CMD_DESC_FIXED)
> +						length = desc->length.fixed;
> +					else
> +						length = (*cmd & desc->length.mask) + LENGTH_BIAS;
> +				} else {
> +					u32 mask = ring->get_cmd_length_mask(*cmd);
> +					if (unlikely(!mask)) {
> +						DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd);
> +						goto unmap;
> +					}
> +
> +					default_desc.length.mask = mask;
> +					desc = &default_desc;
> +
> +					length = (*cmd & mask) + LENGTH_BIAS;
> +				}
>  
> -			/*
> -			 * If the batch buffer contains a chained batch, return an
> -			 * error that tells the caller to abort and dispatch the
> -			 * workload as a non-secure batch.
> -			 */
> -			if (desc->cmd.value == MI_BATCH_BUFFER_START) {
> -				ret = -EACCES;
> -				goto unmap;
> +				last_cmd_header = *cmd;
>  			}
>  
> -			if (desc->flags & CMD_DESC_FIXED)
> -				length = desc->length.fixed;
> -			else
> -				length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
> -
>  			if (unlikely(cmd + length > page_end)) {
>  				if (unlikely(cmd + length > batch_end)) {
>  					DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8d939649b919..28d5bfceae3b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2333,12 +2333,12 @@ struct drm_i915_cmd_descriptor {
>  	 *                  is the DRM master
>  	 */
>  	u32 flags;
> +#define CMD_DESC_SKIP     (0)
>  #define CMD_DESC_FIXED    (1<<0)
> -#define CMD_DESC_SKIP     (1<<1)
> -#define CMD_DESC_REJECT   (1<<2)
> -#define CMD_DESC_REGISTER (1<<3)
> -#define CMD_DESC_BITMASK  (1<<4)
> -#define CMD_DESC_MASTER   (1<<5)
> +#define CMD_DESC_REJECT   (1<<1)
> +#define CMD_DESC_REGISTER (1<<2)
> +#define CMD_DESC_BITMASK  (1<<3)
> +#define CMD_DESC_MASTER   (1<<4)
>  
>  	/*
>  	 * The command's unique identification bits and the bitmask to get them.
> -- 
> 2.6.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Nov. 20, 2015, 3:44 p.m. UTC | #2
On Fri, Nov 20, 2015 at 05:08:06PM +0200, Ville Syrjälä wrote:
> On Fri, Nov 20, 2015 at 10:55:57AM +0000, Chris Wilson wrote:
> > +				desc = find_cmd_in_table(ring, *cmd);
> > +				if (desc) {
> > +					if (unlikely(desc->flags & CMD_DESC_REJECT)) {
> > +						DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> > +						goto unmap;
> > +					}
> 
> Yikes. More indenttion. It's getting really deep here. Any sane way to
> reduce it?

Moving the parsing out to a helper and some of the state to an on-stack
struct and then let gcc fold it back in. I was being lazy, just letting
vim reindent the block is far easier!
-Chris
Ville Syrjälä Dec. 1, 2015, 5:30 p.m. UTC | #3
On Fri, Nov 20, 2015 at 10:55:57AM +0000, Chris Wilson wrote:
> The cmd parser has the biggest impact on the BLT ring, because it is
> relatively verbose composed to the other engines as the vertex data is
> inline. It also typically has runs of repeating commands (again since
> the vertex data is inline, it typically has sequences of XY_SETUP_BLT,
> XY_SCANLINE_BLT..) We can easily reduce the impact of cmdparsing on
> benchmarks by then caching the last descriptor and comparing it against
> the next command header. To get maximum benefit, we also want to take
> advantage of skipping a few validations and length determinations if the
> header is unchanged between commands.
> 
> ivb i7-3720QM:
> x11perf -dot: before 52.3M, after 124M (max 203M)
> glxgears: before 7310 fps, after 7550 fps (max 7860 fps)
> 
> v2: Fix initial cached cmd descriptor to match MI_NOOP.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---

Didn't spot anything wrong, so:

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

>  drivers/gpu/drm/i915/i915_cmd_parser.c | 133 +++++++++++++++------------------
>  drivers/gpu/drm/i915/i915_drv.h        |  10 +--
>  2 files changed, 64 insertions(+), 79 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index db3a04ea036a..c6f6d9f2b2ce 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -794,6 +794,9 @@ void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
>  	fini_hash_table(ring);
>  }
>  
> +/*
> + * Returns a pointer to a descriptor for the command specified by cmd_header.
> + */
>  static const struct drm_i915_cmd_descriptor*
>  find_cmd_in_table(struct intel_engine_cs *ring,
>  		  u32 cmd_header)
> @@ -813,37 +816,6 @@ find_cmd_in_table(struct intel_engine_cs *ring,
>  	return NULL;
>  }
>  
> -/*
> - * Returns a pointer to a descriptor for the command specified by cmd_header.
> - *
> - * The caller must supply space for a default descriptor via the default_desc
> - * parameter. If no descriptor for the specified command exists in the ring's
> - * command parser tables, this function fills in default_desc based on the
> - * ring's default length encoding and returns default_desc.
> - */
> -static const struct drm_i915_cmd_descriptor*
> -find_cmd(struct intel_engine_cs *ring,
> -	 u32 cmd_header,
> -	 struct drm_i915_cmd_descriptor *default_desc)
> -{
> -	const struct drm_i915_cmd_descriptor *desc;
> -	u32 mask;
> -
> -	desc = find_cmd_in_table(ring, cmd_header);
> -	if (desc)
> -		return desc;
> -
> -	mask = ring->get_cmd_length_mask(cmd_header);
> -	if (!mask)
> -		return NULL;
> -
> -	BUG_ON(!default_desc);
> -	default_desc->flags = CMD_DESC_SKIP;
> -	default_desc->length.mask = mask;
> -
> -	return default_desc;
> -}
> -
>  static const struct drm_i915_reg_descriptor *
>  find_reg(const struct drm_i915_reg_descriptor *table,
>  	 int count, u32 addr)
> @@ -886,17 +858,6 @@ static bool check_cmd(const struct intel_engine_cs *ring,
>  		      const bool is_master,
>  		      bool *oacontrol_set)
>  {
> -	if (desc->flags & CMD_DESC_REJECT) {
> -		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> -		return false;
> -	}
> -
> -	if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
> -		DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
> -				 *cmd);
> -		return false;
> -	}
> -
>  	if (desc->flags & CMD_DESC_REGISTER) {
>  		/*
>  		 * Get the distance between individual register offset
> @@ -1027,14 +988,15 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>  		    u32 batch_len,
>  		    bool is_master)
>  {
> -	const struct drm_i915_cmd_descriptor *desc;
> +	struct drm_i915_cmd_descriptor default_desc = { CMD_DESC_SKIP };
> +	const struct drm_i915_cmd_descriptor *desc = &default_desc;
> +	u32 last_cmd_header = 0;
>  	unsigned dst_iter, src_iter;
>  	int needs_clflush = 0;
>  	struct get_page rewind;
>  	void *src, *dst, *tmp;
> -	u32 partial, length;
> +	u32 partial, length = 1;
>  	unsigned in, out;
> -	struct drm_i915_cmd_descriptor default_desc = { 0 };
>  	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
>  	int ret = 0;
>  
> @@ -1100,40 +1062,63 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
>  		partial = 0;
>  
>  		do {
> -			if (*cmd == MI_BATCH_BUFFER_END) {
> -				if (oacontrol_set) {
> -					DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
> -					goto unmap;
> +			if (*cmd != last_cmd_header) {
> +				if (*cmd == MI_BATCH_BUFFER_END) {
> +					if (unlikely(oacontrol_set)) {
> +						DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
> +						goto unmap;
> +					}
> +
> +					cmd++; /* copy this cmd to dst */
> +					batch_len = this; /* no more src */
> +					ret = 0;
> +					break;
>  				}
>  
> -				cmd++; /* copy this cmd to dst */
> -				batch_len = this; /* no more src */
> -				ret = 0;
> -				break;
> -			}
> -
> -			desc = find_cmd(ring, *cmd, &default_desc);
> -			if (!desc) {
> -				DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
> -						 *cmd);
> -				goto unmap;
> -			}
> +				desc = find_cmd_in_table(ring, *cmd);
> +				if (desc) {
> +					if (unlikely(desc->flags & CMD_DESC_REJECT)) {
> +						DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
> +						goto unmap;
> +					}
> +
> +					if (unlikely((desc->flags & CMD_DESC_MASTER) && !is_master)) {
> +						DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", *cmd);
> +						goto unmap;
> +					}
> +
> +					/*
> +					 * If the batch buffer contains a
> +					 * chained batch, return an error that
> +					 * tells the caller to abort and
> +					 * dispatch the workload as a
> +					 * non-secure batch.
> +					 */
> +					if (unlikely(desc->cmd.value == MI_BATCH_BUFFER_START)) {
> +						ret = -EACCES;
> +						goto unmap;
> +					}
> +
> +					if (desc->flags & CMD_DESC_FIXED)
> +						length = desc->length.fixed;
> +					else
> +						length = (*cmd & desc->length.mask) + LENGTH_BIAS;
> +				} else {
> +					u32 mask = ring->get_cmd_length_mask(*cmd);
> +					if (unlikely(!mask)) {
> +						DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd);
> +						goto unmap;
> +					}
> +
> +					default_desc.length.mask = mask;
> +					desc = &default_desc;
> +
> +					length = (*cmd & mask) + LENGTH_BIAS;
> +				}
>  
> -			/*
> -			 * If the batch buffer contains a chained batch, return an
> -			 * error that tells the caller to abort and dispatch the
> -			 * workload as a non-secure batch.
> -			 */
> -			if (desc->cmd.value == MI_BATCH_BUFFER_START) {
> -				ret = -EACCES;
> -				goto unmap;
> +				last_cmd_header = *cmd;
>  			}
>  
> -			if (desc->flags & CMD_DESC_FIXED)
> -				length = desc->length.fixed;
> -			else
> -				length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
> -
>  			if (unlikely(cmd + length > page_end)) {
>  				if (unlikely(cmd + length > batch_end)) {
>  					DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8d939649b919..28d5bfceae3b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2333,12 +2333,12 @@ struct drm_i915_cmd_descriptor {
>  	 *                  is the DRM master
>  	 */
>  	u32 flags;
> +#define CMD_DESC_SKIP     (0)
>  #define CMD_DESC_FIXED    (1<<0)
> -#define CMD_DESC_SKIP     (1<<1)
> -#define CMD_DESC_REJECT   (1<<2)
> -#define CMD_DESC_REGISTER (1<<3)
> -#define CMD_DESC_BITMASK  (1<<4)
> -#define CMD_DESC_MASTER   (1<<5)
> +#define CMD_DESC_REJECT   (1<<1)
> +#define CMD_DESC_REGISTER (1<<2)
> +#define CMD_DESC_BITMASK  (1<<3)
> +#define CMD_DESC_MASTER   (1<<4)
>  
>  	/*
>  	 * The command's unique identification bits and the bitmask to get them.
> -- 
> 2.6.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index db3a04ea036a..c6f6d9f2b2ce 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -794,6 +794,9 @@  void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
 	fini_hash_table(ring);
 }
 
+/*
+ * Returns a pointer to a descriptor for the command specified by cmd_header.
+ */
 static const struct drm_i915_cmd_descriptor*
 find_cmd_in_table(struct intel_engine_cs *ring,
 		  u32 cmd_header)
@@ -813,37 +816,6 @@  find_cmd_in_table(struct intel_engine_cs *ring,
 	return NULL;
 }
 
-/*
- * Returns a pointer to a descriptor for the command specified by cmd_header.
- *
- * The caller must supply space for a default descriptor via the default_desc
- * parameter. If no descriptor for the specified command exists in the ring's
- * command parser tables, this function fills in default_desc based on the
- * ring's default length encoding and returns default_desc.
- */
-static const struct drm_i915_cmd_descriptor*
-find_cmd(struct intel_engine_cs *ring,
-	 u32 cmd_header,
-	 struct drm_i915_cmd_descriptor *default_desc)
-{
-	const struct drm_i915_cmd_descriptor *desc;
-	u32 mask;
-
-	desc = find_cmd_in_table(ring, cmd_header);
-	if (desc)
-		return desc;
-
-	mask = ring->get_cmd_length_mask(cmd_header);
-	if (!mask)
-		return NULL;
-
-	BUG_ON(!default_desc);
-	default_desc->flags = CMD_DESC_SKIP;
-	default_desc->length.mask = mask;
-
-	return default_desc;
-}
-
 static const struct drm_i915_reg_descriptor *
 find_reg(const struct drm_i915_reg_descriptor *table,
 	 int count, u32 addr)
@@ -886,17 +858,6 @@  static bool check_cmd(const struct intel_engine_cs *ring,
 		      const bool is_master,
 		      bool *oacontrol_set)
 {
-	if (desc->flags & CMD_DESC_REJECT) {
-		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
-		return false;
-	}
-
-	if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
-		DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
-				 *cmd);
-		return false;
-	}
-
 	if (desc->flags & CMD_DESC_REGISTER) {
 		/*
 		 * Get the distance between individual register offset
@@ -1027,14 +988,15 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 		    u32 batch_len,
 		    bool is_master)
 {
-	const struct drm_i915_cmd_descriptor *desc;
+	struct drm_i915_cmd_descriptor default_desc = { CMD_DESC_SKIP };
+	const struct drm_i915_cmd_descriptor *desc = &default_desc;
+	u32 last_cmd_header = 0;
 	unsigned dst_iter, src_iter;
 	int needs_clflush = 0;
 	struct get_page rewind;
 	void *src, *dst, *tmp;
-	u32 partial, length;
+	u32 partial, length = 1;
 	unsigned in, out;
-	struct drm_i915_cmd_descriptor default_desc = { 0 };
 	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 	int ret = 0;
 
@@ -1100,40 +1062,63 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 		partial = 0;
 
 		do {
-			if (*cmd == MI_BATCH_BUFFER_END) {
-				if (oacontrol_set) {
-					DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
-					goto unmap;
+			if (*cmd != last_cmd_header) {
+				if (*cmd == MI_BATCH_BUFFER_END) {
+					if (unlikely(oacontrol_set)) {
+						DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
+						goto unmap;
+					}
+
+					cmd++; /* copy this cmd to dst */
+					batch_len = this; /* no more src */
+					ret = 0;
+					break;
 				}
 
-				cmd++; /* copy this cmd to dst */
-				batch_len = this; /* no more src */
-				ret = 0;
-				break;
-			}
-
-			desc = find_cmd(ring, *cmd, &default_desc);
-			if (!desc) {
-				DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
-						 *cmd);
-				goto unmap;
-			}
+				desc = find_cmd_in_table(ring, *cmd);
+				if (desc) {
+					if (unlikely(desc->flags & CMD_DESC_REJECT)) {
+						DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
+						goto unmap;
+					}
+
+					if (unlikely((desc->flags & CMD_DESC_MASTER) && !is_master)) {
+						DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", *cmd);
+						goto unmap;
+					}
+
+					/*
+					 * If the batch buffer contains a
+					 * chained batch, return an error that
+					 * tells the caller to abort and
+					 * dispatch the workload as a
+					 * non-secure batch.
+					 */
+					if (unlikely(desc->cmd.value == MI_BATCH_BUFFER_START)) {
+						ret = -EACCES;
+						goto unmap;
+					}
+
+					if (desc->flags & CMD_DESC_FIXED)
+						length = desc->length.fixed;
+					else
+						length = (*cmd & desc->length.mask) + LENGTH_BIAS;
+				} else {
+					u32 mask = ring->get_cmd_length_mask(*cmd);
+					if (unlikely(!mask)) {
+						DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd);
+						goto unmap;
+					}
+
+					default_desc.length.mask = mask;
+					desc = &default_desc;
+
+					length = (*cmd & mask) + LENGTH_BIAS;
+				}
 
-			/*
-			 * If the batch buffer contains a chained batch, return an
-			 * error that tells the caller to abort and dispatch the
-			 * workload as a non-secure batch.
-			 */
-			if (desc->cmd.value == MI_BATCH_BUFFER_START) {
-				ret = -EACCES;
-				goto unmap;
+				last_cmd_header = *cmd;
 			}
 
-			if (desc->flags & CMD_DESC_FIXED)
-				length = desc->length.fixed;
-			else
-				length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
-
 			if (unlikely(cmd + length > page_end)) {
 				if (unlikely(cmd + length > batch_end)) {
 					DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8d939649b919..28d5bfceae3b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2333,12 +2333,12 @@  struct drm_i915_cmd_descriptor {
 	 *                  is the DRM master
 	 */
 	u32 flags;
+#define CMD_DESC_SKIP     (0)
 #define CMD_DESC_FIXED    (1<<0)
-#define CMD_DESC_SKIP     (1<<1)
-#define CMD_DESC_REJECT   (1<<2)
-#define CMD_DESC_REGISTER (1<<3)
-#define CMD_DESC_BITMASK  (1<<4)
-#define CMD_DESC_MASTER   (1<<5)
+#define CMD_DESC_REJECT   (1<<1)
+#define CMD_DESC_REGISTER (1<<2)
+#define CMD_DESC_BITMASK  (1<<3)
+#define CMD_DESC_MASTER   (1<<4)
 
 	/*
 	 * The command's unique identification bits and the bitmask to get them.