diff mbox

[1/3] drm/i915: BUG_ON() when cmd/reg tables are not sorted

Message ID 1395945820-20376-2-git-send-email-bradley.d.volkin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

bradley.d.volkin@intel.com March 27, 2014, 6:43 p.m. UTC
From: Brad Volkin <bradley.d.volkin@intel.com>

As suggested during review, this makes it much more obvious
when the tables are not sorted.

Cc: Jani Nikula <jani.nikula@linux.intel.com>
Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

Comments

Kenneth Graunke March 27, 2014, 9:47 p.m. UTC | #1
On 03/27/2014 11:43 AM, bradley.d.volkin@intel.com wrote:
> From: Brad Volkin <bradley.d.volkin@intel.com>
> 
> As suggested during review, this makes it much more obvious
> when the tables are not sorted.
> 
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_cmd_parser.c | 31 +++++++++++++++++++++----------
>  1 file changed, 21 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 788bd96..8a93db3 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -493,12 +493,13 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
>  	return 0;
>  }
>  
> -static void validate_cmds_sorted(struct intel_ring_buffer *ring)
> +static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
>  {
>  	int i;
> +	bool ret = true;
>  
>  	if (!ring->cmd_tables || ring->cmd_table_count == 0)
> -		return;
> +		return true;
>  
>  	for (i = 0; i < ring->cmd_table_count; i++) {
>  		const struct drm_i915_cmd_table *table = &ring->cmd_tables[i];
> @@ -510,35 +511,45 @@ static void validate_cmds_sorted(struct intel_ring_buffer *ring)
>  				&table->table[i];
>  			u32 curr = desc->cmd.value & desc->cmd.mask;
>  
> -			if (curr < previous)
> +			if (curr < previous) {
>  				DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
>  					  ring->id, i, j, curr, previous);
> +				ret = false;
> +			}
>  
>  			previous = curr;
>  		}
>  	}
> +
> +	return ret;
>  }
>  
> -static void check_sorted(int ring_id, const u32 *reg_table, int reg_count)
> +static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count)
>  {
>  	int i;
>  	u32 previous = 0;
> +	bool ret = true;
>  
>  	for (i = 0; i < reg_count; i++) {
>  		u32 curr = reg_table[i];
>  
> -		if (curr < previous)
> +		if (curr < previous) {
>  			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
>  				  ring_id, i, curr, previous);
> +			ret = false;
> +		}
>  
>  		previous = curr;
>  	}
> +
> +	return ret;
>  }
>  
> -static void validate_regs_sorted(struct intel_ring_buffer *ring)
> +static bool validate_regs_sorted(struct intel_ring_buffer *ring)
>  {
> -	check_sorted(ring->id, ring->reg_table, ring->reg_count);
> -	check_sorted(ring->id, ring->master_reg_table, ring->master_reg_count);
> +	return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
> +		check_sorted(ring->id, ring->master_reg_table,
> +			     ring->master_reg_count);
>  }
>  
>  /**
> @@ -617,8 +628,8 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
>  		BUG();
>  	}
>  
> -	validate_cmds_sorted(ring);
> -	validate_regs_sorted(ring);
> +	BUG_ON(!validate_cmds_sorted(ring));
> +	BUG_ON(!validate_regs_sorted(ring));
>  }
>  
>  static const struct drm_i915_cmd_descriptor*

Does any code actually rely on the tables being sorted?

I didn't see any early breaks or returns once the parser has gone past a
command...it seems to keep looking through the entire list.

As an aside:

It seems like find_cmd could be implemented a lot more efficiently.
Currently, most 3DSTATE commands are not in the tables - they only
appear to contain commands that need special handling.  This means that
find_cmd will walk through every command in every table before falling
back to the default info struct.

As an example, my window manager (KWin) submitted a render ring batch
with 700 commands in it.  Most of those commands need to walk through
every command in every table, which is about 48 entries.  That's 33,600
iterations through tables, which seems...kind of excessive.

For example, 3D commands all have the form 0x78xx or 0x79xx.  We could
use the xx as an array index into a lookup table, and get O(1) access
instead of O(n).  Or, at least something similar.

Thanks again for your work on this, Brad.

--Ken
bradley.d.volkin@intel.com March 27, 2014, 11:56 p.m. UTC | #2
On Thu, Mar 27, 2014 at 02:47:03PM -0700, Kenneth Graunke wrote:
> Does any code actually rely on the tables being sorted?

Not today. The idea was to make it easier to move to an algorithm that does
in the future. For example, I thought binary search might be an easy win.

> 
> I didn't see any early breaks or returns once the parser has gone past a
> command...it seems to keep looking through the entire list.
> 
> As an aside:
> 
> It seems like find_cmd could be implemented a lot more efficiently.

Yes, it's definitely a naive implementation. I've been hesitant to do too
much in the way of optimizing the searches until we have some real perf
regressions to look at. I've received several suggested improvements that,
at least with quick implementations, didn't show gains in the benchmarks
I had available (e.g. binary search; using kmap_atomic instead of vmap). My
hope is that with the feature now generally available, we'll see some data
from benchmarks people care about and go from there.

I like your suggestion though and I'll add it to the list.

Thanks,
Brad

> Currently, most 3DSTATE commands are not in the tables - they only
> appear to contain commands that need special handling.  This means that
> find_cmd will walk through every command in every table before falling
> back to the default info struct.
> 
> As an example, my window manager (KWin) submitted a render ring batch
> with 700 commands in it.  Most of those commands need to walk through
> every command in every table, which is about 48 entries.  That's 33,600
> iterations through tables, which seems...kind of excessive.
> 
> For example, 3D commands all have the form 0x78xx or 0x79xx.  We could
> use the xx as an array index into a lookup table, and get O(1) access
> instead of O(n).  Or, at least something similar.
> 
> Thanks again for your work on this, Brad.
> 
> --Ken
>
Jani Nikula April 2, 2014, 9:11 a.m. UTC | #3
On Thu, 27 Mar 2014, bradley.d.volkin@intel.com wrote:
> From: Brad Volkin <bradley.d.volkin@intel.com>
>
> As suggested during review, this makes it much more obvious
> when the tables are not sorted.

Let's look at the optimizations later.

Reviewed-by: Jani Nikula <jani.nikula@intel.com>


>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_cmd_parser.c | 31 +++++++++++++++++++++----------
>  1 file changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 788bd96..8a93db3 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -493,12 +493,13 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
>  	return 0;
>  }
>  
> -static void validate_cmds_sorted(struct intel_ring_buffer *ring)
> +static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
>  {
>  	int i;
> +	bool ret = true;
>  
>  	if (!ring->cmd_tables || ring->cmd_table_count == 0)
> -		return;
> +		return true;
>  
>  	for (i = 0; i < ring->cmd_table_count; i++) {
>  		const struct drm_i915_cmd_table *table = &ring->cmd_tables[i];
> @@ -510,35 +511,45 @@ static void validate_cmds_sorted(struct intel_ring_buffer *ring)
>  				&table->table[i];
>  			u32 curr = desc->cmd.value & desc->cmd.mask;
>  
> -			if (curr < previous)
> +			if (curr < previous) {
>  				DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
>  					  ring->id, i, j, curr, previous);
> +				ret = false;
> +			}
>  
>  			previous = curr;
>  		}
>  	}
> +
> +	return ret;
>  }
>  
> -static void check_sorted(int ring_id, const u32 *reg_table, int reg_count)
> +static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count)
>  {
>  	int i;
>  	u32 previous = 0;
> +	bool ret = true;
>  
>  	for (i = 0; i < reg_count; i++) {
>  		u32 curr = reg_table[i];
>  
> -		if (curr < previous)
> +		if (curr < previous) {
>  			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
>  				  ring_id, i, curr, previous);
> +			ret = false;
> +		}
>  
>  		previous = curr;
>  	}
> +
> +	return ret;
>  }
>  
> -static void validate_regs_sorted(struct intel_ring_buffer *ring)
> +static bool validate_regs_sorted(struct intel_ring_buffer *ring)
>  {
> -	check_sorted(ring->id, ring->reg_table, ring->reg_count);
> -	check_sorted(ring->id, ring->master_reg_table, ring->master_reg_count);
> +	return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
> +		check_sorted(ring->id, ring->master_reg_table,
> +			     ring->master_reg_count);
>  }
>  
>  /**
> @@ -617,8 +628,8 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
>  		BUG();
>  	}
>  
> -	validate_cmds_sorted(ring);
> -	validate_regs_sorted(ring);
> +	BUG_ON(!validate_cmds_sorted(ring));
> +	BUG_ON(!validate_regs_sorted(ring));
>  }
>  
>  static const struct drm_i915_cmd_descriptor*
> -- 
> 1.8.3.2
>
Daniel Vetter April 2, 2014, 9:35 a.m. UTC | #4
On Wed, Apr 02, 2014 at 12:11:37PM +0300, Jani Nikula wrote:
> On Thu, 27 Mar 2014, bradley.d.volkin@intel.com wrote:
> > From: Brad Volkin <bradley.d.volkin@intel.com>
> >
> > As suggested during review, this makes it much more obvious
> > when the tables are not sorted.
> 
> Let's look at the optimizations later.
> 
> Reviewed-by: Jani Nikula <jani.nikula@intel.com>

Pulled in all three, thanks for patches&review.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 788bd96..8a93db3 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -493,12 +493,13 @@  static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
 	return 0;
 }
 
-static void validate_cmds_sorted(struct intel_ring_buffer *ring)
+static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
 {
 	int i;
+	bool ret = true;
 
 	if (!ring->cmd_tables || ring->cmd_table_count == 0)
-		return;
+		return true;
 
 	for (i = 0; i < ring->cmd_table_count; i++) {
 		const struct drm_i915_cmd_table *table = &ring->cmd_tables[i];
@@ -510,35 +511,45 @@  static void validate_cmds_sorted(struct intel_ring_buffer *ring)
 				&table->table[i];
 			u32 curr = desc->cmd.value & desc->cmd.mask;
 
-			if (curr < previous)
+			if (curr < previous) {
 				DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
 					  ring->id, i, j, curr, previous);
+				ret = false;
+			}
 
 			previous = curr;
 		}
 	}
+
+	return ret;
 }
 
-static void check_sorted(int ring_id, const u32 *reg_table, int reg_count)
+static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count)
 {
 	int i;
 	u32 previous = 0;
+	bool ret = true;
 
 	for (i = 0; i < reg_count; i++) {
 		u32 curr = reg_table[i];
 
-		if (curr < previous)
+		if (curr < previous) {
 			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
 				  ring_id, i, curr, previous);
+			ret = false;
+		}
 
 		previous = curr;
 	}
+
+	return ret;
 }
 
-static void validate_regs_sorted(struct intel_ring_buffer *ring)
+static bool validate_regs_sorted(struct intel_ring_buffer *ring)
 {
-	check_sorted(ring->id, ring->reg_table, ring->reg_count);
-	check_sorted(ring->id, ring->master_reg_table, ring->master_reg_count);
+	return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
+		check_sorted(ring->id, ring->master_reg_table,
+			     ring->master_reg_count);
 }
 
 /**
@@ -617,8 +628,8 @@  void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
 		BUG();
 	}
 
-	validate_cmds_sorted(ring);
-	validate_regs_sorted(ring);
+	BUG_ON(!validate_cmds_sorted(ring));
+	BUG_ON(!validate_regs_sorted(ring));
 }
 
 static const struct drm_i915_cmd_descriptor*