diff mbox series

[1/2] i915/perf: Move gen specific OA formats to single array

Message ID 20201215214901.10038-1-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] i915/perf: Move gen specific OA formats to single array | expand

Commit Message

Umesh Nerlige Ramappa Dec. 15, 2020, 9:49 p.m. UTC
Variations in OA formats in the different gens has led to creation of
several sparse arrays to store the formats.

Move oa formats into a single array and add the supported range of
platforms for the oa formats.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
 drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
 2 files changed, 28 insertions(+), 31 deletions(-)

Comments

Lionel Landwerlin Dec. 16, 2020, 8:30 a.m. UTC | #1
On 15/12/2020 23:49, Umesh Nerlige Ramappa wrote:
> Variations in OA formats in the different gens has led to creation of
> several sparse arrays to store the formats.
>
> Move oa formats into a single array and add the supported range of
> platforms for the oa formats.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>   drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>   2 files changed, 28 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index f553caf4b06d..afa92cf075c4 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>   
>   /* XXX: beware if future OA HW adds new report formats that the current
>    * code assumes all reports have a power-of-two size and ~(size - 1) can
> - * be used as a mask to align the OA tail pointer.
> + * be used as a mask to align the OA tail pointer. Note that the platforms
> + * in this array specify a range (inclusive of start and end).
>    */
> -static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
> -	[I915_OA_FORMAT_A13]	    = { 0, 64 },
> -	[I915_OA_FORMAT_A29]	    = { 1, 128 },
> -	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
> -	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
> -	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
> -	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
> -	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
> -	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
> -};
> -
> -static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
> -	[I915_OA_FORMAT_A12]		    = { 0, 64 },
> -	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
> -	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> -	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
> -};
> -
> -static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
> -	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> +static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
> +	/* haswell */
> +	[I915_OA_FORMAT_A13]                    = { 0, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_A29]                    = { 1, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_B4_C8]                  = { 4, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +
> +	/* haswell+ upto but not including tigerlake */
> +	[I915_OA_FORMAT_C4_B8]                  = { 7, 64, INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
I don't think we support IVB,.
> +
> +	/* gen8+ upto but not including tigerlake */
> +	[I915_OA_FORMAT_A12]                    = { 0, 64, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
> +	[I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
> +
> +	/* gen8+ */
> +	[I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>   };
>   


You also need to change i915_oa_stream_init() to use the global array. 
Looks like it will access a NULL pointer atm.


>   #define SAMPLE_OA_REPORT      (1<<0)
> @@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
>   	for (i = 0; i < n_props; i++) {
>   		u64 oa_period, oa_freq_hz;
>   		u64 id, value;
> +		enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>   
>   		ret = get_user(id, uprop);
>   		if (ret)
> @@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
>   					  value);
>   				return -EINVAL;
>   			}
> -			if (!perf->oa_formats[value].size) {
> -				DRM_DEBUG("Unsupported OA report format %llu\n",
> +			if (p < perf->oa_formats[value].start ||
> +			    p > perf->oa_formats[value].end) {
> +				DRM_DEBUG("OA report format not supported %llu\n",
>   					  value);
>   				return -EINVAL;
>   			}
> @@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>   
>   	/* XXX const struct i915_perf_ops! */
>   
> +	perf->oa_formats = oa_formats;
>   	if (IS_HASWELL(i915)) {
>   		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
>   		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
> @@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   		perf->ops.oa_disable = gen7_oa_disable;
>   		perf->ops.read = gen7_oa_read;
>   		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
> -
> -		perf->oa_formats = hsw_oa_formats;
>   	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>   		/* Note: that although we could theoretically also support the
>   		 * legacy ringbuffer mode on BDW (and earlier iterations of
> @@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   		perf->ops.read = gen8_oa_read;
>   
>   		if (IS_GEN_RANGE(i915, 8, 9)) {
> -			perf->oa_formats = gen8_plus_oa_formats;
> -
>   			perf->ops.is_valid_b_counter_reg =
>   				gen7_is_valid_b_counter_addr;
>   			perf->ops.is_valid_mux_reg =
> @@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   				perf->gen8_valid_ctx_bit = BIT(16);
>   			}
>   		} else if (IS_GEN_RANGE(i915, 10, 11)) {
> -			perf->oa_formats = gen8_plus_oa_formats;
> -
>   			perf->ops.is_valid_b_counter_reg =
>   				gen7_is_valid_b_counter_addr;
>   			perf->ops.is_valid_mux_reg =
> @@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   			}
>   			perf->gen8_valid_ctx_bit = BIT(16);
>   		} else if (IS_GEN(i915, 12)) {
> -			perf->oa_formats = gen12_oa_formats;
> -
>   			perf->ops.is_valid_b_counter_reg =
>   				gen12_is_valid_b_counter_addr;
>   			perf->ops.is_valid_mux_reg =
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index a36a455ae336..84dceb743ebc 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -19,6 +19,7 @@
>   #include "gt/intel_sseu.h"
>   #include "i915_reg.h"
>   #include "intel_wakeref.h"
> +#include "intel_device_info.h"
>   
>   struct drm_i915_private;
>   struct file;
> @@ -32,6 +33,8 @@ struct intel_engine_cs;
>   struct i915_oa_format {
>   	u32 format;
>   	int size;
> +	enum intel_platform start;
> +	enum intel_platform end;


Also need to drop oa_formats from struct i915_perf


>   };
>   
>   struct i915_oa_reg {
Tvrtko Ursulin Dec. 16, 2020, 9:25 a.m. UTC | #2
On 15/12/2020 21:49, Umesh Nerlige Ramappa wrote:
> Variations in OA formats in the different gens has led to creation of
> several sparse arrays to store the formats.
> 
> Move oa formats into a single array and add the supported range of
> platforms for the oa formats.
> 
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>   drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>   2 files changed, 28 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index f553caf4b06d..afa92cf075c4 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>   
>   /* XXX: beware if future OA HW adds new report formats that the current
>    * code assumes all reports have a power-of-two size and ~(size - 1) can
> - * be used as a mask to align the OA tail pointer.
> + * be used as a mask to align the OA tail pointer. Note that the platforms
> + * in this array specify a range (inclusive of start and end).
>    */
> -static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
> -	[I915_OA_FORMAT_A13]	    = { 0, 64 },
> -	[I915_OA_FORMAT_A29]	    = { 1, 128 },
> -	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
> -	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
> -	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
> -	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
> -	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
> -	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
> -};
> -
> -static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
> -	[I915_OA_FORMAT_A12]		    = { 0, 64 },
> -	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
> -	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> -	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
> -};
> -
> -static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
> -	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> +static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
> +	/* haswell */
> +	[I915_OA_FORMAT_A13]                    = { 0, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_A29]                    = { 1, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_B4_C8]                  = { 4, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +	[I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
> +
> +	/* haswell+ upto but not including tigerlake */
> +	[I915_OA_FORMAT_C4_B8]                  = { 7, 64, INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
> +
> +	/* gen8+ upto but not including tigerlake */
> +	[I915_OA_FORMAT_A12]                    = { 0, 64, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
> +	[I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
> +
> +	/* gen8+ */
> +	[I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>   };
>   
>   #define SAMPLE_OA_REPORT      (1<<0)
> @@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
>   	for (i = 0; i < n_props; i++) {
>   		u64 oa_period, oa_freq_hz;
>   		u64 id, value;
> +		enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>   
>   		ret = get_user(id, uprop);
>   		if (ret)
> @@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
>   					  value);
>   				return -EINVAL;
>   			}
> -			if (!perf->oa_formats[value].size) {
> -				DRM_DEBUG("Unsupported OA report format %llu\n",
> +			if (p < perf->oa_formats[value].start ||
> +			    p > perf->oa_formats[value].end) {
> +				DRM_DEBUG("OA report format not supported %llu\n",
>   					  value);
>   				return -EINVAL;
>   			}
> @@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>   
>   	/* XXX const struct i915_perf_ops! */
>   
> +	perf->oa_formats = oa_formats;
>   	if (IS_HASWELL(i915)) {
>   		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
>   		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
> @@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   		perf->ops.oa_disable = gen7_oa_disable;
>   		perf->ops.read = gen7_oa_read;
>   		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
> -
> -		perf->oa_formats = hsw_oa_formats;
>   	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>   		/* Note: that although we could theoretically also support the
>   		 * legacy ringbuffer mode on BDW (and earlier iterations of
> @@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   		perf->ops.read = gen8_oa_read;
>   
>   		if (IS_GEN_RANGE(i915, 8, 9)) {
> -			perf->oa_formats = gen8_plus_oa_formats;
> -
>   			perf->ops.is_valid_b_counter_reg =
>   				gen7_is_valid_b_counter_addr;
>   			perf->ops.is_valid_mux_reg =
> @@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   				perf->gen8_valid_ctx_bit = BIT(16);
>   			}
>   		} else if (IS_GEN_RANGE(i915, 10, 11)) {
> -			perf->oa_formats = gen8_plus_oa_formats;
> -
>   			perf->ops.is_valid_b_counter_reg =
>   				gen7_is_valid_b_counter_addr;
>   			perf->ops.is_valid_mux_reg =
> @@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>   			}
>   			perf->gen8_valid_ctx_bit = BIT(16);
>   		} else if (IS_GEN(i915, 12)) {
> -			perf->oa_formats = gen12_oa_formats;
> -
>   			perf->ops.is_valid_b_counter_reg =
>   				gen12_is_valid_b_counter_addr;
>   			perf->ops.is_valid_mux_reg =
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index a36a455ae336..84dceb743ebc 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -19,6 +19,7 @@
>   #include "gt/intel_sseu.h"
>   #include "i915_reg.h"
>   #include "intel_wakeref.h"
> +#include "intel_device_info.h"
>   
>   struct drm_i915_private;
>   struct file;
> @@ -32,6 +33,8 @@ struct intel_engine_cs;
>   struct i915_oa_format {
>   	u32 format;
>   	int size;
> +	enum intel_platform start;
> +	enum intel_platform end;

As a drive by only - I question the concept of thinking about platforms 
enums as ordered and having things like INTEL_TIGERLAKE - 1 in the code. 
It's completely novel concept and I think fragile. Order there is purely 
the order of upstreaming and not stable.

Suggestions wise, if the rules are Gen based you could use a mask, but 
if they are truly platform based then it is more tricky. However current 
code does use gen ranges to assign them, so would a genmask really not fit?

Regards,

Tvrtko
Umesh Nerlige Ramappa Dec. 18, 2020, 2:08 a.m. UTC | #3
On Wed, Dec 16, 2020 at 10:30:24AM +0200, Lionel Landwerlin wrote:
>On 15/12/2020 23:49, Umesh Nerlige Ramappa wrote:
>>Variations in OA formats in the different gens has led to creation of
>>several sparse arrays to store the formats.
>>
>>Move oa formats into a single array and add the supported range of
>>platforms for the oa formats.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>>  drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>>  2 files changed, 28 insertions(+), 31 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index f553caf4b06d..afa92cf075c4 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>>  /* XXX: beware if future OA HW adds new report formats that the current
>>   * code assumes all reports have a power-of-two size and ~(size - 1) can
>>- * be used as a mask to align the OA tail pointer.
>>+ * be used as a mask to align the OA tail pointer. Note that the platforms
>>+ * in this array specify a range (inclusive of start and end).
>>   */
>>-static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A13]	    = { 0, 64 },
>>-	[I915_OA_FORMAT_A29]	    = { 1, 128 },
>>-	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
>>-	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
>>-	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
>>-	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
>>-	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
>>-	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
>>-};
>>-
>>-static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A12]		    = { 0, 64 },
>>-	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
>>-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>-	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
>>-};
>>-
>>-static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>+static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>>+	/* haswell */
>>+	[I915_OA_FORMAT_A13]                    = { 0, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A29]                    = { 1, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_B4_C8]                  = { 4, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+
>>+	/* haswell+ upto but not including tigerlake */
>>+	[I915_OA_FORMAT_C4_B8]                  = { 7, 64, INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
>I don't think we support IVB,.

So the above formats are only HASWELL then?

>>+
>>+	/* gen8+ upto but not including tigerlake */
>>+	[I915_OA_FORMAT_A12]                    = { 0, 64, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>+	[I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>+
>>+	/* gen8+ */
>>+	[I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>>  };
>
>
>You also need to change i915_oa_stream_init() to use the global array. 
>Looks like it will access a NULL pointer atm.
>

yikes, will do.

>
>>  #define SAMPLE_OA_REPORT      (1<<0)
>>@@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
>>  	for (i = 0; i < n_props; i++) {
>>  		u64 oa_period, oa_freq_hz;
>>  		u64 id, value;
>>+		enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>>  		ret = get_user(id, uprop);
>>  		if (ret)
>>@@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
>>  					  value);
>>  				return -EINVAL;
>>  			}
>>-			if (!perf->oa_formats[value].size) {
>>-				DRM_DEBUG("Unsupported OA report format %llu\n",
>>+			if (p < perf->oa_formats[value].start ||
>>+			    p > perf->oa_formats[value].end) {
>>+				DRM_DEBUG("OA report format not supported %llu\n",
>>  					  value);
>>  				return -EINVAL;
>>  			}
>>@@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  	/* XXX const struct i915_perf_ops! */
>>+	perf->oa_formats = oa_formats;
>>  	if (IS_HASWELL(i915)) {
>>  		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
>>  		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
>>@@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  		perf->ops.oa_disable = gen7_oa_disable;
>>  		perf->ops.read = gen7_oa_read;
>>  		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
>>-
>>-		perf->oa_formats = hsw_oa_formats;
>>  	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>>  		/* Note: that although we could theoretically also support the
>>  		 * legacy ringbuffer mode on BDW (and earlier iterations of
>>@@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  		perf->ops.read = gen8_oa_read;
>>  		if (IS_GEN_RANGE(i915, 8, 9)) {
>>-			perf->oa_formats = gen8_plus_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen7_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>@@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  				perf->gen8_valid_ctx_bit = BIT(16);
>>  			}
>>  		} else if (IS_GEN_RANGE(i915, 10, 11)) {
>>-			perf->oa_formats = gen8_plus_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen7_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>@@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  			}
>>  			perf->gen8_valid_ctx_bit = BIT(16);
>>  		} else if (IS_GEN(i915, 12)) {
>>-			perf->oa_formats = gen12_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen12_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
>>index a36a455ae336..84dceb743ebc 100644
>>--- a/drivers/gpu/drm/i915/i915_perf_types.h
>>+++ b/drivers/gpu/drm/i915/i915_perf_types.h
>>@@ -19,6 +19,7 @@
>>  #include "gt/intel_sseu.h"
>>  #include "i915_reg.h"
>>  #include "intel_wakeref.h"
>>+#include "intel_device_info.h"
>>  struct drm_i915_private;
>>  struct file;
>>@@ -32,6 +33,8 @@ struct intel_engine_cs;
>>  struct i915_oa_format {
>>  	u32 format;
>>  	int size;
>>+	enum intel_platform start;
>>+	enum intel_platform end;
>
>
>Also need to drop oa_formats from struct i915_perf

oa_formats still remains in i915_perf. It's just that

perf->oa_formats = oa_formats;

for all platforms.

Thanks,
Umesh
>
>
>>  };
>>  struct i915_oa_reg {
>
>
Umesh Nerlige Ramappa Dec. 18, 2020, 2:32 a.m. UTC | #4
On Wed, Dec 16, 2020 at 09:25:35AM +0000, Tvrtko Ursulin wrote:
>
>On 15/12/2020 21:49, Umesh Nerlige Ramappa wrote:
>>Variations in OA formats in the different gens has led to creation of
>>several sparse arrays to store the formats.
>>
>>Move oa formats into a single array and add the supported range of
>>platforms for the oa formats.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>>  drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>>  2 files changed, 28 insertions(+), 31 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index f553caf4b06d..afa92cf075c4 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>>  /* XXX: beware if future OA HW adds new report formats that the current
>>   * code assumes all reports have a power-of-two size and ~(size - 1) can
>>- * be used as a mask to align the OA tail pointer.
>>+ * be used as a mask to align the OA tail pointer. Note that the platforms
>>+ * in this array specify a range (inclusive of start and end).
>>   */
>>-static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A13]	    = { 0, 64 },
>>-	[I915_OA_FORMAT_A29]	    = { 1, 128 },
>>-	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
>>-	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
>>-	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
>>-	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
>>-	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
>>-	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
>>-};
>>-
>>-static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A12]		    = { 0, 64 },
>>-	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
>>-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>-	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
>>-};
>>-
>>-static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>+static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>>+	/* haswell */
>>+	[I915_OA_FORMAT_A13]                    = { 0, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A29]                    = { 1, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_B4_C8]                  = { 4, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+
>>+	/* haswell+ upto but not including tigerlake */
>>+	[I915_OA_FORMAT_C4_B8]                  = { 7, 64, INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
>>+
>>+	/* gen8+ upto but not including tigerlake */
>>+	[I915_OA_FORMAT_A12]                    = { 0, 64, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>+	[I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>+
>>+	/* gen8+ */
>>+	[I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>>  };
>>  #define SAMPLE_OA_REPORT      (1<<0)
>>@@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
>>  	for (i = 0; i < n_props; i++) {
>>  		u64 oa_period, oa_freq_hz;
>>  		u64 id, value;
>>+		enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>>  		ret = get_user(id, uprop);
>>  		if (ret)
>>@@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
>>  					  value);
>>  				return -EINVAL;
>>  			}
>>-			if (!perf->oa_formats[value].size) {
>>-				DRM_DEBUG("Unsupported OA report format %llu\n",
>>+			if (p < perf->oa_formats[value].start ||
>>+			    p > perf->oa_formats[value].end) {
>>+				DRM_DEBUG("OA report format not supported %llu\n",
>>  					  value);
>>  				return -EINVAL;
>>  			}
>>@@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  	/* XXX const struct i915_perf_ops! */
>>+	perf->oa_formats = oa_formats;
>>  	if (IS_HASWELL(i915)) {
>>  		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
>>  		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
>>@@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  		perf->ops.oa_disable = gen7_oa_disable;
>>  		perf->ops.read = gen7_oa_read;
>>  		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
>>-
>>-		perf->oa_formats = hsw_oa_formats;
>>  	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>>  		/* Note: that although we could theoretically also support the
>>  		 * legacy ringbuffer mode on BDW (and earlier iterations of
>>@@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  		perf->ops.read = gen8_oa_read;
>>  		if (IS_GEN_RANGE(i915, 8, 9)) {
>>-			perf->oa_formats = gen8_plus_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen7_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>@@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  				perf->gen8_valid_ctx_bit = BIT(16);
>>  			}
>>  		} else if (IS_GEN_RANGE(i915, 10, 11)) {
>>-			perf->oa_formats = gen8_plus_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen7_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>@@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  			}
>>  			perf->gen8_valid_ctx_bit = BIT(16);
>>  		} else if (IS_GEN(i915, 12)) {
>>-			perf->oa_formats = gen12_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen12_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
>>index a36a455ae336..84dceb743ebc 100644
>>--- a/drivers/gpu/drm/i915/i915_perf_types.h
>>+++ b/drivers/gpu/drm/i915/i915_perf_types.h
>>@@ -19,6 +19,7 @@
>>  #include "gt/intel_sseu.h"
>>  #include "i915_reg.h"
>>  #include "intel_wakeref.h"
>>+#include "intel_device_info.h"
>>  struct drm_i915_private;
>>  struct file;
>>@@ -32,6 +33,8 @@ struct intel_engine_cs;
>>  struct i915_oa_format {
>>  	u32 format;
>>  	int size;
>>+	enum intel_platform start;
>>+	enum intel_platform end;
>
>As a drive by only - I question the concept of thinking about 
>platforms enums as ordered and having things like INTEL_TIGERLAKE - 1 
>in the code. It's completely novel concept and I think fragile. Order 
>there is purely the order of upstreaming and not stable.
>
>Suggestions wise, if the rules are Gen based you could use a mask, but 
>if they are truly platform based then it is more tricky. However 
>current code does use gen ranges to assign them, so would a genmask 
>really not fit?

Hmm, the rules are platform-based. A mask would be ideal, but the last I 
thought about it, it felt more complex than this solution. This code 
would have to maintain a mask of supported platforms (and subplatforms) 
for each entry (preferably in the same format as runtime info -> 
platform_mask).  Then we would just AND this mask with the 
platform_mask/s to check for support. Let me know if that's what you 
have in mind, then I can give it a try.

Thanks,
Umesh

>
>Regards,
>
>Tvrtko
Tvrtko Ursulin Dec. 18, 2020, 9:20 a.m. UTC | #5
On 18/12/2020 02:32, Umesh Nerlige Ramappa wrote:
> On Wed, Dec 16, 2020 at 09:25:35AM +0000, Tvrtko Ursulin wrote:
>>
>> On 15/12/2020 21:49, Umesh Nerlige Ramappa wrote:
>>> Variations in OA formats in the different gens has led to creation of
>>> several sparse arrays to store the formats.
>>>
>>> Move oa formats into a single array and add the supported range of
>>> platforms for the oa formats.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>>>  drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>>>  2 files changed, 28 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index f553caf4b06d..afa92cf075c4 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>>>  /* XXX: beware if future OA HW adds new report formats that the current
>>>   * code assumes all reports have a power-of-two size and ~(size - 1) 
>>> can
>>> - * be used as a mask to align the OA tail pointer.
>>> + * be used as a mask to align the OA tail pointer. Note that the 
>>> platforms
>>> + * in this array specify a range (inclusive of start and end).
>>>   */
>>> -static const struct i915_oa_format 
>>> hsw_oa_formats[I915_OA_FORMAT_MAX] = {
>>> -    [I915_OA_FORMAT_A13]        = { 0, 64 },
>>> -    [I915_OA_FORMAT_A29]        = { 1, 128 },
>>> -    [I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
>>> -    /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer 
>>> size */
>>> -    [I915_OA_FORMAT_B4_C8]        = { 4, 64 },
>>> -    [I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
>>> -    [I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
>>> -    [I915_OA_FORMAT_C4_B8]        = { 7, 64 },
>>> -};
>>> -
>>> -static const struct i915_oa_format 
>>> gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>>> -    [I915_OA_FORMAT_A12]            = { 0, 64 },
>>> -    [I915_OA_FORMAT_A12_B8_C8]        = { 2, 128 },
>>> -    [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>> -    [I915_OA_FORMAT_C4_B8]            = { 7, 64 },
>>> -};
>>> -
>>> -static const struct i915_oa_format 
>>> gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>>> -    [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>> +static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>>> +    /* haswell */
>>> +    [I915_OA_FORMAT_A13]                    = { 0, 64, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_A29]                    = { 1, 128, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_B4_C8]                  = { 4, 64, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +
>>> +    /* haswell+ upto but not including tigerlake */
>>> +    [I915_OA_FORMAT_C4_B8]                  = { 7, 64, 
>>> INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
>>> +
>>> +    /* gen8+ upto but not including tigerlake */
>>> +    [I915_OA_FORMAT_A12]                    = { 0, 64, 
>>> INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>> +    [I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, 
>>> INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>> +
>>> +    /* gen8+ */
>>> +    [I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, 
>>> INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>>>  };
>>>  #define SAMPLE_OA_REPORT      (1<<0)
>>> @@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct 
>>> i915_perf *perf,
>>>      for (i = 0; i < n_props; i++) {
>>>          u64 oa_period, oa_freq_hz;
>>>          u64 id, value;
>>> +        enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>>>          ret = get_user(id, uprop);
>>>          if (ret)
>>> @@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct 
>>> i915_perf *perf,
>>>                        value);
>>>                  return -EINVAL;
>>>              }
>>> -            if (!perf->oa_formats[value].size) {
>>> -                DRM_DEBUG("Unsupported OA report format %llu\n",
>>> +            if (p < perf->oa_formats[value].start ||
>>> +                p > perf->oa_formats[value].end) {
>>> +                DRM_DEBUG("OA report format not supported %llu\n",
>>>                        value);
>>>                  return -EINVAL;
>>>              }
>>> @@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>      /* XXX const struct i915_perf_ops! */
>>> +    perf->oa_formats = oa_formats;
>>>      if (IS_HASWELL(i915)) {
>>>          perf->ops.is_valid_b_counter_reg = 
>>> gen7_is_valid_b_counter_addr;
>>>          perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
>>> @@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>          perf->ops.oa_disable = gen7_oa_disable;
>>>          perf->ops.read = gen7_oa_read;
>>>          perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
>>> -
>>> -        perf->oa_formats = hsw_oa_formats;
>>>      } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>>>          /* Note: that although we could theoretically also support the
>>>           * legacy ringbuffer mode on BDW (and earlier iterations of
>>> @@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>          perf->ops.read = gen8_oa_read;
>>>          if (IS_GEN_RANGE(i915, 8, 9)) {
>>> -            perf->oa_formats = gen8_plus_oa_formats;
>>> -
>>>              perf->ops.is_valid_b_counter_reg =
>>>                  gen7_is_valid_b_counter_addr;
>>>              perf->ops.is_valid_mux_reg =
>>> @@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>                  perf->gen8_valid_ctx_bit = BIT(16);
>>>              }
>>>          } else if (IS_GEN_RANGE(i915, 10, 11)) {
>>> -            perf->oa_formats = gen8_plus_oa_formats;
>>> -
>>>              perf->ops.is_valid_b_counter_reg =
>>>                  gen7_is_valid_b_counter_addr;
>>>              perf->ops.is_valid_mux_reg =
>>> @@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>>              }
>>>              perf->gen8_valid_ctx_bit = BIT(16);
>>>          } else if (IS_GEN(i915, 12)) {
>>> -            perf->oa_formats = gen12_oa_formats;
>>> -
>>>              perf->ops.is_valid_b_counter_reg =
>>>                  gen12_is_valid_b_counter_addr;
>>>              perf->ops.is_valid_mux_reg =
>>> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
>>> b/drivers/gpu/drm/i915/i915_perf_types.h
>>> index a36a455ae336..84dceb743ebc 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf_types.h
>>> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
>>> @@ -19,6 +19,7 @@
>>>  #include "gt/intel_sseu.h"
>>>  #include "i915_reg.h"
>>>  #include "intel_wakeref.h"
>>> +#include "intel_device_info.h"
>>>  struct drm_i915_private;
>>>  struct file;
>>> @@ -32,6 +33,8 @@ struct intel_engine_cs;
>>>  struct i915_oa_format {
>>>      u32 format;
>>>      int size;
>>> +    enum intel_platform start;
>>> +    enum intel_platform end;
>>
>> As a drive by only - I question the concept of thinking about 
>> platforms enums as ordered and having things like INTEL_TIGERLAKE - 1 
>> in the code. It's completely novel concept and I think fragile. Order 
>> there is purely the order of upstreaming and not stable.
>>
>> Suggestions wise, if the rules are Gen based you could use a mask, but 
>> if they are truly platform based then it is more tricky. However 
>> current code does use gen ranges to assign them, so would a genmask 
>> really not fit?
> 
> Hmm, the rules are platform-based. A mask would be ideal, but the last I 
> thought about it, it felt more complex than this solution. This code 
> would have to maintain a mask of supported platforms (and subplatforms) 
> for each entry (preferably in the same format as runtime info -> 
> platform_mask).  Then we would just AND this mask with the 
> platform_mask/s to check for support. Let me know if that's what you 
> have in mind, then I can give it a try.

Gen mask would be trivial but platform mask a bit more work because it 
is an array of u32. Two are used at the moment with some low bits for 
subplatform. It might be doable with some macro magic but I haven't 
really thought about it. You can look at the IS_PLATFORM implementation 
and see if you can come up with something neat.

Regards,

Tvrtko
Lionel Landwerlin Dec. 18, 2020, 9:47 a.m. UTC | #6
On 18/12/2020 04:08, Umesh Nerlige Ramappa wrote:
> On Wed, Dec 16, 2020 at 10:30:24AM +0200, Lionel Landwerlin wrote:
>> On 15/12/2020 23:49, Umesh Nerlige Ramappa wrote:
>>> Variations in OA formats in the different gens has led to creation of
>>> several sparse arrays to store the formats.
>>>
>>> Move oa formats into a single array and add the supported range of
>>> platforms for the oa formats.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>>>  drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>>>  2 files changed, 28 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index f553caf4b06d..afa92cf075c4 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>>>  /* XXX: beware if future OA HW adds new report formats that the 
>>> current
>>>   * code assumes all reports have a power-of-two size and ~(size - 
>>> 1) can
>>> - * be used as a mask to align the OA tail pointer.
>>> + * be used as a mask to align the OA tail pointer. Note that the 
>>> platforms
>>> + * in this array specify a range (inclusive of start and end).
>>>   */
>>> -static const struct i915_oa_format 
>>> hsw_oa_formats[I915_OA_FORMAT_MAX] = {
>>> -    [I915_OA_FORMAT_A13]        = { 0, 64 },
>>> -    [I915_OA_FORMAT_A29]        = { 1, 128 },
>>> -    [I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
>>> -    /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer 
>>> size */
>>> -    [I915_OA_FORMAT_B4_C8]        = { 4, 64 },
>>> -    [I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
>>> -    [I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
>>> -    [I915_OA_FORMAT_C4_B8]        = { 7, 64 },
>>> -};
>>> -
>>> -static const struct i915_oa_format 
>>> gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>>> -    [I915_OA_FORMAT_A12]            = { 0, 64 },
>>> -    [I915_OA_FORMAT_A12_B8_C8]        = { 2, 128 },
>>> -    [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>> -    [I915_OA_FORMAT_C4_B8]            = { 7, 64 },
>>> -};
>>> -
>>> -static const struct i915_oa_format 
>>> gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>>> -    [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>> +static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>>> +    /* haswell */
>>> +    [I915_OA_FORMAT_A13]                    = { 0, 64, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_A29]                    = { 1, 128, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_B4_C8]                  = { 4, 64, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +    [I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, 
>>> INTEL_IVYBRIDGE, INTEL_HASWELL },
>>> +
>>> +    /* haswell+ upto but not including tigerlake */
>>> +    [I915_OA_FORMAT_C4_B8]                  = { 7, 64, 
>>> INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
>> I don't think we support IVB,.
>
> So the above formats are only HASWELL then?


Correct


>
>>> +
>>> +    /* gen8+ upto but not including tigerlake */
>>> +    [I915_OA_FORMAT_A12]                    = { 0, 64, 
>>> INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>> +    [I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, 
>>> INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>> +
>>> +    /* gen8+ */
>>> +    [I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, 
>>> INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>>>  };
>>
>>
>> You also need to change i915_oa_stream_init() to use the global 
>> array. Looks like it will access a NULL pointer atm.
>>
>
> yikes, will do.


Ah... See my comment below.


>
>>
>>>  #define SAMPLE_OA_REPORT (1<<0)
>>> @@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct 
>>> i915_perf *perf,
>>>      for (i = 0; i < n_props; i++) {
>>>          u64 oa_period, oa_freq_hz;
>>>          u64 id, value;
>>> +        enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>>>          ret = get_user(id, uprop);
>>>          if (ret)
>>> @@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct 
>>> i915_perf *perf,
>>>                        value);
>>>                  return -EINVAL;
>>>              }
>>> -            if (!perf->oa_formats[value].size) {
>>> -                DRM_DEBUG("Unsupported OA report format %llu\n",
>>> +            if (p < perf->oa_formats[value].start ||
>>> +                p > perf->oa_formats[value].end) {
>>> +                DRM_DEBUG("OA report format not supported %llu\n",
>>>                        value);
>>>                  return -EINVAL;
>>>              }
>>> @@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private 
>>> *i915)
>>>      /* XXX const struct i915_perf_ops! */
>>> +    perf->oa_formats = oa_formats;
>>>      if (IS_HASWELL(i915)) {
>>>          perf->ops.is_valid_b_counter_reg = 
>>> gen7_is_valid_b_counter_addr;
>>>          perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
>>> @@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private 
>>> *i915)
>>>          perf->ops.oa_disable = gen7_oa_disable;
>>>          perf->ops.read = gen7_oa_read;
>>>          perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
>>> -
>>> -        perf->oa_formats = hsw_oa_formats;
>>>      } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>>>          /* Note: that although we could theoretically also support the
>>>           * legacy ringbuffer mode on BDW (and earlier iterations of
>>> @@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private 
>>> *i915)
>>>          perf->ops.read = gen8_oa_read;
>>>          if (IS_GEN_RANGE(i915, 8, 9)) {
>>> -            perf->oa_formats = gen8_plus_oa_formats;
>>> -
>>>              perf->ops.is_valid_b_counter_reg =
>>>                  gen7_is_valid_b_counter_addr;
>>>              perf->ops.is_valid_mux_reg =
>>> @@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private 
>>> *i915)
>>>                  perf->gen8_valid_ctx_bit = BIT(16);
>>>              }
>>>          } else if (IS_GEN_RANGE(i915, 10, 11)) {
>>> -            perf->oa_formats = gen8_plus_oa_formats;
>>> -
>>>              perf->ops.is_valid_b_counter_reg =
>>>                  gen7_is_valid_b_counter_addr;
>>>              perf->ops.is_valid_mux_reg =
>>> @@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private 
>>> *i915)
>>>              }
>>>              perf->gen8_valid_ctx_bit = BIT(16);
>>>          } else if (IS_GEN(i915, 12)) {
>>> -            perf->oa_formats = gen12_oa_formats;
>>> -
>>>              perf->ops.is_valid_b_counter_reg =
>>>                  gen12_is_valid_b_counter_addr;
>>>              perf->ops.is_valid_mux_reg =
>>> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
>>> b/drivers/gpu/drm/i915/i915_perf_types.h
>>> index a36a455ae336..84dceb743ebc 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf_types.h
>>> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
>>> @@ -19,6 +19,7 @@
>>>  #include "gt/intel_sseu.h"
>>>  #include "i915_reg.h"
>>>  #include "intel_wakeref.h"
>>> +#include "intel_device_info.h"
>>>  struct drm_i915_private;
>>>  struct file;
>>> @@ -32,6 +33,8 @@ struct intel_engine_cs;
>>>  struct i915_oa_format {
>>>      u32 format;
>>>      int size;
>>> +    enum intel_platform start;
>>> +    enum intel_platform end;
>>
>>
>> Also need to drop oa_formats from struct i915_perf
>
> oa_formats still remains in i915_perf. It's just that
>
> perf->oa_formats = oa_formats;
>
> for all platforms.
>
> Thanks,
> Umesh


Oops, sorry I missed that.


-Lionel


>>
>>
>>>  };
>>>  struct i915_oa_reg {
>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index f553caf4b06d..afa92cf075c4 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -298,28 +298,27 @@  static u32 i915_oa_max_sample_rate = 100000;
 
 /* XXX: beware if future OA HW adds new report formats that the current
  * code assumes all reports have a power-of-two size and ~(size - 1) can
- * be used as a mask to align the OA tail pointer.
+ * be used as a mask to align the OA tail pointer. Note that the platforms
+ * in this array specify a range (inclusive of start and end).
  */
-static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
-	[I915_OA_FORMAT_A13]	    = { 0, 64 },
-	[I915_OA_FORMAT_A29]	    = { 1, 128 },
-	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
-	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
-	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
-	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
-	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
-	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
-};
-
-static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
-	[I915_OA_FORMAT_A12]		    = { 0, 64 },
-	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
-	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
-};
-
-static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
+	/* haswell */
+	[I915_OA_FORMAT_A13]                    = { 0, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
+	[I915_OA_FORMAT_A29]                    = { 1, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
+	[I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
+	[I915_OA_FORMAT_B4_C8]                  = { 4, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
+	[I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, INTEL_IVYBRIDGE, INTEL_HASWELL },
+	[I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
+
+	/* haswell+ upto but not including tigerlake */
+	[I915_OA_FORMAT_C4_B8]                  = { 7, 64, INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
+
+	/* gen8+ upto but not including tigerlake */
+	[I915_OA_FORMAT_A12]                    = { 0, 64, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
+	[I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
+
+	/* gen8+ */
+	[I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
 };
 
 #define SAMPLE_OA_REPORT      (1<<0)
@@ -3575,6 +3574,7 @@  static int read_properties_unlocked(struct i915_perf *perf,
 	for (i = 0; i < n_props; i++) {
 		u64 oa_period, oa_freq_hz;
 		u64 id, value;
+		enum intel_platform p = INTEL_INFO(perf->i915)->platform;
 
 		ret = get_user(id, uprop);
 		if (ret)
@@ -3611,8 +3611,9 @@  static int read_properties_unlocked(struct i915_perf *perf,
 					  value);
 				return -EINVAL;
 			}
-			if (!perf->oa_formats[value].size) {
-				DRM_DEBUG("Unsupported OA report format %llu\n",
+			if (p < perf->oa_formats[value].start ||
+			    p > perf->oa_formats[value].end) {
+				DRM_DEBUG("OA report format not supported %llu\n",
 					  value);
 				return -EINVAL;
 			}
@@ -4270,6 +4271,7 @@  void i915_perf_init(struct drm_i915_private *i915)
 
 	/* XXX const struct i915_perf_ops! */
 
+	perf->oa_formats = oa_formats;
 	if (IS_HASWELL(i915)) {
 		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
 		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
@@ -4280,8 +4282,6 @@  void i915_perf_init(struct drm_i915_private *i915)
 		perf->ops.oa_disable = gen7_oa_disable;
 		perf->ops.read = gen7_oa_read;
 		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
-
-		perf->oa_formats = hsw_oa_formats;
 	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
 		/* Note: that although we could theoretically also support the
 		 * legacy ringbuffer mode on BDW (and earlier iterations of
@@ -4292,8 +4292,6 @@  void i915_perf_init(struct drm_i915_private *i915)
 		perf->ops.read = gen8_oa_read;
 
 		if (IS_GEN_RANGE(i915, 8, 9)) {
-			perf->oa_formats = gen8_plus_oa_formats;
-
 			perf->ops.is_valid_b_counter_reg =
 				gen7_is_valid_b_counter_addr;
 			perf->ops.is_valid_mux_reg =
@@ -4324,8 +4322,6 @@  void i915_perf_init(struct drm_i915_private *i915)
 				perf->gen8_valid_ctx_bit = BIT(16);
 			}
 		} else if (IS_GEN_RANGE(i915, 10, 11)) {
-			perf->oa_formats = gen8_plus_oa_formats;
-
 			perf->ops.is_valid_b_counter_reg =
 				gen7_is_valid_b_counter_addr;
 			perf->ops.is_valid_mux_reg =
@@ -4348,8 +4344,6 @@  void i915_perf_init(struct drm_i915_private *i915)
 			}
 			perf->gen8_valid_ctx_bit = BIT(16);
 		} else if (IS_GEN(i915, 12)) {
-			perf->oa_formats = gen12_oa_formats;
-
 			perf->ops.is_valid_b_counter_reg =
 				gen12_is_valid_b_counter_addr;
 			perf->ops.is_valid_mux_reg =
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index a36a455ae336..84dceb743ebc 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -19,6 +19,7 @@ 
 #include "gt/intel_sseu.h"
 #include "i915_reg.h"
 #include "intel_wakeref.h"
+#include "intel_device_info.h"
 
 struct drm_i915_private;
 struct file;
@@ -32,6 +33,8 @@  struct intel_engine_cs;
 struct i915_oa_format {
 	u32 format;
 	int size;
+	enum intel_platform start;
+	enum intel_platform end;
 };
 
 struct i915_oa_reg {