Message ID | 20210202075417.28230-1-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] i915/perf: Store a mask of valid OA formats for a platform | expand |
Quoting Umesh Nerlige Ramappa (2021-02-02 07:54:15) > Validity of an OA format is checked by using a sparse array of formats > per gen. Instead maintain a mask of supported formats for a platform in > the perf object. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 64 +++++++++++++++++++++++++- > drivers/gpu/drm/i915/i915_perf_types.h | 16 +++++++ > 2 files changed, 79 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 112ba5f2ce90..973577fcad58 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -3524,6 +3524,19 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) > 2ULL << exponent); > } > > +static __always_inline bool > +oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format) > +{ > + return !!(perf->format_mask[__format_index(format)] & > + __format_bit(format)); !! is already provided by the implicit cast to (bool) > +} > + > +static __always_inline void > +oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format) > +{ > + perf->format_mask[__format_index(format)] |= __format_bit(format); > +} > + > /** > * read_properties_unlocked - validate + copy userspace stream open properties > * @perf: i915 perf instance > @@ -3615,7 +3628,7 @@ static int read_properties_unlocked(struct i915_perf *perf, > value); > return -EINVAL; > } > - if (!perf->oa_formats[value].size) { > + if (!oa_format_valid(perf, value)) { > DRM_DEBUG("Unsupported OA report format %llu\n", > value); > return -EINVAL; > @@ -4259,6 +4272,53 @@ static struct ctl_table dev_root[] = { > {} > }; > > +static void oa_init_supported_formats(struct i915_perf *perf) > +{ > + struct drm_i915_private *i915 = perf->i915; > + enum intel_platform platform = INTEL_INFO(i915)->platform; > + > + switch (platform) { > + case INTEL_HASWELL: > + oa_format_add(perf, I915_OA_FORMAT_A13); > + oa_format_add(perf, I915_OA_FORMAT_A13); > + oa_format_add(perf, I915_OA_FORMAT_A29); > + oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_B4_C8); > + oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16); > + oa_format_add(perf, I915_OA_FORMAT_C4_B8); > + break; > + > + case INTEL_BROADWELL: > + case INTEL_CHERRYVIEW: > + case INTEL_SKYLAKE: > + case INTEL_BROXTON: > + case INTEL_KABYLAKE: > + case INTEL_GEMINILAKE: > + case INTEL_COFFEELAKE: > + case INTEL_COMETLAKE: > + case INTEL_CANNONLAKE: > + case INTEL_ICELAKE: > + case INTEL_ELKHARTLAKE: > + case INTEL_JASPERLAKE: > + oa_format_add(perf, I915_OA_FORMAT_A12); > + oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_C4_B8); > + break; Ok, this looks as compact and readable as writing it as a bunch of tables. I presume there's a reason you didn't just use generation rather than platform. switch (gen) { case 7: haswell(); break; case 8 .. 11: broadwell(); break; case 12: tigerlake(); break; } if you wanted to stick with a switch rather than an if-else tree for the ranges. Note you could equally do case INTEL_BROADWELL .. INTEL_JASPERLAKE: but I expect that to cause confusion for the reader. > + /** > + * Use a format mask to store the supported formats > + * for a platform. > + */ > +#define __fbits (BITS_PER_TYPE(u32)) > +#define __format_bit(__f) \ > + BIT((__f) & (__fbits - 1)) > + > +#define __format_index_shift (5) > +#define __format_index(__f) \ > + (((__f) & ~(__fbits - 1)) >> __format_index_shift) > + > +#define FORMAT_MASK_SIZE (((I915_OA_FORMAT_MAX - 1) / __fbits) + 1) > + u32 format_mask[FORMAT_MASK_SIZE]; This is just open-coding set_bit/test_bit #define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG) unsigned long format_mask[FORMAT_MASK_SIZE]; static __always_inline bool oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format) { return test_bit(format, perf->format_mask); } static __always_inline void oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format) { __set_bit(format, perf->format_mask); } -Chris
On Tue, Feb 02, 2021 at 08:24:15AM +0000, Chris Wilson wrote: >Quoting Umesh Nerlige Ramappa (2021-02-02 07:54:15) >> Validity of an OA format is checked by using a sparse array of formats >> per gen. Instead maintain a mask of supported formats for a platform in >> the perf object. >> >> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> >> --- >> drivers/gpu/drm/i915/i915_perf.c | 64 +++++++++++++++++++++++++- >> drivers/gpu/drm/i915/i915_perf_types.h | 16 +++++++ >> 2 files changed, 79 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 112ba5f2ce90..973577fcad58 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -3524,6 +3524,19 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) >> 2ULL << exponent); >> } >> >> +static __always_inline bool >> +oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format) >> +{ >> + return !!(perf->format_mask[__format_index(format)] & >> + __format_bit(format)); > >!! is already provided by the implicit cast to (bool) > >> +} >> + >> +static __always_inline void >> +oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format) >> +{ >> + perf->format_mask[__format_index(format)] |= __format_bit(format); >> +} >> + >> /** >> * read_properties_unlocked - validate + copy userspace stream open properties >> * @perf: i915 perf instance >> @@ -3615,7 +3628,7 @@ static int read_properties_unlocked(struct i915_perf *perf, >> value); >> return -EINVAL; >> } >> - if (!perf->oa_formats[value].size) { >> + if (!oa_format_valid(perf, value)) { >> DRM_DEBUG("Unsupported OA report format %llu\n", >> value); >> return -EINVAL; >> @@ -4259,6 +4272,53 @@ static struct ctl_table dev_root[] = { >> {} >> }; >> >> +static void oa_init_supported_formats(struct i915_perf *perf) >> +{ >> + struct drm_i915_private *i915 = perf->i915; >> + enum intel_platform platform = INTEL_INFO(i915)->platform; >> + >> + switch (platform) { >> + case INTEL_HASWELL: >> + oa_format_add(perf, I915_OA_FORMAT_A13); >> + oa_format_add(perf, I915_OA_FORMAT_A13); >> + oa_format_add(perf, I915_OA_FORMAT_A29); >> + oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8); >> + oa_format_add(perf, I915_OA_FORMAT_B4_C8); >> + oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8); >> + oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16); >> + oa_format_add(perf, I915_OA_FORMAT_C4_B8); >> + break; >> + >> + case INTEL_BROADWELL: >> + case INTEL_CHERRYVIEW: >> + case INTEL_SKYLAKE: >> + case INTEL_BROXTON: >> + case INTEL_KABYLAKE: >> + case INTEL_GEMINILAKE: >> + case INTEL_COFFEELAKE: >> + case INTEL_COMETLAKE: >> + case INTEL_CANNONLAKE: >> + case INTEL_ICELAKE: >> + case INTEL_ELKHARTLAKE: >> + case INTEL_JASPERLAKE: >> + oa_format_add(perf, I915_OA_FORMAT_A12); >> + oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8); >> + oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8); >> + oa_format_add(perf, I915_OA_FORMAT_C4_B8); >> + break; > >Ok, this looks as compact and readable as writing it as a bunch of >tables. I presume there's a reason you didn't just use generation rather >than platform. > >switch (gen) { >case 7: > haswell(); > break; >case 8 .. 11: > broadwell(); > break; >case 12: > tigerlake(); > break; >} >if you wanted to stick with a switch rather than an if-else tree for the >ranges. only haswell is supported on gen7 and gen12 may define new formats that are platform specific. How about a mix? - if (gen == 7 && haswell) haswell(); else if (gen >= 8 && gen <= 11) broadwell; else gen12_formats(); gen12_formats can choose to use the switch if formats vary between platforms. Thanks, Umesh > >Note you could equally do > case INTEL_BROADWELL .. INTEL_JASPERLAKE: >but I expect that to cause confusion for the reader. > >> + /** >> + * Use a format mask to store the supported formats >> + * for a platform. >> + */ >> +#define __fbits (BITS_PER_TYPE(u32)) >> +#define __format_bit(__f) \ >> + BIT((__f) & (__fbits - 1)) >> + >> +#define __format_index_shift (5) >> +#define __format_index(__f) \ >> + (((__f) & ~(__fbits - 1)) >> __format_index_shift) >> + >> +#define FORMAT_MASK_SIZE (((I915_OA_FORMAT_MAX - 1) / __fbits) + 1) >> + u32 format_mask[FORMAT_MASK_SIZE]; > >This is just open-coding set_bit/test_bit > >#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG) >unsigned long format_mask[FORMAT_MASK_SIZE]; > >static __always_inline bool >oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format) >{ > return test_bit(format, perf->format_mask); >} > >static __always_inline void >oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format) >{ > __set_bit(format, perf->format_mask); >} >-Chris
Quoting Umesh Nerlige Ramappa (2021-02-02 20:10:44) > On Tue, Feb 02, 2021 at 08:24:15AM +0000, Chris Wilson wrote: > >Ok, this looks as compact and readable as writing it as a bunch of > >tables. I presume there's a reason you didn't just use generation rather > >than platform. > > > >switch (gen) { > >case 7: > > haswell(); > > break; > >case 8 .. 11: > > broadwell(); > > break; > >case 12: > > tigerlake(); > > break; > >} > >if you wanted to stick with a switch rather than an if-else tree for the > >ranges. > > only haswell is supported on gen7 and gen12 may define new formats that > are platform specific. > > How about a mix? - > > if (gen == 7 && haswell) > haswell(); > else if (gen >= 8 && gen <= 11) > broadwell; > else > gen12_formats(); > > gen12_formats can choose to use the switch if formats vary between > platforms. I didn't mind the platform switch too much, so no need to change at the moment. I just worry that it's more typing to maintain :) What I thought you were going to do (from the subject) were tables with a platform_mask for applicability, but that I feell would be just as much typing, now and in the future. I thought support started at Haswell, so the other gen7 were not a concern? But yes, if we look at how we end up doing it else where it's a mix of gen and platform if (gen >= 12) gen12_formats; else if (gen >= 8) gen8_formats; else if (IS_HSW) hsw_formats; else MISSING_CASE(gen) At the end of the day, you're the person who is typing this, so it's up to you how much effort you want to spend now to save later. :) -Chris
On 02/02/2021 09:54, Umesh Nerlige Ramappa wrote: > Validity of an OA format is checked by using a sparse array of formats > per gen. Instead maintain a mask of supported formats for a platform in > the perf object. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Nice cleanup : Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Thanks! > --- > drivers/gpu/drm/i915/i915_perf.c | 64 +++++++++++++++++++++++++- > drivers/gpu/drm/i915/i915_perf_types.h | 16 +++++++ > 2 files changed, 79 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 112ba5f2ce90..973577fcad58 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -3524,6 +3524,19 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) > 2ULL << exponent); > } > > +static __always_inline bool > +oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format) > +{ > + return !!(perf->format_mask[__format_index(format)] & > + __format_bit(format)); > +} > + > +static __always_inline void > +oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format) > +{ > + perf->format_mask[__format_index(format)] |= __format_bit(format); > +} > + > /** > * read_properties_unlocked - validate + copy userspace stream open properties > * @perf: i915 perf instance > @@ -3615,7 +3628,7 @@ static int read_properties_unlocked(struct i915_perf *perf, > value); > return -EINVAL; > } > - if (!perf->oa_formats[value].size) { > + if (!oa_format_valid(perf, value)) { > DRM_DEBUG("Unsupported OA report format %llu\n", > value); > return -EINVAL; > @@ -4259,6 +4272,53 @@ static struct ctl_table dev_root[] = { > {} > }; > > +static void oa_init_supported_formats(struct i915_perf *perf) > +{ > + struct drm_i915_private *i915 = perf->i915; > + enum intel_platform platform = INTEL_INFO(i915)->platform; > + > + switch (platform) { > + case INTEL_HASWELL: > + oa_format_add(perf, I915_OA_FORMAT_A13); > + oa_format_add(perf, I915_OA_FORMAT_A13); > + oa_format_add(perf, I915_OA_FORMAT_A29); > + oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_B4_C8); > + oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16); > + oa_format_add(perf, I915_OA_FORMAT_C4_B8); > + break; > + > + case INTEL_BROADWELL: > + case INTEL_CHERRYVIEW: > + case INTEL_SKYLAKE: > + case INTEL_BROXTON: > + case INTEL_KABYLAKE: > + case INTEL_GEMINILAKE: > + case INTEL_COFFEELAKE: > + case INTEL_COMETLAKE: > + case INTEL_CANNONLAKE: > + case INTEL_ICELAKE: > + case INTEL_ELKHARTLAKE: > + case INTEL_JASPERLAKE: > + oa_format_add(perf, I915_OA_FORMAT_A12); > + oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_C4_B8); > + break; > + > + case INTEL_TIGERLAKE: > + case INTEL_ROCKETLAKE: > + case INTEL_DG1: > + case INTEL_ALDERLAKE_S: > + oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8); > + break; > + > + default: > + MISSING_CASE(platform); > + } > +} > + > /** > * i915_perf_init - initialize i915-perf state on module bind > * @i915: i915 device instance > @@ -4408,6 +4468,8 @@ void i915_perf_init(struct drm_i915_private *i915) > 500 * 1000 /* 500us */); > > perf->i915 = i915; > + > + oa_init_supported_formats(perf); > } > } > > diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h > index a36a455ae336..f81bcb533723 100644 > --- a/drivers/gpu/drm/i915/i915_perf_types.h > +++ b/drivers/gpu/drm/i915/i915_perf_types.h > @@ -15,6 +15,7 @@ > #include <linux/types.h> > #include <linux/uuid.h> > #include <linux/wait.h> > +#include <uapi/drm/i915_drm.h> > > #include "gt/intel_sseu.h" > #include "i915_reg.h" > @@ -441,6 +442,21 @@ struct i915_perf { > struct i915_oa_ops ops; > const struct i915_oa_format *oa_formats; > > + /** > + * Use a format mask to store the supported formats > + * for a platform. > + */ > +#define __fbits (BITS_PER_TYPE(u32)) > +#define __format_bit(__f) \ > + BIT((__f) & (__fbits - 1)) > + > +#define __format_index_shift (5) > +#define __format_index(__f) \ > + (((__f) & ~(__fbits - 1)) >> __format_index_shift) > + > +#define FORMAT_MASK_SIZE (((I915_OA_FORMAT_MAX - 1) / __fbits) + 1) > + u32 format_mask[FORMAT_MASK_SIZE]; > + > atomic64_t noa_programming_delay; > }; >
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 112ba5f2ce90..973577fcad58 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3524,6 +3524,19 @@ static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) 2ULL << exponent); } +static __always_inline bool +oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format) +{ + return !!(perf->format_mask[__format_index(format)] & + __format_bit(format)); +} + +static __always_inline void +oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format) +{ + perf->format_mask[__format_index(format)] |= __format_bit(format); +} + /** * read_properties_unlocked - validate + copy userspace stream open properties * @perf: i915 perf instance @@ -3615,7 +3628,7 @@ static int read_properties_unlocked(struct i915_perf *perf, value); return -EINVAL; } - if (!perf->oa_formats[value].size) { + if (!oa_format_valid(perf, value)) { DRM_DEBUG("Unsupported OA report format %llu\n", value); return -EINVAL; @@ -4259,6 +4272,53 @@ static struct ctl_table dev_root[] = { {} }; +static void oa_init_supported_formats(struct i915_perf *perf) +{ + struct drm_i915_private *i915 = perf->i915; + enum intel_platform platform = INTEL_INFO(i915)->platform; + + switch (platform) { + case INTEL_HASWELL: + oa_format_add(perf, I915_OA_FORMAT_A13); + oa_format_add(perf, I915_OA_FORMAT_A13); + oa_format_add(perf, I915_OA_FORMAT_A29); + oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_B4_C8); + oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16); + oa_format_add(perf, I915_OA_FORMAT_C4_B8); + break; + + case INTEL_BROADWELL: + case INTEL_CHERRYVIEW: + case INTEL_SKYLAKE: + case INTEL_BROXTON: + case INTEL_KABYLAKE: + case INTEL_GEMINILAKE: + case INTEL_COFFEELAKE: + case INTEL_COMETLAKE: + case INTEL_CANNONLAKE: + case INTEL_ICELAKE: + case INTEL_ELKHARTLAKE: + case INTEL_JASPERLAKE: + oa_format_add(perf, I915_OA_FORMAT_A12); + oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_C4_B8); + break; + + case INTEL_TIGERLAKE: + case INTEL_ROCKETLAKE: + case INTEL_DG1: + case INTEL_ALDERLAKE_S: + oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8); + break; + + default: + MISSING_CASE(platform); + } +} + /** * i915_perf_init - initialize i915-perf state on module bind * @i915: i915 device instance @@ -4408,6 +4468,8 @@ void i915_perf_init(struct drm_i915_private *i915) 500 * 1000 /* 500us */); perf->i915 = i915; + + oa_init_supported_formats(perf); } } diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index a36a455ae336..f81bcb533723 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -15,6 +15,7 @@ #include <linux/types.h> #include <linux/uuid.h> #include <linux/wait.h> +#include <uapi/drm/i915_drm.h> #include "gt/intel_sseu.h" #include "i915_reg.h" @@ -441,6 +442,21 @@ struct i915_perf { struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; + /** + * Use a format mask to store the supported formats + * for a platform. + */ +#define __fbits (BITS_PER_TYPE(u32)) +#define __format_bit(__f) \ + BIT((__f) & (__fbits - 1)) + +#define __format_index_shift (5) +#define __format_index(__f) \ + (((__f) & ~(__fbits - 1)) >> __format_index_shift) + +#define FORMAT_MASK_SIZE (((I915_OA_FORMAT_MAX - 1) / __fbits) + 1) + u32 format_mask[FORMAT_MASK_SIZE]; + atomic64_t noa_programming_delay; };
Validity of an OA format is checked by using a sparse array of formats per gen. Instead maintain a mask of supported formats for a platform in the perf object. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 64 +++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_perf_types.h | 16 +++++++ 2 files changed, 79 insertions(+), 1 deletion(-)