@@ -4340,7 +4340,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
continue;
sseu->slice_mask = BIT(0);
- sseu->subslice_mask |= BIT(ss);
+ sseu->subslices_mask[0] |= BIT(ss);
eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -4387,7 +4387,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
continue;
sseu->slice_mask |= BIT(s);
- sseu->subslice_mask = info->sseu.subslice_mask;
+ sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
for (ss = 0; ss < ss_max; ss++) {
unsigned int eu_cnt;
@@ -4442,8 +4442,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
sseu->slice_mask |= BIT(s);
if (IS_GEN9_BC(dev_priv))
- sseu->subslice_mask =
- INTEL_INFO(dev_priv)->sseu.subslice_mask;
+ sseu->subslices_mask[s] =
+ INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
for (ss = 0; ss < ss_max; ss++) {
unsigned int eu_cnt;
@@ -4453,7 +4453,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
/* skip disabled subslice */
continue;
- sseu->subslice_mask |= BIT(ss);
+ sseu->subslices_mask[s] |= BIT(ss);
}
eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4475,9 +4475,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
if (sseu->slice_mask) {
- sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
sseu->eu_per_subslice =
INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+ for (s = 0; s < fls(sseu->slice_mask); s++) {
+ sseu->subslices_mask[s] =
+ INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
+ }
sseu->eu_total = sseu->eu_per_subslice *
sseu_subslice_total(sseu);
@@ -4496,6 +4499,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const char *type = is_available_info ? "Available" : "Enabled";
+ int s;
seq_printf(m, " %s Slice Mask: %04x\n", type,
sseu->slice_mask);
@@ -4503,10 +4507,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
hweight8(sseu->slice_mask));
seq_printf(m, " %s Subslice Total: %u\n", type,
sseu_subslice_total(sseu));
- seq_printf(m, " %s Subslice Mask: %04x\n", type,
- sseu->subslice_mask);
- seq_printf(m, " %s Subslice Per Slice: %u\n", type,
- hweight8(sseu->subslice_mask));
+ for (s = 0; s < fls(sseu->slice_mask); s++) {
+ seq_printf(m, " %s Slice%i Subslice Mask: %04x\n", type,
+ s, sseu->subslices_mask[s]);
+ }
seq_printf(m, " %s EU Total: %u\n", type,
sseu->eu_total);
seq_printf(m, " %s EU Per Subslice: %u\n", type,
@@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
return -ENODEV;
break;
case I915_PARAM_SUBSLICE_MASK:
- value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
+ value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
if (!value)
return -ENODEV;
break;
@@ -801,9 +801,12 @@ struct intel_csr {
func(supports_tv); \
func(has_ipc);
+#define GEN_MAX_SLICES (6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES (7)
+
struct sseu_dev_info {
u8 slice_mask;
- u8 subslice_mask;
+ u8 subslices_mask[GEN_MAX_SLICES];
u8 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -812,11 +815,27 @@ struct sseu_dev_info {
u8 has_slice_pg:1;
u8 has_subslice_pg:1;
u8 has_eu_pg:1;
+
+ /* Topology fields */
+ u8 max_slices;
+ u8 max_subslices;
+ u8 max_eus_per_subslice;
+
+ /* We don't have more than 8 eus per subslice at the moment and as we
+ * store eus enabled using bits, no need to multiply by eus per
+ * subslice.
+ */
+ u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
};
static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
{
- return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+ unsigned s, total = 0;
+
+ for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
+ total += hweight8(sseu->subslices_mask[s]);
+
+ return total;
}
/* Keep in gen based order, and chronological order within a gen */
@@ -82,22 +82,74 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv)
#undef PRINT_FLAG
}
+static u8 compute_eu_total(const struct sseu_dev_info *sseu)
+{
+ u8 i, total = 0;
+
+ for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
+ total += hweight8(sseu->eu_mask[i]);
+
+ return total;
+}
+
static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
const u32 fuse2 = I915_READ(GEN8_FUSE2);
+ int s, ss, eu_mask = 0xff;
+ u32 subslice_mask, eu_en;
sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
GEN10_F2_S_ENA_SHIFT;
- sseu->subslice_mask = (1 << 4) - 1;
- sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
- GEN10_F2_SS_DIS_SHIFT);
+ sseu->max_slices = 6;
+ sseu->max_subslices = 4;
+ sseu->max_eus_per_subslice = 8;
+
+ subslice_mask = (1 << 4) - 1;
+ subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+ GEN10_F2_SS_DIS_SHIFT);
+
+ /* Slice0 can have up to 3 subslices, but there are only 2 in
+ * slice1/2.
+ */
+ sseu->subslices_mask[0] = subslice_mask;
+ for (s = 1; s < sseu->max_slices; s++)
+ sseu->subslices_mask[s] = subslice_mask & 0x3;
+
+ /* Slice0 */
+ eu_en = ~I915_READ(GEN8_EU_DISABLE0);
+ for (ss = 0; ss < sseu->max_subslices; ss++)
+ sseu->eu_mask[ss] = (eu_en >> (8 * ss)) & eu_mask;
+ /* Slice1 */
+ sseu->eu_mask[sseu->max_subslices] = (eu_en >> 24) & eu_mask;
+ eu_en = ~I915_READ(GEN8_EU_DISABLE1);
+ sseu->eu_mask[sseu->max_subslices + 1] = eu_en & eu_mask;
+ /* Slice2 */
+ sseu->eu_mask[2 * sseu->max_subslices] = (eu_en >> 8) & eu_mask;
+ sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
+ /* Slice3 */
+ sseu->eu_mask[3 * sseu->max_subslices] = (eu_en >> 24) & eu_mask;
+ eu_en = ~I915_READ(GEN8_EU_DISABLE2);
+ sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
+ /* Slice4 */
+ sseu->eu_mask[4 * sseu->max_subslices] = (eu_en >> 8) & eu_mask;
+ sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
+ /* Slice5 */
+ sseu->eu_mask[5 * sseu->max_subslices] = (eu_en >> 24) & eu_mask;
+ eu_en = ~I915_READ(GEN10_EU_DISABLE3);
+ sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
+
+ /* Do a second pass where we marked the subslices disabled if all
+ * their eus are off.
+ */
+ for (s = 0; s < sseu->max_slices; s++) {
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
+ sseu->subslices_mask[s] &= ~BIT(ss);
+ }
+ }
- sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
- sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
- sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
- sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
- GEN10_EU_DIS_SS_MASK));
+ sseu->eu_total = compute_eu_total(sseu);
/*
* CNL is expected to always have a uniform distribution
@@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
- u32 fuse, eu_dis;
+ u32 fuse;
fuse = I915_READ(CHV_FUSE_GT);
sseu->slice_mask = BIT(0);
+ sseu->max_slices = 1;
+ sseu->max_subslices = 2;
+ sseu->max_eus_per_subslice = 8;
if (!(fuse & CHV_FGT_DISABLE_SS0)) {
- sseu->subslice_mask |= BIT(0);
- eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
- CHV_FGT_EU_DIS_SS0_R1_MASK);
- sseu->eu_total += 8 - hweight32(eu_dis);
+ sseu->subslices_mask[0] |= BIT(0);
+ sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
+ sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
+ sseu->subslices_mask[0] = 1;
}
if (!(fuse & CHV_FGT_DISABLE_SS1)) {
- sseu->subslice_mask |= BIT(1);
- eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
- CHV_FGT_EU_DIS_SS1_R1_MASK);
- sseu->eu_total += 8 - hweight32(eu_dis);
+ sseu->subslices_mask[0] |= BIT(1);
+ sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
+ sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
}
+ sseu->eu_total = compute_eu_total(sseu);
+
/*
* CHV expected to always have a uniform distribution of EU
* across subslices.
@@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct intel_device_info *info = mkwrite_device_info(dev_priv);
struct sseu_dev_info *sseu = &info->sseu;
- int s_max = 3, ss_max = 4, eu_max = 8;
int s, ss;
- u32 fuse2, eu_disable;
+ u32 fuse2, eu_disable, subslice_mask;
u8 eu_mask = 0xff;
fuse2 = I915_READ(GEN8_FUSE2);
sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ /* BXT has a single slice and at most 3 subslices. */
+ sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
+ sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
+ sseu->max_eus_per_subslice = 8;
+
/*
* The subslice disable field is global, i.e. it applies
* to each of the enabled slices.
*/
- sseu->subslice_mask = (1 << ss_max) - 1;
- sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
- GEN9_F2_SS_DIS_SHIFT);
+ subslice_mask = (1 << sseu->max_subslices) - 1;
+ subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
+ GEN9_F2_SS_DIS_SHIFT);
/*
* Iterate through enabled slices and subslices to
* count the total enabled EU.
*/
- for (s = 0; s < s_max; s++) {
+ for (s = 0; s < sseu->max_slices; s++) {
if (!(sseu->slice_mask & BIT(s)))
/* skip disabled slice */
continue;
+ sseu->subslices_mask[s] = subslice_mask;
+
eu_disable = I915_READ(GEN9_EU_DISABLE(s));
- for (ss = 0; ss < ss_max; ss++) {
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
int eu_per_ss;
- if (!(sseu->subslice_mask & BIT(ss)))
+ if (!(sseu->subslices_mask[s] & BIT(ss)))
/* skip disabled subslice */
continue;
- eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
- eu_mask);
+ sseu->eu_mask[ss + s * sseu->max_subslices] =
+ ~((eu_disable >> (ss*8)) & eu_mask);
+
+ eu_per_ss = sseu->max_eus_per_subslice -
+ hweight8((eu_disable >> (ss*8)) & eu_mask);
/*
* Record which subslice(s) has(have) 7 EUs. we
@@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
*/
if (eu_per_ss == 7)
sseu->subslice_7eu[s] |= BIT(ss);
-
- sseu->eu_total += eu_per_ss;
}
}
+ sseu->eu_total = compute_eu_total(sseu);
+
/*
* SKL is expected to always have a uniform distribution
* of EU across subslices with the exception that any one
@@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
sseu->has_eu_pg = sseu->eu_per_subslice > 2;
if (IS_GEN9_LP(dev_priv)) {
-#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss)))
- info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
+#define IS_SS_DISABLED(ss) (!(sseu->subslices_mask[0] & BIT(ss)))
+ info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
sseu->min_eu_in_pool = 0;
if (info->has_pooled_eu) {
@@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
- const int s_max = 3, ss_max = 3, eu_max = 8;
int s, ss;
- u32 fuse2, eu_disable[3]; /* s_max */
+ u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
fuse2 = I915_READ(GEN8_FUSE2);
sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+ sseu->max_slices = 3;
+ sseu->max_subslices = 3;
+ sseu->max_eus_per_subslice = 8;
+
/*
* The subslice disable field is global, i.e. it applies
* to each of the enabled slices.
*/
- sseu->subslice_mask = GENMASK(ss_max - 1, 0);
- sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
- GEN8_F2_SS_DIS_SHIFT);
+ subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+ subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+ GEN8_F2_SS_DIS_SHIFT);
eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
* Iterate through enabled slices and subslices to
* count the total enabled EU.
*/
- for (s = 0; s < s_max; s++) {
+ for (s = 0; s < sseu->max_slices; s++) {
if (!(sseu->slice_mask & BIT(s)))
/* skip disabled slice */
continue;
- for (ss = 0; ss < ss_max; ss++) {
+ sseu->subslices_mask[s] = subslice_mask;
+
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
u32 n_disabled;
- if (!(sseu->subslice_mask & BIT(ss)))
+ if (!(sseu->subslices_mask[ss] & BIT(ss)))
/* skip disabled subslice */
continue;
- n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
+ sseu->eu_mask[ss + s * sseu->max_subslices] =
+ ~(eu_disable[s] >>
+ (ss * sseu->max_eus_per_subslice));
+ n_disabled = hweight8(eu_disable[s] >>
+ (ss * sseu->max_eus_per_subslice));
/*
* Record which subslices have 7 EUs.
*/
- if (eu_max - n_disabled == 7)
+ if (sseu->max_eus_per_subslice - n_disabled == 7)
sseu->subslice_7eu[s] |= 1 << ss;
-
- sseu->eu_total += eu_max - n_disabled;
}
}
+ sseu->eu_total = compute_eu_total(sseu);
+
/*
* BDW is expected to always have a uniform distribution of EU across
* subslices with the exception that any one EU in any one subslice may
@@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
{
struct intel_device_info *info = mkwrite_device_info(dev_priv);
enum pipe pipe;
+ int s;
if (INTEL_GEN(dev_priv) >= 10) {
for_each_pipe(dev_priv, pipe)
@@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
DRM_DEBUG_DRIVER("subslice total: %u\n",
sseu_subslice_total(&info->sseu));
- DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
- DRM_DEBUG_DRIVER("subslice per slice: %u\n",
- hweight8(info->sseu.subslice_mask));
+ for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
+ DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslices_mask[s]);
+ DRM_DEBUG_DRIVER("subslice per slice: %u\n",
+ hweight8(info->sseu.subslices_mask[s]));
+ }
DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
DRM_DEBUG_DRIVER("has slice power gating: %s\n",
@@ -2073,7 +2073,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
- rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
+ rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
GEN8_RPCS_SS_CNT_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
}
@@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
#define instdone_subslice_mask(dev_priv__) \
(INTEL_GEN(dev_priv__) == 7 ? \
- 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+ 1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
for ((slice__) = 0, (subslice__) = 0; \
Up to now, subslice mask was assumed to be uniform across slices. But starting with Cannonlake, slices can be asymetric (for example slice0 has different number of subslices as slice1+). This change stores all subslices masks for all slices rather than having a single mask that applies to all slices. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/i915_debugfs.c | 24 +++-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 23 ++++- drivers/gpu/drm/i915/intel_device_info.c | 169 ++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 6 files changed, 161 insertions(+), 61 deletions(-)