@@ -91,36 +91,70 @@ static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int subslice)
{
- int i, offset = sseu_eu_idx(sseu, slice, subslice);
- u16 eu_mask = 0;
-
- for (i = 0; i < sseu->eu_stride; i++)
- eu_mask |=
- ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+ if (!intel_sseu_has_subslice(sseu, slice, subslice))
+ return 0;
- return eu_mask;
+ if (sseu->has_common_ss_eumask)
+ return sseu->eu_mask[0];
+ else
+ return sseu->eu_mask[slice * sseu->max_subslices + subslice];
}
static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
u16 eu_mask)
{
- int i, offset = sseu_eu_idx(sseu, slice, subslice);
+ WARN_ON(sseu->has_common_ss_eumask);
+ WARN_ON(sseu->max_eus_per_subslice > sizeof(sseu->eu_mask[0]) * BITS_PER_BYTE);
- for (i = 0; i < sseu->eu_stride; i++)
- sseu->eu_mask[offset + i] =
- (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+ sseu->eu_mask[slice * sseu->max_subslices + subslice] =
+ eu_mask & GENMASK(sseu->max_eus_per_subslice - 1, 0);
}
static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
u16 i, total = 0;
+ if (sseu->has_common_ss_eumask)
+ return intel_sseu_subslices_per_slice(sseu, 0) *
+ hweight16(sseu->eu_mask[0]);
+
for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
- total += hweight8(sseu->eu_mask[i]);
+ total += hweight16(sseu->eu_mask[i]);
return total;
}
+/**
+ * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing EU mask to copy
+ *
+ * Copies the EU mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns 0 on success, or the number of bytes copy_to_user() could not copy.
+ */
+int intel_sseu_copy_eumask_to_user(void __user *to,
+ const struct sseu_dev_info *sseu)
+{
+ u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
+ int len = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
+ int s, ss, i;
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ int offset = sseu_eu_idx(sseu, s, ss);
+ u16 mask = sseu_get_eus(sseu, s, ss);
+
+ for (i = 0; i < sseu->eu_stride; i++)
+ eu_mask[offset + i] =
+ (mask >> (BITS_PER_BYTE * i)) & 0xff;
+ }
+ }
+
+ return copy_to_user(to, eu_mask, len);
+}
+
static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
u32 ss_mask;
@@ -134,7 +168,7 @@ static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
- int s, ss;
+ int s;
/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
@@ -162,11 +196,9 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
get_ss_stride_mask(sseu, s,
g_ss_en | c_ss_en));
-
- for (ss = 0; ss < sseu->max_subslices; ss++)
- if (intel_sseu_has_subslice(sseu, s, ss))
- sseu_set_eus(sseu, s, ss, eu_en);
}
+ sseu->has_common_ss_eumask = 1;
+ sseu->eu_mask[0] = eu_en;
sseu->eu_per_subslice = hweight16(eu_en);
sseu->eu_total = compute_eu_total(sseu);
}
@@ -57,7 +57,21 @@ struct sseu_dev_info {
u8 subslice_mask[GEN_SS_MASK_SIZE];
u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
- u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
+
+ /*
+ * EU masks. Use has_common_ss_eumask to determine how the field
+ * will be interpreted.
+ *
+ * On pre-gen11 platforms, each subslice has independent EU fusing, so
+ * we store an array of u16 values, one per subslice, each wide enough to
+ * hold that subslice's EU mask.
+ *
+ * For gen11 and beyond, all subslices will always have the same set of
+ * enabled/disabled EUs so only eu_mask[0] is utilized; all other array
+ * entries are ignored.
+ */
+ u16 eu_mask[GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE];
+
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -66,6 +80,8 @@ struct sseu_dev_info {
u8 has_slice_pg:1;
u8 has_subslice_pg:1;
u8 has_eu_pg:1;
+ /* All subslices have the same set of enabled/disabled EUs? */
+ u8 has_common_ss_eumask:1;
/* Topology fields */
u8 max_slices;
@@ -145,4 +161,7 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
+int intel_sseu_copy_eumask_to_user(void __user *to,
+ const struct sseu_dev_info *sseu);
+
#endif /* __INTEL_SSEU_H__ */
@@ -76,10 +76,10 @@ static int fill_topology_info(const struct sseu_dev_info *sseu,
subslice_mask, subslice_length))
return -EFAULT;
- if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
- sizeof(topo) +
- slice_length + subslice_length),
- sseu->eu_mask, eu_length))
+ if (intel_sseu_copy_eumask_to_user(u64_to_user_ptr(query_item->data_ptr +
+ sizeof(topo) +
+ slice_length + subslice_length),
+ sseu))
return -EFAULT;
return total_length;
Storing the EU mask internally in the same format the I915_QUERY topology queries use makes the final copy_to_user() a bit simpler, but makes the rest of the driver's SSEU handling more complicated. Given that modern platforms (gen11 and beyond) are architecturally guaranteed to have equivalent EU masks for every subslice, it also wastes quite a bit of space since we're storing a duplicate copy of the EU mask for every single subslice where we really only need to store one instance. Let's add a has_common_ss_eumask flag to the SSEU structure to indicate which type of hardware we're working on. For the older pre-gen11 platforms the various subslices can have different EU masks, so we use a u16 array to store each subslice's mask. For gen11 and beyond we'll only use index [0] of the array and not worry about copying the repeated value, except when converting into uapi form for the I915_QUERY ioctl. Signed-off-by: Matt Roper <matthew.d.roper@intel.com> --- drivers/gpu/drm/i915/gt/intel_sseu.c | 66 +++++++++++++++++++++------- drivers/gpu/drm/i915/gt/intel_sseu.h | 21 ++++++++- drivers/gpu/drm/i915/i915_query.c | 8 ++-- 3 files changed, 73 insertions(+), 22 deletions(-)