@@ -2066,6 +2066,12 @@ struct drm_i915_private {
u32 active_requests;
u32 request_serial;
+ /**
+	 * Global barrier to ensure ordering of sseu transition
+	 * requests.
+ */
+ struct i915_gem_active global_barrier;
+
/**
* Is the GPU currently considered idle, or busy executing
* userspace requests? Whilst idle, we allow runtime power
@@ -3212,6 +3218,13 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
return container_of(vm, struct i915_hw_ppgtt, base);
}
+static inline void i915_gem_set_global_barrier(struct drm_i915_private *i915,
+ struct i915_request *rq)
+{
+ lockdep_assert_held(&i915->drm.struct_mutex);
+ i915_gem_active_set(&i915->gt.global_barrier, rq);
+}
+
/* i915_gem_fence_reg.c */
struct drm_i915_fence_reg *
i915_reserve_fence(struct drm_i915_private *dev_priv);
@@ -5555,6 +5555,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
if (!dev_priv->priorities)
goto err_dependencies;
+ init_request_active(&dev_priv->gt.global_barrier, NULL);
+
INIT_LIST_HEAD(&dev_priv->gt.timelines);
INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
@@ -781,6 +781,79 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
return 0;
}
+static int
+intel_sseu_from_user_sseu(const struct sseu_dev_info *sseu,
+ const struct drm_i915_gem_context_param_sseu *user_sseu,
+ struct intel_sseu *ctx_sseu)
+{
+ if ((user_sseu->slice_mask & ~sseu->slice_mask) != 0 ||
+ user_sseu->slice_mask == 0)
+ return -EINVAL;
+
+ if ((user_sseu->subslice_mask & ~sseu->subslice_mask[0]) != 0 ||
+ user_sseu->subslice_mask == 0)
+ return -EINVAL;
+
+ if (user_sseu->min_eus_per_subslice > sseu->max_eus_per_subslice)
+ return -EINVAL;
+
+ if (user_sseu->max_eus_per_subslice > sseu->max_eus_per_subslice ||
+ user_sseu->max_eus_per_subslice < user_sseu->min_eus_per_subslice ||
+ user_sseu->max_eus_per_subslice == 0)
+ return -EINVAL;
+
+ ctx_sseu->slice_mask = user_sseu->slice_mask;
+ ctx_sseu->subslice_mask = user_sseu->subslice_mask;
+ ctx_sseu->min_eus_per_subslice = user_sseu->min_eus_per_subslice;
+ ctx_sseu->max_eus_per_subslice = user_sseu->max_eus_per_subslice;
+
+ return 0;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct intel_sseu sseu)
+{
+ struct drm_i915_private *i915 = ctx->i915;
+ struct i915_request *rq;
+ struct intel_ring *ring;
+ int ret;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+
+ i915_retire_requests(i915);
+
+ /* Now use the RCS to actually reconfigure. */
+ engine = i915->engine[RCS];
+
+ rq = i915_request_alloc(engine, i915->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ ret = engine->emit_rpcs_config(rq, ctx, sseu);
+ if (ret) {
+ __i915_request_add(rq, true);
+ return ret;
+ }
+
+ /* Queue this switch after all other activity */
+ list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
+ struct i915_request *prev;
+
+ prev = last_request_on_engine(ring->timeline, engine);
+ if (prev)
+ i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ &prev->submit,
+ I915_FENCE_GFP);
+ }
+
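+	/*
+	 * Make this request the global barrier: every request allocated
+	 * after this point waits on it (see add_global_barrier() in
+	 * i915_request_alloc()), so no new work starts against a stale
+	 * RPCS configuration.
+	 */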
+ i915_gem_set_global_barrier(i915, rq);
+ __i915_request_add(rq, true);
+
+ return 0;
+}
+
int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -818,6 +891,46 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority;
break;
+ case I915_CONTEXT_PARAM_SSEU:
+ {
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+
+ if (copy_from_user(&user_sseu,
+ u64_to_user_ptr(args->value),
+ sizeof(user_sseu))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ if (user_sseu.rsvd1 != 0 || user_sseu.rsvd2 != 0) {
+ ret = -EINVAL;
+ break;
+ }
+
+ engine = intel_engine_lookup_user(to_i915(dev),
+ user_sseu.class,
+ user_sseu.instance);
+ if (!engine) {
+ ret = -EINVAL;
+ break;
+ }
+
+ ce = to_intel_context(ctx, engine);
+
+ user_sseu.slice_mask = ce->sseu.slice_mask;
+ user_sseu.subslice_mask = ce->sseu.subslice_mask;
+ user_sseu.min_eus_per_subslice =
+ ce->sseu.min_eus_per_subslice;
+ user_sseu.max_eus_per_subslice =
+ ce->sseu.max_eus_per_subslice;
+
+ if (copy_to_user(u64_to_user_ptr(args->value),
+ &user_sseu, sizeof(user_sseu)))
+ ret = -EFAULT;
+ break;
+ }
default:
ret = -EINVAL;
break;
@@ -892,7 +1005,70 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
ctx->sched.priority = priority;
}
break;
+ case I915_CONTEXT_PARAM_SSEU:
+ {
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_engine_cs *engine;
+ struct intel_sseu ctx_sseu;
+ enum intel_engine_id id;
+
+ if (args->size) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu))) {
+ ret = -EFAULT;
+ break;
+ }
+ if (user_sseu.rsvd1 != 0 || user_sseu.rsvd2 != 0) {
+ ret = -EINVAL;
+ break;
+ }
+
+ engine = intel_engine_lookup_user(i915,
+ user_sseu.class,
+ user_sseu.instance);
+ if (!engine) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!engine->emit_rpcs_config) {
+ ret = -ENODEV;
+ break;
+ }
+
+ ret = intel_sseu_from_user_sseu(&INTEL_INFO(i915)->sseu,
+ &user_sseu, &ctx_sseu);
+ if (ret)
+ break;
+
+ if (memcmp(&to_intel_context(ctx, engine)->sseu,
+ &ctx_sseu, sizeof(ctx_sseu)) != 0) {
+ DRM_ERROR("reconfiguring ctx=%p\n", ctx);
+ ret = i915_gem_context_reconfigure_sseu(ctx,
+ engine,
+ ctx_sseu);
+ if (ret)
+ break;
+ }
+
+ /*
+		 * Apply the configuration to all engines. Our hardware
+ * doesn't currently support different configurations
+ * for each engine.
+ */
+ for_each_engine(engine, i915, id) {
+ struct intel_context *ce = to_intel_context(ctx, engine);
+
+ ce->sseu = ctx_sseu;
+ }
+ }
+ break;
default:
ret = -EINVAL;
break;
@@ -644,6 +644,22 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
+static int
+i915_request_await_request(struct i915_request *to, struct i915_request *from);
+
+static int add_global_barrier(struct i915_request *rq)
+{
+ struct i915_request *barrier;
+ int ret = 0;
+
+ barrier = i915_gem_active_raw(&rq->i915->gt.global_barrier,
+ &rq->i915->drm.struct_mutex);
+ if (barrier)
+ ret = i915_request_await_request(rq, barrier);
+
+ return ret;
+}
+
/**
* i915_request_alloc - allocate a request structure
*
@@ -805,6 +821,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
*/
rq->head = rq->ring->emit;
+ ret = add_global_barrier(rq);
+ if (ret)
+ goto err_unwind;
+
/* Unconditionally invalidate GPU caches and TLBs. */
ret = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret)
@@ -2271,6 +2271,82 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
}
static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
+u32 gen8_make_rpcs(const struct sseu_dev_info *sseu,
+ struct intel_sseu ctx_sseu)
+{
+ u32 rpcs = 0;
+
+ /*
+ * Starting in Gen9, render power gating can leave
+ * slice/subslice/EU in a partially enabled state. We
+ * must make an explicit request through RPCS for full
+ * enablement.
+ */
+ if (sseu->has_slice_pg) {
+ rpcs |= GEN8_RPCS_S_CNT_ENABLE;
+ rpcs |= hweight8(ctx_sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT;
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ if (sseu->has_subslice_pg) {
+ rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
+ rpcs |= hweight8(ctx_sseu.subslice_mask) <<
+ GEN8_RPCS_SS_CNT_SHIFT;
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ if (sseu->has_eu_pg) {
+ rpcs |= ctx_sseu.min_eus_per_subslice <<
+ GEN8_RPCS_EU_MIN_SHIFT;
+ rpcs |= ctx_sseu.max_eus_per_subslice <<
+ GEN8_RPCS_EU_MAX_SHIFT;
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ return rpcs;
+}
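As a worked illustration (not part of the patch), consider hypothetical inputs of slice_mask = 0x1, subslice_mask = 0x7 and 2-8 EUs per subslice on hardware with slice, subslice and EU power gating; gen8_make_rpcs() then reduces to:

/* Illustrative expansion only; GEN8_RPCS_* are the existing i915_reg.h
 * macros and hweight8() counts the bits set in each mask.
 */
u32 rpcs = GEN8_RPCS_ENABLE |
	   GEN8_RPCS_S_CNT_ENABLE | (hweight8(0x1) << GEN8_RPCS_S_CNT_SHIFT) |
	   GEN8_RPCS_SS_CNT_ENABLE | (hweight8(0x7) << GEN8_RPCS_SS_CNT_SHIFT) |
	   (2 << GEN8_RPCS_EU_MIN_SHIFT) |
	   (8 << GEN8_RPCS_EU_MAX_SHIFT);

That is, one slice and three subslices counted into the S/SS fields, with an explicit EU range of 2 to 8 per subslice.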
+
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+ struct i915_gem_context *ctx,
+ struct intel_sseu sseu)
+{
+ struct drm_i915_private *i915 = rq->i915;
+ struct intel_context *ce = to_intel_context(ctx, i915->engine[RCS]);
+ u64 offset;
+ u32 *cs;
+
+ /* Let the deferred state allocation take care of this. */
+ if (!ce->state)
+ return 0;
+
+ if (!i915_vma_is_pinned(ce->kernel_state)) {
+ int ret;
+
+ ret = i915_vma_pin(ce->kernel_state, 0,
+ GEN8_LR_CONTEXT_ALIGN, PIN_USER);
+ if (ret)
+ return ret;
+ }
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
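+	/*
+	 * Write the new RPCS value directly into the value slot that
+	 * follows the CTX_R_PWR_CLK_STATE register entry in the saved
+	 * context image (register/value pairs, hence the +1).
+	 */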
+ offset = ce->kernel_state->node.start +
+ LRC_STATE_PN * PAGE_SIZE +
+ (CTX_R_PWR_CLK_STATE + 1) * 4;
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = gen8_make_rpcs(&INTEL_INFO(i915)->sseu,
+ intel_engine_prepare_sseu(rq->engine, sseu));
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
static int gen8_init_rcs_context(struct i915_request *rq)
{
int ret;
@@ -2364,6 +2440,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_breadcrumb = gen8_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
+ engine->emit_rpcs_config = gen8_emit_rpcs_config;
+
engine->set_default_submission = execlists_set_default_submission;
if (INTEL_GEN(engine->i915) < 11) {
@@ -2512,41 +2590,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
return logical_ring_init(engine);
}
-u32 gen8_make_rpcs(const struct sseu_dev_info *sseu,
- struct intel_sseu ctx_sseu)
-{
- u32 rpcs = 0;
-
- /*
- * Starting in Gen9, render power gating can leave
- * slice/subslice/EU in a partially enabled state. We
- * must make an explicit request through RPCS for full
- * enablement.
- */
- if (sseu->has_slice_pg) {
- rpcs |= GEN8_RPCS_S_CNT_ENABLE;
- rpcs |= hweight8(ctx_sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT;
- rpcs |= GEN8_RPCS_ENABLE;
- }
-
- if (sseu->has_subslice_pg) {
- rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
- rpcs |= hweight8(ctx_sseu.subslice_mask) <<
- GEN8_RPCS_SS_CNT_SHIFT;
- rpcs |= GEN8_RPCS_ENABLE;
- }
-
- if (sseu->has_eu_pg) {
- rpcs |= ctx_sseu.min_eus_per_subslice <<
- GEN8_RPCS_EU_MIN_SHIFT;
- rpcs |= ctx_sseu.max_eus_per_subslice <<
- GEN8_RPCS_EU_MAX_SHIFT;
- rpcs |= GEN8_RPCS_ENABLE;
- }
-
- return rpcs;
-}
-
static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
{
u32 indirect_ctx_offset;
@@ -2063,6 +2063,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->emit_breadcrumb_sz++;
}
+ engine->emit_rpcs_config = NULL; /* Only supported on Gen8+ */
+
engine->set_default_submission = i9xx_set_default_submission;
if (INTEL_GEN(dev_priv) >= 6)
@@ -456,6 +456,10 @@ struct intel_engine_cs {
void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
int emit_breadcrumb_sz;
+ int (*emit_rpcs_config)(struct i915_request *rq,
+ struct i915_gem_context *ctx,
+ struct intel_sseu sseu);
+
/* Pass the request to the hardware queue (e.g. directly into
* the legacy ringbuffer or to the end of an execlist).
*
@@ -1456,9 +1456,52 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+ /*
+ * When using the following param, value should be a pointer to
+ * a struct drm_i915_gem_context_param_sseu.
+ */
+#define I915_CONTEXT_PARAM_SSEU 0x7
__u64 value;
};
+struct drm_i915_gem_context_param_sseu {
+ /*
+ * Engine class & instance to be configured or queried.
+ */
+ __u16 class;
+ __u16 instance;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 rsvd1;
+
+ /*
+ * Mask of slices to enable for the context. Valid values are a subset
+ * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+ */
+ __u64 slice_mask;
+
+ /*
+ * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK.
+ */
+ __u64 subslice_mask;
+
+ /*
+ * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be less than or equal to
+ * max_eus_per_subslice.
+ */
+ __u16 min_eus_per_subslice;
+ __u16 max_eus_per_subslice;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 rsvd2;
+};
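For illustration only, here is a minimal userspace sketch (not part of this patch) of driving the new parameter through the existing DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM ioctl. The file descriptor, context id and mask values are hypothetical, and the include path assumes the installed kernel/libdrm uAPI headers carrying the definitions above:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: restrict context ctx_id to a single slice on the
 * render engine. Returns 0 on success, -1 with errno set on failure.
 */
static int set_context_sseu(int drm_fd, uint32_t ctx_id)
{
	struct drm_i915_gem_context_param_sseu sseu;
	struct drm_i915_gem_context_param arg;

	memset(&sseu, 0, sizeof(sseu));
	sseu.class = I915_ENGINE_CLASS_RENDER;
	sseu.instance = 0;
	sseu.slice_mask = 0x1;		/* hypothetical values; must be a    */
	sseu.subslice_mask = 0x7;	/* subset of what the device exposes */
	sseu.min_eus_per_subslice = 2;
	sseu.max_eus_per_subslice = 8;

	memset(&arg, 0, sizeof(arg));
	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_SSEU;
	arg.size = 0;			/* the setparam path requires size == 0 */
	arg.value = (uint64_t)(uintptr_t)&sseu;

	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

The GETPARAM path above fills the same struct in the opposite direction, so reading the current configuration back is symmetric.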
+
enum drm_i915_oa_format {
I915_OA_FORMAT_A13 = 1, /* HSW only */
I915_OA_FORMAT_A29, /* HSW only */