@@ -1454,7 +1454,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
int err;
/* serialises with execbuf */
- set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+ intel_context_close(ce);
if (!intel_context_pin_if_active(ce))
continue;
@@ -276,6 +276,15 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
}
+static inline void intel_context_close(struct intel_context *ce)
+{
+ set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+
+ trace_intel_context_close(ce);
+ if (ce->ops->close)
+ ce->ops->close(ce);
+}
+
static inline bool intel_context_is_closed(const struct intel_context *ce)
{
return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
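The new helper pairs the closed-bit write with an optional backend hook, so callers need not know whether the backend cares. A standalone sketch of the same pattern, with illustrative names rather than driver code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx;

struct ctx_ops {
	void (*close)(struct ctx *c);	/* optional, may be NULL */
};

struct ctx {
	atomic_ulong flags;		/* bit 0 = closed */
	const struct ctx_ops *ops;
};

#define CTX_CLOSED 0x1ul

static void ctx_close(struct ctx *c)
{
	/* mark closed first so concurrent readers see it during the hook */
	atomic_fetch_or(&c->flags, CTX_CLOSED);

	if (c->ops->close)
		c->ops->close(c);	/* backend reacts, e.g. flushes work */
}

static void backend_close(struct ctx *c)
{
	puts("backend close ran");
}

int main(void)
{
	const struct ctx_ops ops = { .close = backend_close };
	struct ctx c = { .ops = &ops };

	ctx_close(&c);
	printf("closed: %lu\n", atomic_load(&c.flags) & CTX_CLOSED);
	return 0;
}

Setting the flag before invoking the hook mirrors the ordering in intel_context_close(): any path the callback races with already observes the context as closed.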
@@ -43,6 +43,8 @@ struct intel_context_ops {
void (*revoke)(struct intel_context *ce, struct i915_request *rq,
unsigned int preempt_timeout_ms);
+ void (*close)(struct intel_context *ce);
+
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
@@ -236,6 +238,12 @@ struct intel_context {
*/
struct list_head destroyed_link;
+ /**
+ * @guc_sched_disable_delay: worker to disable scheduling on this
+ * context
+ */
+ struct delayed_work guc_sched_disable_delay;
+
/** @parallel: sub-structure for parallel submission members */
struct {
union {
@@ -112,6 +112,10 @@ struct intel_guc {
* refs
*/
struct list_head guc_id_list;
+ /**
+ * @guc_ids_in_use: Number of single-lrc guc_ids in use
+ */
+ u16 guc_ids_in_use;
/**
* @destroyed_contexts: list of contexts waiting to be destroyed
* (deregistered with the GuC)
@@ -132,6 +136,12 @@ struct intel_guc {
* @reset_fail_mask: mask of engines that failed to reset
*/
intel_engine_mask_t reset_fail_mask;
+#define SCHED_DISABLE_DELAY_MS 100
+ /**
+ * @sched_disable_delay_ms: schedule disable delay, in ms, for
+ * contexts
+ */
+ u64 sched_disable_delay_ms;
} submission_state;
/**
@@ -71,12 +71,40 @@ static bool intel_eval_slpc_support(void *data)
return intel_guc_slpc_is_used(guc);
}
+static int guc_sched_disable_delay_ms_get(void *data, u64 *val)
+{
+ struct intel_guc *guc = data;
+
+ if (!intel_guc_submission_is_used(guc))
+ return -ENODEV;
+
+ *val = guc->submission_state.sched_disable_delay_ms;
+
+ return 0;
+}
+
+static int guc_sched_disable_delay_ms_set(void *data, u64 val)
+{
+ struct intel_guc *guc = data;
+
+ if (!intel_guc_submission_is_used(guc))
+ return -ENODEV;
+
+ guc->submission_state.sched_disable_delay_ms = val;
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops,
+ guc_sched_disable_delay_ms_get,
+ guc_sched_disable_delay_ms_set, "%lld\n");
+
void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "guc_info", &guc_info_fops, NULL },
{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
+ { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL },
};
if (!intel_guc_is_supported(guc))
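Once registered, the knob can be read and written at runtime like any other debugfs attribute. A hypothetical userspace helper follows; the debugfs path is an assumption (it varies with the debugfs mount point and DRM card index), and writing 0 disables the delay entirely:

/* sketch: query and set the new debugfs knob from userspace */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* path is an assumption; depends on debugfs mount and card index */
	const char *path =
		"/sys/kernel/debug/dri/0/gt/uc/guc_sched_disable_delay_ms";
	char buf[32];
	ssize_t n;
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("current delay: %s", buf);
	}

	/* raise the window to 500 ms; writing 0 disables the delay */
	if (pwrite(fd, "500\n", 4, 0) < 0)
		perror("pwrite");

	close(fd);
	return 0;
}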
@@ -65,7 +65,13 @@
* corresponding G2H returns indicating the scheduling disable operation has
* completed it is safe to unpin the context. While a disable is in flight it
* isn't safe to resubmit the context so a fence is used to stall all future
- * requests of that context until the G2H is returned.
+ * requests of that context until the G2H is returned. Because this interaction
+ * with the GuC takes a non-zero amount of time / GuC cycles, we delay the
+ * disabling of scheduling after the pin count goes to zero by a configurable
+ * (default 100 ms) period of time. The thought is that this gives the user a
+ * window of time to resubmit something on the context before doing this
+ * somewhat costly operation. This delay is only done if the context isn't
+ * closed and less than 3/4 of the guc_ids are in use.
*
* Context deregistration:
* Before a context can be destroyed or if we steal its guc_id we must
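In short, the final unpin now has three outcomes: skip the disable, disable immediately, or arm the delayed worker. A condensed, standalone model of that decision (the guc_id total below is illustrative, not the driver's exact constant):

/* standalone model of the unpin-time decision described above */
#include <stdbool.h>
#include <stdio.h>

enum action { UNPIN_ONLY, DISABLE_NOW, DISABLE_DELAYED };

static enum action on_last_unpin(bool enabled, bool closed,
				 unsigned int ids_in_use,
				 unsigned int ids_total,
				 unsigned long long delay_ms)
{
	if (!enabled)
		return UNPIN_ONLY;	/* scheduling already off: just unpin */

	/* under guc_id pressure (more than 3/4 in use), don't hold ids */
	bool pressure = ids_in_use > (ids_total / 4) * 3;

	if (!closed && !pressure && delay_ms)
		return DISABLE_DELAYED;	/* give userspace a resubmit window */

	return DISABLE_NOW;
}

int main(void)
{
	/* open context, light id usage, default delay: worker is armed */
	printf("%d\n", on_last_unpin(true, false, 100, 61440, 100));
	/* closed context: pay the disable cost immediately */
	printf("%d\n", on_last_unpin(true, true, 100, 61440, 100));
	return 0;
}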
@@ -1992,6 +1998,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
if (unlikely(ret < 0))
return ret;
+ if (!intel_context_is_parent(ce))
+ ++guc->submission_state.guc_ids_in_use;
+
ce->guc_id.id = ret;
return 0;
}
@@ -2001,14 +2010,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(intel_context_is_child(ce));
if (!context_guc_id_invalid(ce)) {
- if (intel_context_is_parent(ce))
+ if (intel_context_is_parent(ce)) {
bitmap_release_region(guc->submission_state.guc_ids_bitmap,
ce->guc_id.id,
order_base_2(ce->parallel.number_children
+ 1));
- else
+ } else {
ida_simple_remove(&guc->submission_state.guc_ids,
ce->guc_id.id);
+ --guc->submission_state.guc_ids_in_use;
+ }
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
@@ -2858,41 +2869,98 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
}
}
-static void guc_context_sched_disable(struct intel_context *ce)
+static void guc_context_sched_disable(struct intel_context *ce);
+
+static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
+ unsigned long flags)
+ __releases(ce->guc_state.lock)
{
- struct intel_guc *guc = ce_to_guc(ce);
- unsigned long flags;
struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
intel_wakeref_t wakeref;
u16 guc_id;
+ lockdep_assert_held(&ce->guc_state.lock);
+ guc_id = prep_context_pending_disable(ce);
+
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+ with_intel_runtime_pm(runtime_pm, wakeref)
+ __guc_context_sched_disable(guc, ce, guc_id);
+}
+
+static bool bypass_sched_disable(struct intel_guc *guc,
+ struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
GEM_BUG_ON(intel_context_is_child(ce));
+ if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
+ !ctx_id_mapped(guc, ce->guc_id.id)) {
+ clr_context_enabled(ce);
+ return true;
+ }
+
+ return !context_enabled(ce);
+}
+
+static void __delay_sched_disable(struct work_struct *wrk)
+{
+ struct intel_context *ce =
+ container_of(wrk, typeof(*ce), guc_sched_disable_delay.work);
+ struct intel_guc *guc = ce_to_guc(ce);
+ unsigned long flags;
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
- /*
- * We have to check if the context has been disabled by another thread,
- * check if submssion has been disabled to seal a race with reset and
- * finally check if any more requests have been committed to the
- * context ensursing that a request doesn't slip through the
- * 'context_pending_disable' fence.
- */
- if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
- context_has_committed_requests(ce))) {
- clr_context_enabled(ce);
+ if (bypass_sched_disable(guc, ce)) {
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- goto unpin;
+ intel_context_sched_disable_unpin(ce);
+ } else {
+ do_sched_disable(guc, ce, flags);
}
- guc_id = prep_context_pending_disable(ce);
+}
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
+{
+ /*
+ * Parent contexts are perma-pinned; if we are unpinning, do the
+ * schedule disable immediately.
+ */
+ if (intel_context_is_parent(ce))
+ return true;
- with_intel_runtime_pm(runtime_pm, wakeref)
- __guc_context_sched_disable(guc, ce, guc_id);
+ /*
+ * If more than 3/4 of the guc_ids are in use, do the schedule
+ * disable immediately.
+ */
+ return guc->submission_state.guc_ids_in_use >
+ ((GUC_MAX_CONTEXT_ID - NUMBER_MULTI_LRC_GUC_ID(guc)) / 4) * 3;
+}
+
+static void guc_context_sched_disable(struct intel_context *ce)
+{
+ struct intel_guc *guc = ce_to_guc(ce);
+ u64 delay = guc->submission_state.sched_disable_delay_ms;
+ unsigned long flags;
- return;
-unpin:
- intel_context_sched_disable_unpin(ce);
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+
+ if (bypass_sched_disable(guc, ce)) {
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ intel_context_sched_disable_unpin(ce);
+ } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
+ delay) {
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ mod_delayed_work(system_unbound_wq,
+ &ce->guc_sched_disable_delay,
+ msecs_to_jiffies(delay));
+ } else {
+ do_sched_disable(guc, ce, flags);
+ }
+}
+
+static void guc_context_close(struct intel_context *ce)
+{
+ if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
+ cancel_delayed_work(&ce->guc_sched_disable_delay))
+ __delay_sched_disable(&ce->guc_sched_disable_delay.work);
}
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
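guc_context_close() leans on a standard workqueue idiom: cancel_delayed_work() returns true only when the work was still queued and its handler has not begun, so it is then safe to call the handler synchronously. Closing a context therefore forces the pending schedule disable to happen now instead of after the delay. A self-contained toy module showing the idiom (names are illustrative):

/* toy module: cancel-or-run-now, as in guc_context_close() above */
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static void demo_action(struct work_struct *wrk)
{
	pr_info("demo: delayed action ran\n");
}

static DECLARE_DELAYED_WORK(demo_work, demo_action);

static int __init demo_init(void)
{
	/* arm the action ~100 ms out, as the unpin path does */
	mod_delayed_work(system_unbound_wq, &demo_work,
			 msecs_to_jiffies(100));

	/* "close": if it has not run yet, run it synchronously now */
	if (cancel_delayed_work(&demo_work))
		demo_action(&demo_work.work);

	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The synchronous call is safe in the patch because __delay_sched_disable() takes ce->guc_state.lock itself rather than expecting the caller to hold it.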
@@ -3201,6 +3269,8 @@ static void remove_from_context(struct i915_request *rq)
static const struct intel_context_ops guc_context_ops = {
.alloc = guc_context_alloc,
+ .close = guc_context_close,
+
.pre_pin = guc_context_pre_pin,
.pin = guc_context_pin,
.unpin = guc_context_unpin,
@@ -3283,6 +3353,10 @@ static void guc_context_init(struct intel_context *ce)
rcu_read_unlock();
ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
+
+ INIT_DELAYED_WORK(&ce->guc_sched_disable_delay,
+ __delay_sched_disable);
+
set_bit(CONTEXT_GUC_INIT, &ce->flags);
}
@@ -3320,6 +3394,9 @@ static int guc_request_alloc(struct i915_request *rq)
if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
guc_context_init(ce);
+ if (cancel_delayed_work(&ce->guc_sched_disable_delay))
+ intel_context_sched_disable_unpin(ce);
+
/*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
@@ -3450,6 +3527,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
static const struct intel_context_ops virtual_guc_context_ops = {
.alloc = guc_virtual_context_alloc,
+ .close = guc_context_close,
+
.pre_pin = guc_virtual_context_pre_pin,
.pin = guc_virtual_context_pin,
.unpin = guc_virtual_context_unpin,
@@ -3539,6 +3618,8 @@ static void guc_child_context_destroy(struct kref *kref)
static const struct intel_context_ops virtual_parent_context_ops = {
.alloc = guc_virtual_context_alloc,
+ .close = guc_context_close,
+
.pre_pin = guc_context_pre_pin,
.pin = guc_parent_context_pin,
.unpin = guc_parent_context_unpin,
@@ -4064,6 +4145,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
+ guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
@@ -91,12 +91,14 @@ int __i915_subtests(const char *caller,
__i915_nop_setup, __i915_nop_teardown, \
T, ARRAY_SIZE(T), data)
#define i915_live_subtests(T, data) ({ \
typecheck(struct drm_i915_private *, data); \
+ (data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \
__i915_subtests(__func__, \
__i915_live_setup, __i915_live_teardown, \
T, ARRAY_SIZE(T), data); \
})
#define intel_gt_live_subtests(T, data) ({ \
typecheck(struct intel_gt *, data); \
+ (data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \
__i915_subtests(__func__, \
__intel_gt_live_setup, __intel_gt_live_teardown, \
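One ordering nit fixed above: typecheck() should remain the first statement in each macro so that a caller passing the wrong pointer type is diagnosed before the new assignment dereferences it. typecheck() evaluates to 1 but provokes a compiler warning on mismatch; a minimal illustration, assuming kernel context (the assert_gt_ptr wrapper is hypothetical):

#include <linux/typecheck.h>

struct intel_gt;
struct drm_i915_private;

/* warns at compile time if gt is not a struct intel_gt * */
#define assert_gt_ptr(gt) typecheck(struct intel_gt *, gt)

static inline void demo(struct intel_gt *gt, struct drm_i915_private *i915)
{
	assert_gt_ptr(gt);	/* fine */
	/* assert_gt_ptr(i915); would emit a distinct-pointer-types warning */
}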
@@ -430,6 +430,11 @@ DEFINE_EVENT(intel_context, intel_context_reset,
TP_ARGS(ce)
);
+DEFINE_EVENT(intel_context, intel_context_close,
+ TP_PROTO(struct intel_context *ce),
+ TP_ARGS(ce)
+);
+
DEFINE_EVENT(intel_context, intel_context_ban,
TP_PROTO(struct intel_context *ce),
TP_ARGS(ce)
@@ -532,6 +537,11 @@ trace_intel_context_reset(struct intel_context *ce)
{
}
+static inline void
+trace_intel_context_close(struct intel_context *ce)
+{
+}
+
static inline void
trace_intel_context_ban(struct intel_context *ce)
{