@@ -2455,11 +2455,10 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
seq_printf(m, "GuC stage descriptor %u:\n", index);
seq_printf(m, "\tIndex: %u\n", desc->stage_id);
+ seq_printf(m, "\tProxy Index: %u\n", desc->proxy_id);
seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
seq_printf(m, "\tPriority: %d\n", desc->priority);
seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
- seq_printf(m, "\tEngines used: 0x%x\n",
- desc->engines_used);
seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
desc->db_trigger_phy,
desc->db_trigger_cpu,
@@ -2471,10 +2470,10 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
seq_putc(m, '\n');
for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
- u32 guc_engine_id = engine->guc_id;
+ u32 class = GUC_ID_TO_ENGINE_CLASS(engine->guc_id);
+ u32 inst = GUC_ID_TO_ENGINE_INSTANCE(engine->guc_id);
struct guc_execlist_context *lrc =
- &desc->lrc[guc_engine_id];
-
+ &desc->lrc[class][inst];
seq_printf(m, "\t%s LRC:\n", engine->name);
seq_printf(m, "\t\tContext desc: 0x%x\n",
lrc->context_desc);
@@ -118,6 +118,18 @@ static inline u64 ptr_to_u64(const void *ptr)
__idx; \
})
+#define bitmap32_test_bit(bitmap, bit) ({ \
+	(bitmap)[(bit) / 32] & (1u << ((bit) % 32)); \
+})
+
+#define bitmap32_set_bit(bitmap, bit) ({ \
+	(bitmap)[(bit) / 32] |= (1u << ((bit) % 32)); \
+})
+
+#define bitmap32_clear_bit(bitmap, bit) ({ \
+	(bitmap)[(bit) / 32] &= ~(1u << ((bit) % 32)); \
+})
+
#include <linux/list.h>
static inline int list_is_first(const struct list_head *list,
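The helpers above treat a bitmap as an array of u32 words, presumably because the lrc_bitmap they operate on is shared with the GuC as 32-bit words rather than as host-sized longs. A short illustrative sketch (not part of the patch) of how a bit index splits into a word and a bit position:

/* Illustrative only: exercising the bitmap32_* helpers on a 3-word
 * bitmap, the same shape as one row of guc_stage_desc.lrc_bitmap
 * introduced further down.
 */
static void bitmap32_example(void)
{
	u32 bitmap[3] = {};

	bitmap32_set_bit(bitmap, 0);	/* word 0, bit 0 */
	bitmap32_set_bit(bitmap, 63);	/* word 1, bit 31 */

	WARN_ON(!bitmap32_test_bit(bitmap, 63));	/* non-zero iff set */

	bitmap32_clear_bit(bitmap, 63);
	WARN_ON(bitmap32_test_bit(bitmap, 63));
}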
@@ -29,9 +29,11 @@
#define GUC_CLIENT_PRIORITY_NORMAL 3
#define GUC_CLIENT_PRIORITY_NUM 4
-#define GUC_MAX_STAGE_DESCRIPTORS 1024
+#define GUC_MAX_STAGE_DESCRIPTORS 2032
#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS
+#define GUC_MAX_LRC_PER_CLASS 64
+
#define GUC_RENDER_ENGINE 0
#define GUC_VIDEO_ENGINE 1
#define GUC_BLITTER_ENGINE 2
@@ -71,9 +73,12 @@
#define GUC_DOORBELL_DISABLED 0
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
-#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
-#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2)
-#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3)
+#define GUC_STAGE_DESC_ATTR_TYPE_SHIFT 1
+#define GUC_STAGE_DESC_ATTR_PRINCIPAL (0x0 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_PROXY (0x1 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_REAL (0x2 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_TYPE_MASK (0x3 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_KERNEL BIT(3)
#define GUC_STAGE_DESC_ATTR_RESET BIT(4)
#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5)
#define GUC_STAGE_DESC_ATTR_PCH BIT(6)
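The old PENDING_DB and PREEMPT flags give way to a two-bit descriptor type at bits 2:1 of the attribute field, with KERNEL moving from bit 2 to bit 3. A hedged sketch (the helper name is made up for illustration) of how the type can be pulled out with these masks:

/* Illustrative helper, not part of the patch: extract the two-bit type
 * (PRINCIPAL, PROXY or REAL) encoded in a descriptor's attribute field.
 */
static u32 guc_stage_desc_type(const struct guc_stage_desc *desc)
{
	return desc->attribute & GUC_STAGE_DESC_ATTR_TYPE_MASK;
}

After guc_stage_desc_init() below, the execbuf client's descriptor would report GUC_STAGE_DESC_ATTR_PROXY, while descriptors populated by guc_ppal_stage_attach() report GUC_STAGE_DESC_ATTR_PRINCIPAL.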
@@ -282,9 +287,10 @@ struct guc_process_desc {
u64 wq_base_addr;
u32 wq_size_bytes;
u32 wq_status;
- u32 engine_presence;
u32 priority;
- u32 reserved[30];
+ u32 token;
+ u32 queue_engine_error;
+ u32 reserved[23];
} __packed;
/* engine id and context id is packed into guc_execlist_context.context_id*/
@@ -295,16 +301,19 @@ struct guc_process_desc {
struct guc_execlist_context {
u32 context_desc;
u32 context_id;
- u32 ring_status;
+ u32 reserved0;
u32 ring_lrca;
u32 ring_begin;
u32 ring_end;
u32 ring_next_free_location;
u32 ring_current_tail_pointer_value;
- u8 engine_state_submit_value;
- u8 engine_state_wait_value;
- u16 pagefault_count;
- u16 engine_submit_queue_count;
+ u32 engine_state_wait_value;
+ u32 state_reserved;
+ u32 is_present_in_sq;
+ u32 sync_value;
+ u32 sync_addr;
+ u32 slpc_hints;
+ u32 reserved1[4];
} __packed;
/*
@@ -317,36 +326,30 @@ struct guc_execlist_context {
* with the GuC, being allocated before the GuC is loaded with its firmware.
*/
struct guc_stage_desc {
- u32 sched_common_area;
+ u64 desc_private;
u32 stage_id;
- u32 pas_id;
- u8 engines_used;
+ u32 proxy_id;
u64 db_trigger_cpu;
u32 db_trigger_uk;
u64 db_trigger_phy;
- u16 db_id;
-
- struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM];
-
- u8 attribute;
-
- u32 priority;
-
+ u32 db_id;
+ struct guc_execlist_context lrc[GUC_MAX_ENGINE_CLASSES][GUC_MAX_LRC_PER_CLASS];
+ u32 lrc_bitmap[GUC_MAX_ENGINE_CLASSES][3];
+ u32 lrc_count;
+ u32 max_lrc_per_class;
+ u32 attribute; /* GUC_STAGE_DESC_ATTR_xxx */
+ u32 priority; /* GUC_CLIENT_PRIORITY_xxx */
u32 wq_sampled_tail_offset;
u32 wq_total_submit_enqueues;
-
u32 process_desc;
u32 wq_addr;
u32 wq_size;
-
- u32 engine_presence;
-
- u8 engine_suspended;
-
- u8 reserved0[3];
- u64 reserved1[1];
-
- u64 desc_private;
+ u32 feature0;
+ u32 feature1;
+ u32 feature2;
+ u32 queue_engine_error;
+ u32 reserved[2];
+ u64 reserved3[12];
} __packed;
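Each per-class lrc_bitmap row is three u32 words (96 bits), which comfortably covers the GUC_MAX_LRC_PER_CLASS (64) slots of the matching lrc row. A hedged compile-time check (not in the patch) that would keep the two in sync, e.g. if dropped into guc_stage_desc_pool_create():

/* Illustrative only: one bitmap row (3 x 32 bits) must provide at least
 * one bit per guc_execlist_context slot in a class.
 */
BUILD_BUG_ON(GUC_MAX_LRC_PER_CLASS > 3 * 32);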
/**
@@ -46,17 +46,22 @@
* that contains all required pages for these elements).
*
* GuC stage descriptor:
- * During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC.
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a
- * guc_stage_desc (via the client's stage_id), so effectively only one
- * gets used. This stage descriptor lets the GuC know about the doorbell,
- * workqueue and process descriptor. Theoretically, it also lets the GuC
- * know about our HW contexts (context ID, etc...), but we actually
- * employ a kind of submission where the GuC uses the LRCA sent via the work
- * item instead (the single guc_stage_desc associated to execbuf client
- * contains information about the default kernel context only, but this is
- * essentially unused). This is called a "proxy" submission.
+ * During initialization, the driver allocates a static pool of descriptors
+ * and shares them with the GuC. Each stage descriptor lets the GuC know about
+ * the doorbell, workqueue and process descriptor; additionally, it stores
+ * information about all possible HW contexts that use it (an array of
+ * guc_execlist_context structs, 64 per engine class).
+ *
+ * The idea is that every direct-submission GuC client gets one SW Context ID
+ * and every HW context created by that client gets one SW Counter. The "SW
+ * Context ID" and "SW Counter" are then passed in every work queue item.
+ *
+ * But we don't have direct submission yet: does that mean we are limited to 64
+ * contexts in total (one client)? Not really: we can use extra GuC stage
+ * descriptors to store more HW contexts. They are special in that they don't
+ * have their own work queue, doorbell or process descriptor. Instead, these
+ * "principal" stage descriptors submit through the client's own descriptor,
+ * which acts as a "proxy" (a generalization of the old proxy submission).
*
* The Scratch registers:
* There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
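Putting the two new identifiers together, here is a hedged sketch (not part of the patch) of the lookup this scheme implies, mirroring __get_ppal_stage_desc() and guc_ppal_stage_update() further down; the ce->sw_context_id and ce->sw_counter fields are assumed to be added to intel_context elsewhere in this series:

/* Illustrative only: the guc_execlist_context slot a given HW context
 * occupies is addressed by (SW Context ID, engine class, SW Counter).
 */
static struct guc_execlist_context *
ppal_lrc_slot(struct intel_guc *guc, struct intel_context *ce,
	      struct intel_engine_cs *engine)
{
	struct guc_stage_desc *pool = guc->stage_desc_pool_vaddr;
	struct guc_stage_desc *desc = &pool[ce->sw_context_id];

	return &desc->lrc[engine->guc_class][ce->sw_counter];
}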
@@ -171,6 +176,16 @@ static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
return &base[client->stage_id];
}
+static struct guc_stage_desc *__get_ppal_stage_desc(struct intel_guc *guc,
+ u32 index)
+{
+ struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
+
+ GEM_BUG_ON(index >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ return &base[index];
+}
+
/*
* Initialise, update, or clear doorbell data shared with the GuC
*
@@ -344,70 +359,20 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
static void guc_stage_desc_init(struct intel_guc *guc,
struct intel_guc_client *client)
{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx = client->owner;
struct guc_stage_desc *desc;
- unsigned int tmp;
u32 gfx_addr;
desc = __get_stage_desc(client);
memset(desc, 0, sizeof(*desc));
desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+ GUC_STAGE_DESC_ATTR_PROXY |
GUC_STAGE_DESC_ATTR_KERNEL;
- if (is_high_priority(client))
- desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
desc->stage_id = client->stage_id;
+ desc->proxy_id = client->stage_id;
desc->priority = client->priority;
desc->db_id = client->doorbell_id;
- for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
- struct intel_context *ce = to_intel_context(ctx, engine);
- u32 guc_engine_id = engine->guc_id;
- struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
-
- /* TODO: We have a design issue to be solved here. Only when we
- * receive the first batch, we know which engine is used by the
- * user. But here GuC expects the lrc and ring to be pinned. It
- * is not an issue for default context, which is the only one
- * for now who owns a GuC client. But for future owner of GuC
- * client, need to make sure lrc is pinned prior to enter here.
- */
- if (!ce->state)
- break; /* XXX: continue? */
-
- /*
- * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy
- * submission or, in other words, not using a direct submission
- * model) the KMD's LRCA is not used for any work submission.
- * Instead, the GuC uses the LRCA of the user mode context (see
- * guc_add_request below).
- */
- lrc->context_desc = lower_32_bits(ce->lrc_desc);
-
- /* The state page is after PPHWSP */
- lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) +
- LRC_STATE_PN * PAGE_SIZE;
-
- /* XXX: In direct submission, the GuC wants the HW context id
- * here. In proxy submission, it wants the stage id
- */
- lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
- (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
-
- lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
- lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
- lrc->ring_next_free_location = lrc->ring_begin;
- lrc->ring_current_tail_pointer_value = 0;
-
- desc->engines_used |= (1 << guc_engine_id);
- }
-
- DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
- client->engines, desc->engines_used);
- WARN_ON(desc->engines_used == 0);
-
/*
* The doorbell, process descriptor, and workqueue are all parts
* of the client object, which the GuC will reference via the GGTT
@@ -430,7 +395,15 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
struct guc_stage_desc *desc;
desc = __get_stage_desc(client);
- memset(desc, 0, sizeof(*desc));
+ /* No memset: the stage desc might still be used as a principal */
+ desc->attribute &= ~GUC_STAGE_DESC_ATTR_TYPE_MASK;
+ desc->db_id = 0;
+ desc->db_trigger_phy = 0;
+ desc->db_trigger_cpu = 0;
+ desc->db_trigger_uk = 0;
+ desc->process_desc = 0;
+ desc->wq_addr = 0;
+ desc->wq_size = 0;
}
/* Construct a Work Item and append it to the GuC's Work Queue */
@@ -1299,6 +1272,87 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
}
+static void guc_ppal_stage_attach(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &ctx->i915->guc;
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ struct guc_stage_desc *desc;
+
+ GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+ if (desc->lrc_count == 0) {
+ desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+ GUC_STAGE_DESC_ATTR_PRINCIPAL |
+ GUC_STAGE_DESC_ATTR_KERNEL;
+ desc->stage_id = ce->sw_context_id;
+ }
+
+ GEM_BUG_ON(bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+ ce->sw_counter));
+ bitmap32_set_bit(desc->lrc_bitmap[engine->guc_class], ce->sw_counter);
+ desc->lrc_count++;
+
+	/* GuC optimization hint: highest LRC slot index in use, plus one */
+ if (ce->sw_counter >= desc->max_lrc_per_class)
+ desc->max_lrc_per_class = ce->sw_counter + 1;
+}
+
+static void guc_ppal_stage_detach(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &ctx->i915->guc;
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ struct guc_stage_desc *desc;
+ struct guc_execlist_context *lrc;
+
+ GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+ GEM_BUG_ON(!bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+ ce->sw_counter));
+ bitmap32_clear_bit(desc->lrc_bitmap[engine->guc_class], ce->sw_counter);
+ desc->lrc_count--;
+
+ if (desc->lrc_count == 0) {
+ desc->attribute = 0;
+ desc->stage_id = 0;
+ desc->max_lrc_per_class = 0;
+ } else {
+ /* TODO: GuC optimizations */
+ }
+
+ lrc = &desc->lrc[engine->guc_class][ce->sw_counter];
+ memset(lrc, 0, sizeof(*lrc));
+}
+
+static void guc_ppal_stage_update(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &ctx->i915->guc;
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ struct guc_stage_desc *desc;
+ struct guc_execlist_context *lrc;
+
+ GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+ GEM_BUG_ON(!bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+ ce->sw_counter));
+
+ lrc = &desc->lrc[engine->guc_class][ce->sw_counter];
+
+ lrc->context_desc = lower_32_bits(ce->lrc_desc);
+ lrc->context_id = upper_32_bits(ce->lrc_desc);
+ lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) + LRC_STATE_PN * PAGE_SIZE;
+ lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
+ lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
+}
+
int intel_guc_submission_enable(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
@@ -1321,17 +1375,26 @@ int intel_guc_submission_enable(struct intel_guc *guc)
GEM_BUG_ON(!guc->execbuf_client);
+ dev_priv->contexts.alloc_hook = guc_ppal_stage_attach;
+ dev_priv->contexts.update_hook = guc_ppal_stage_update;
+ dev_priv->contexts.free_hook = guc_ppal_stage_detach;
+
+ for_each_engine(engine, dev_priv, id) {
+ guc_ppal_stage_attach(dev_priv->kernel_context, engine);
+ guc_ppal_stage_update(dev_priv->kernel_context, engine);
+ }
+
guc_reset_wq(guc->execbuf_client);
if (guc->preempt_client)
guc_reset_wq(guc->preempt_client);
err = intel_guc_sample_forcewake(guc);
if (err)
- return err;
+ goto err_clear_ctx_hooks;
err = guc_clients_doorbell_init(guc);
if (err)
- return err;
+ goto err_clear_ctx_hooks;
/* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(dev_priv);
@@ -1342,6 +1405,12 @@ int intel_guc_submission_enable(struct intel_guc *guc)
}
return 0;
+
+err_clear_ctx_hooks:
+ dev_priv->contexts.alloc_hook = NULL;
+ dev_priv->contexts.update_hook = NULL;
+ dev_priv->contexts.free_hook = NULL;
+ return err;
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1352,6 +1421,10 @@ void intel_guc_submission_disable(struct intel_guc *guc)
guc_interrupts_release(dev_priv);
guc_clients_doorbell_fini(guc);
+
+ dev_priv->contexts.alloc_hook = NULL;
+ dev_priv->contexts.update_hook = NULL;
+ dev_priv->contexts.free_hook = NULL;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
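The contexts.alloc_hook/update_hook/free_hook members used above are not defined in these hunks; below is a hedged sketch of the shape this patch presumably relies on being added to dev_priv->contexts earlier in the series (the struct tag and field names here are assumptions for illustration only):

/* Assumed shape, not part of this patch: per-context lifecycle hooks,
 * matching the function signatures assigned above.
 */
struct i915_gem_contexts {
	/* ... existing members ... */
	void (*alloc_hook)(struct i915_gem_context *ctx,
			   struct intel_engine_cs *engine);
	void (*update_hook)(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine);
	void (*free_hook)(struct i915_gem_context *ctx,
			  struct intel_engine_cs *engine);
};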