@@ -49,22 +49,30 @@
#define GUC_MAX_ENGINE_CLASSES (GUC_RESERVED_CLASS + 1)
#define GUC_MAX_INSTANCES_PER_CLASS 4
-/* Work queue item header definitions */
+/* Work queue status */
#define WQ_STATUS_ACTIVE 1
#define WQ_STATUS_SUSPENDED 2
#define WQ_STATUS_CMD_ERROR 3
#define WQ_STATUS_ENGINE_ID_NOT_USED 4
#define WQ_STATUS_SUSPENDED_FROM_RESET 5
+#define WQ_STATUS_INVALID 6
+
+/* Work queue item header definitions */
#define WQ_TYPE_SHIFT 0
#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT)
#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT)
+#define WQ_TYPE_KMD (0x3 << WQ_TYPE_SHIFT)
#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT)
-#define WQ_TARGET_SHIFT 10
+#define WQ_TYPE_RESUME (0x5 << WQ_TYPE_SHIFT)
+#define WQ_TYPE_INVALID (0x6 << WQ_TYPE_SHIFT)
+#define WQ_TARGET_SHIFT 8
#define WQ_LEN_SHIFT 16
-#define WQ_NO_WCFLUSH_WAIT (1 << 27)
-#define WQ_PRESENT_WORKLOAD (1 << 28)
+#define WQ_LEN_MASK (0x7FF << WQ_LEN_SHIFT)
+/* Work queue item submit element info definitions */
+#define WQ_SW_CTX_INDEX_SHIFT 0
+#define WQ_SW_COUNTER_SHIFT 11
+#define WQ_RING_TAIL_INDEX_SHIFT 18
#define WQ_RING_TAIL_SHIFT 20
#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */
#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
@@ -79,9 +79,14 @@
* Work Items:
* There are several types of work items that the host may place into a
* workqueue, each with its own requirements and limitations. Currently only
- * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
- * represents in-order queue. The kernel driver packs ring tail pointer and an
- * ELSP context descriptor dword into Work Item.
+ * out-of-order WQ_TYPE_KMD is supported by the firmware.
+ * Our in-orderness is guaranteed by the execlists emulation with two "fake"
+ * ports that the scheduler uses when submitting to the GuC backend. If we are
+ * submitting requests for context A in the first port and we place a request
+ * for context B in the second port, we won't submit more requests for A until
+ * all the pending ones complete. We have to take this into account if we try
+ * to change the current execlist emulation model (e.g.: increasing the number
+ * of fake ports could cause requests to execute in wrong global seqno order).
* See guc_add_request()
*
*/
@@ -408,6 +413,7 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
/* Construct a Work Item and append it to the GuC's Work Queue */
static void guc_wq_item_append(struct intel_guc_client *client,
+ struct intel_context *ce,
u32 target_engine, u32 context_desc,
u32 ring_tail, u32 fence_id)
{
@@ -445,12 +451,15 @@ static void guc_wq_item_append(struct intel_guc_client *client,
wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT);
} else {
/* Now fill in the 4-word work queue item */
- wqi->header = WQ_TYPE_INORDER |
- (wqi_len << WQ_LEN_SHIFT) |
+ wqi->header = WQ_TYPE_KMD |
(target_engine << WQ_TARGET_SHIFT) |
- WQ_NO_WCFLUSH_WAIT;
+ (wqi_len << WQ_LEN_SHIFT);
wqi->context_desc = context_desc;
- wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
+
+ wqi->submit_element_info =
+ (ce->sw_context_id << WQ_SW_CTX_INDEX_SHIFT) |
+ (ce->sw_counter << WQ_SW_COUNTER_SHIFT |
+ (ring_tail << WQ_RING_TAIL_INDEX_SHIFT));
GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
wqi->fence_id = fence_id;
}
@@ -492,12 +501,13 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
struct intel_guc_client *client = guc->execbuf_client;
struct intel_engine_cs *engine = rq->engine;
+ struct intel_context *ce = &rq->gem_context->__engine[rq->engine->id];
u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
spin_lock(&client->wq_lock);
- guc_wq_item_append(client, engine->guc_id, ctx_desc,
+ guc_wq_item_append(client, ce, engine->guc_id, ctx_desc,
ring_tail, rq->global_seqno);
guc_ring_doorbell(client);
@@ -530,16 +540,20 @@ static void inject_preempt_context(struct work_struct *work)
preempt_work[engine->id]);
struct intel_guc_client *client = guc->preempt_client;
struct guc_stage_desc *stage_desc = __get_stage_desc(client);
+ struct intel_context *ce = &client->owner->__engine[engine->id];
u32 ctx_desc = lower_32_bits(to_intel_context(client->owner,
engine)->lrc_desc);
u32 data[7];
+ /* FIXME: Gen11+ preemption is different anyway */
+ GEM_BUG_ON(INTEL_GEN(guc_to_i915(guc)) >= 11);
+
/*
* The ring contains commands to write GUC_PREEMPT_FINISHED into HWSP.
* See guc_fill_preempt_context().
*/
spin_lock_irq(&client->wq_lock);
- guc_wq_item_append(client, engine->guc_id, ctx_desc,
+ guc_wq_item_append(client, ce, engine->guc_id, ctx_desc,
GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);
spin_unlock_irq(&client->wq_lock);
@@ -74,7 +74,7 @@ static int ring_doorbell_nop(struct intel_guc_client *client)
spin_lock_irq(&client->wq_lock);
- guc_wq_item_append(client, 0, 0, 0, 0);
+ guc_wq_item_append(client, NULL, 0, 0, 0, 0);
guc_ring_doorbell(client);
spin_unlock_irq(&client->wq_lock);
Work queue items definitions were updated. To simplify the scheduling logic in the GuC firmware, now only out-of-order mode of scheduling is supported. Credits-to: Michel Thierry <michel.thierry@intel.com> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Michel Thierry <michel.thierry@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Michał Winiarski <michal.winiarski@intel.com> Cc: Tomasz Lis <tomasz.lis@intel.com> --- drivers/gpu/drm/i915/intel_guc_fwif.h | 18 +++++++++++----- drivers/gpu/drm/i915/intel_guc_submission.c | 32 +++++++++++++++++++++-------- drivers/gpu/drm/i915/selftests/intel_guc.c | 2 +- 3 files changed, 37 insertions(+), 15 deletions(-)