@@ -31,6 +31,7 @@
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
struct i915_guc_client {
+ spinlock_t wq_lock;
struct drm_i915_gem_object *client_obj;
u32 priority;
off_t doorbell_offset;
@@ -39,6 +40,8 @@ struct i915_guc_client {
uint16_t doorbell_id;
uint32_t ctx_index;
uint32_t wq_size;
+ uint32_t wq_tail;
+ uint32_t cookie;
};
#define I915_MAX_DOORBELLS 256
@@ -22,6 +22,7 @@
*
*/
#include <linux/firmware.h>
+#include <linux/circ_buf.h>
#include "i915_drv.h"
#include "intel_guc.h"
@@ -52,6 +53,14 @@
* Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
* mapped into process space.
*
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
+ * represents an in-order queue. The kernel driver packs the ring tail
+ * pointer and an ELSP context descriptor dword into each work item.
+ * See add_workqueue_item().
+ *
*/
/*
@@ -395,6 +404,8 @@ i915_guc_client_alloc(struct drm_device *dev, u32 priority)
/* XXX: evict a doorbell instead */
goto err;
+ spin_lock_init(&client->wq_lock);
+
init_ctx_desc(guc, client);
init_proc_desc(guc, client);
init_doorbell(guc, client);
@@ -414,6 +425,183 @@ err:
return NULL;
}
+/*
+ * Reserve space for one work item in the client's workqueue; on success
+ * the byte offset of the reserved slot is returned via *offset.
+ *
+ * CIRC_SPACE() is evaluated between the driver-owned tail (gc->wq_tail)
+ * and the GuC-owned head (desc->head); we poll for up to ~200ms waiting
+ * for the GuC to consume enough items. On success gc->wq_tail is advanced
+ * past the reserved slot, wrapping via the mask (wq_size is a power of
+ * two: GUC_WQ_SIZE == PAGE_SIZE * 2, so masking wraps correctly).
+ *
+ * Returns 0 on success, or the timeout error from the last wait attempt.
+ */
+static int get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
+{
+	struct guc_process_desc *desc;
+	void *base;
+	u32 size = sizeof(struct guc_wq_item);
+	int ret = 0, timeout_counter = 200;
+	unsigned long flags;
+
+	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+	desc = base + gc->proc_desc_offset;
+
+	while (timeout_counter-- > 0) {
+		spin_lock_irqsave(&gc->wq_lock, flags);
+
+		/* desc->head is advanced by the GuC as it consumes items */
+		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
+				gc->wq_size) >= size, 1);
+
+		if (!ret) {
+			*offset = gc->wq_tail;
+
+			/* advance the tail for next workqueue item */
+			gc->wq_tail += size;
+			gc->wq_tail &= gc->wq_size - 1;
+
+			/* this will break the loop */
+			timeout_counter = 0;
+		}
+
+		spin_unlock_irqrestore(&gc->wq_lock, flags);
+	}
+
+	kunmap_atomic(base);
+
+	return ret;
+}
+
+/*
+ * Refresh fields of the saved context image that may have gone stale:
+ * the ring buffer's GGTT start address and, for true PPGTT, the PDP root
+ * pointers. The register state lives in page 1 of the context object
+ * (see the CTX_RING_BUFFER_START indexing below).
+ */
+static void guc_update_context(struct intel_context *ctx,
+ struct intel_engine_cs *ring)
+{
+ struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+ struct page *page;
+ uint32_t *reg_state;
+
+ page = i915_gem_object_get_page(ctx_obj, 1);
+ reg_state = kmap_atomic(page);
+
+ /* +1: the dword after the register offset holds the value */
+ reg_state[CTX_RING_BUFFER_START + 1] =
+ i915_gem_obj_ggtt_offset(ringbuf->obj);
+
+ /* True PPGTT with dynamic page allocation: update PDP registers and
+ * point the unallocated PDPs to the scratch page
+ */
+ if (ctx->ppgtt) {
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 3);
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 2);
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 1);
+ ASSIGN_CTX_PDP(ctx->ppgtt, reg_state, 0);
+ }
+
+ kunmap_atomic(reg_state);
+}
+
+/*
+ * Build one WQ_TYPE_INORDER work item for (ctx, ring) and write it into
+ * the client's workqueue. The item packs the engine id, the context
+ * descriptor and the ring tail (in qwords) for the GuC to submit.
+ *
+ * Returns 0 on success, or the error from get_workqueue_space() if no
+ * slot became free in time. The doorbell is NOT rung here; the caller
+ * does that separately (see i915_guc_client_submit()).
+ */
+static int add_workqueue_item(struct i915_guc_client *gc,
+ struct intel_context *ctx,
+ struct intel_engine_cs *ring)
+{
+ struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_gem_object *ctx_obj;
+ struct guc_wq_item *wqi;
+ void *base;
+ u32 wq_off = 0, tail = ringbuf->tail, wq_len;
+ int ret;
+
+ ctx_obj = ctx->engine[ring->id].state;
+
+ /* Need this because of the deferred pin ctx and ring */
+ /* Shall we move this right after ring is pinned? */
+ guc_update_context(ctx, ring);
+
+ ret = get_workqueue_space(gc, &wq_off);
+ if (ret)
+ return ret;
+
+ /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
+ * should not have the case where structure wqi is across page, neither
+ * wrapped to the beginning. This simplifies the implementation below.
+ *
+ * XXX: if not the case, we need save data to a temp wqi and copy it to
+ * workqueue buffer dw by dw.
+ */
+ WARN_ON(sizeof(struct guc_wq_item) != 16);
+ WARN_ON(wq_off & 3);
+
+ /* wq starts from the page after doorbell / process_desc */
+ base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
+ (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
+ wq_off &= PAGE_SIZE - 1;
+ wqi = (struct guc_wq_item *)((char *)base + wq_off);
+
+ /* len does not include the header */
+ wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
+ wqi->header = WQ_TYPE_INORDER |
+ (wq_len << WQ_LEN_SHIFT) |
+ (ring->id << WQ_TARGET_SHIFT) |
+ WQ_NO_WCFLUSH_WAIT;
+
+ /* only the low 32 bits of the descriptor go into the work item */
+ wqi->context_desc = (u32)execlists_ctx_descriptor(ring, ctx_obj);
+ /* tail index is in qw */
+ tail >>= 3;
+ wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+ wqi->fence_id = 0; /*XXX: what fence to be here */
+
+ kunmap_atomic(base);
+
+ return 0;
+}
+
+/*
+ * Ring the client's doorbell to tell the GuC that new work-queue items
+ * are available.
+ *
+ * The doorbell cacheline holds a qword of {db_status, cookie}. We publish
+ * the new tail in the process descriptor, then advance the cookie (it must
+ * never be 0) with a cmpxchg; if the GuC changed the cookie under us, we
+ * retry once with the value it returned.
+ *
+ * Returns 0 if the doorbell was rung, -EAGAIN otherwise.
+ */
+static int ring_doorbell(struct i915_guc_client *gc)
+{
+	struct guc_process_desc *desc;
+	union guc_doorbell_qw db_cmp, db_exc, db_ret;
+	union guc_doorbell_qw *db;
+	void *base;
+	int attempt = 2, ret = -EAGAIN;
+
+	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+	desc = base + gc->proc_desc_offset;
+
+	/* Update the tail so it is visible to GuC */
+	desc->tail = gc->wq_tail;
+
+	/* current cookie */
+	db_cmp.db_status = GUC_DOORBELL_ENABLED;
+	db_cmp.cookie = gc->cookie;
+
+	/* cookie to be updated */
+	db_exc.db_status = GUC_DOORBELL_ENABLED;
+	db_exc.cookie = gc->cookie + 1;
+	if (db_exc.cookie == 0)
+		db_exc.cookie = 1;
+
+	/* pointer of current doorbell cacheline */
+	db = base + gc->doorbell_offset;
+
+	while (attempt--) {
+		/* lets ring the doorbell */
+		db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
+			db_cmp.value_qw, db_exc.value_qw);
+
+		/* if the exchange was successfully executed */
+		if (db_ret.value_qw == db_cmp.value_qw) {
+			/* db was successfully rung */
+			gc->cookie = db_exc.cookie;
+			ret = 0;
+			break;
+		}
+
+		/* XXX: doorbell was lost and need to acquire it again */
+		if (db_ret.db_status == GUC_DOORBELL_DISABLED)
+			break;
+
+		/* cookies are unsigned (gc->cookie is u32): print with %u */
+		DRM_ERROR("Cookie mismatch. Expected %u, returned %u\n",
+			  db_cmp.cookie, db_ret.cookie);
+
+		/* update the cookie to newly read cookie from GuC */
+		db_cmp.cookie = db_ret.cookie;
+		db_exc.cookie = db_ret.cookie + 1;
+		if (db_exc.cookie == 0)
+			db_exc.cookie = 1;
+	}
+
+	kunmap_atomic(base);
+	return ret;
+}
+
/**
* i915_guc_client_submit() - Submit commands through GuC
* @client: the guc client where commands will go through
@@ -426,5 +614,12 @@ int i915_guc_client_submit(struct i915_guc_client *client,
 			   struct intel_context *ctx,
 			   struct intel_engine_cs *ring)
 {
-	return 0;
+	int ret;
+
+	/* Stage the work item first; only ring the doorbell on success */
+	ret = add_workqueue_item(client, ctx, ring);
+	if (ret)
+		return ret;
+
+	return ring_doorbell(client);
 }
@@ -187,8 +187,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
return lrca >> 12;
}
-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
- struct drm_i915_gem_object *ctx_obj)
+uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+ struct drm_i915_gem_object *ctx_obj)
{
struct drm_device *dev = ring->dev;
uint64_t desc;
@@ -648,13 +648,17 @@ intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
struct drm_i915_gem_request *request)
{
struct intel_engine_cs *ring = ringbuf->ring;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
intel_logical_ring_advance(ringbuf);
if (intel_ring_stopped(ring))
return;
- execlists_context_queue(ring, ctx, ringbuf->tail, request);
+ if (dev_priv->guc.execbuf_client)
+ i915_guc_client_submit(dev_priv->guc.execbuf_client, ctx, ring);
+ else
+ execlists_context_queue(ring, ctx, ringbuf->tail, request);
}
static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
@@ -918,18 +922,23 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
{
struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret = 0;
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
if (ctx->engine[ring->id].pin_count++ == 0) {
- ret = i915_gem_obj_ggtt_pin(ctx_obj,
- GEN8_LR_CONTEXT_ALIGN, 0);
+ ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
if (ret)
goto reset_pin_count;
ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
if (ret)
goto unpin_ctx_obj;
+
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_scheduling)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
return ret;
@@ -1283,6 +1292,13 @@ static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
ret = __i915_add_request(ring, file, so.obj);
/* intel_logical_ring_add_request moves object to inactive if it
* fails */
+
+ /* The GuC firmware will try to collapse its DPC work queue if the new one
+ * is for the same context. So the following breadcrumb could be appended
+ * to this batch and submitted as one batch. Wait here to make sure the
+ * context state init is finished before any other submission to the GuC. */
+ if (!ret && i915.enable_guc_scheduling)
+ ret = i915_wait_request(so.obj->last_read_req);
out:
i915_gem_render_state_fini(&so);
return ret;
@@ -1291,8 +1307,13 @@ out:
static int gen8_init_rcs_context(struct intel_engine_cs *ring,
struct intel_context *ctx)
{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret;
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_scheduling)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
ret = intel_logical_ring_workarounds_emit(ring, ctx);
if (ret)
return ret;
@@ -1819,7 +1840,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
}
if (is_global_default_ctx) {
- ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+ ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
if (ret) {
DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
ret);
@@ -85,6 +85,8 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
struct drm_i915_gem_object *batch_obj,
u64 exec_start, u32 dispatch_flags);
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
+uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+ struct drm_i915_gem_object *ctx_obj);
void intel_lrc_irq_handler(struct intel_engine_cs *ring);
void intel_execlists_retire_requests(struct intel_engine_cs *ring);