Message ID | 20191106222550.20752-5-daniele.ceraolospurio@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Start removing legacy guc code | expand |
On 11/6/2019 14:25, Daniele Ceraolo Spurio wrote: > We now only use 1 client without any plan to add more. The client is > also only holding information about the WQ and the process desc, so we > can just move those in the intel_guc structure and always use stage_id > 0. > > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> > Cc: John Harrison <John.C.Harrison@Intel.com> > Cc: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_reset.c | 6 +- > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 8 +- > drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 - > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 272 +++++------------- > .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 45 +-- > drivers/gpu/drm/i915/i915_debugfs.c | 11 - > 6 files changed, 87 insertions(+), 256 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > index f03e000051c1..d2d88d0bc9d7 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -21,6 +21,7 @@ > #include "intel_reset.h" > > #include "uc/intel_guc.h" > +#include "uc/intel_guc_submission.h" > > #define RESET_MAX_RETRIES 3 > > @@ -1070,6 +1071,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) > int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) > { > struct intel_gt *gt = engine->gt; > + bool uses_guc = intel_engine_in_guc_submission_mode(engine); > int ret; > > GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); > @@ -1085,14 +1087,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) > "Resetting %s for %s\n", engine->name, msg); > atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); > > - if (!engine->gt->uc.guc.execbuf_client) > + if (!uses_guc) > ret = intel_gt_reset_engine(engine); > else > ret = intel_guc_reset_engine(&engine->gt->uc.guc, 
engine); > if (ret) { > /* If we fail here, we expect to fallback to a global reset */ > DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", > - engine->gt->uc.guc.execbuf_client ? "GuC " : "", > + uses_guc ? "GuC " : "", > engine->name, ret); > goto out; > } > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index b2d1766e689a..cd09c912e361 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -46,9 +46,13 @@ struct intel_guc { > > struct i915_vma *stage_desc_pool; > void *stage_desc_pool_vaddr; > - struct ida stage_ids; > > - struct intel_guc_client *execbuf_client; > + struct i915_vma *workqueue; > + void *workqueue_vaddr; > + spinlock_t wq_lock; > + > + struct i915_vma *proc_desc; > + void *proc_desc_vaddr; > > /* Control params for fw initialization */ > u32 params[GUC_CTL_MAX_DWORDS]; > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > index 1e8e4af7d9ca..a6b733c146c9 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > @@ -31,7 +31,6 @@ > > #define GUC_DOORBELL_INVALID 256 > > -#define GUC_PD_SIZE (PAGE_SIZE) > #define GUC_WQ_SIZE (PAGE_SIZE * 2) > > /* Work queue item header definitions */ > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 0088c3417641..71788589f9fe 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -27,24 +27,13 @@ > * code) matches the old submission model and will be updated as part of the > * upgrade to the new flow. > * > - * GuC client: > - * A intel_guc_client refers to a submission path through GuC. Currently, there > - * is only one client, which is charged with all submissions to the GuC. 
This > - * struct is the owner of a process descriptor and a workqueue (both of them > - * inside a single gem object that contains all required pages for these > - * elements). > - * > * GuC stage descriptor: > * During initialization, the driver allocates a static pool of 1024 such > - * descriptors, and shares them with the GuC. > - * Currently, there exists a 1:1 mapping between a intel_guc_client and a > - * guc_stage_desc (via the client's stage_id), so effectively only one > - * gets used. This stage descriptor lets the GuC know about the workqueue and > + * descriptors, and shares them with the GuC. Currently, we only use one > + * descriptor. This stage descriptor lets the GuC know about the workqueue and > * process descriptor. Theoretically, it also lets the GuC know about our HW > * contexts (context ID, etc...), but we actually employ a kind of submission > - * where the GuC uses the LRCA sent via the work item instead (the single > - * guc_stage_desc associated to execbuf client contains information about the > - * default kernel context only, but this is essentially unused). This is called > + * where the GuC uses the LRCA sent via the work item instead. This is called > * a "proxy" submission. 
> * > * The Scratch registers: > @@ -71,33 +60,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) > return rb_entry(rb, struct i915_priolist, node); > } > > -static inline bool is_high_priority(struct intel_guc_client *client) > +static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) > { > - return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || > - client->priority == GUC_CLIENT_PRIORITY_HIGH); > + struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; > + > + return &base[id]; > } > > -static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) > +static int guc_workqueue_create(struct intel_guc *guc) > { > - struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; > - > - return &base[client->stage_id]; > + return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, > + &guc->workqueue_vaddr); > } > > -static inline struct guc_process_desc * > -__get_process_desc(struct intel_guc_client *client) > +static void guc_workqueue_destroy(struct intel_guc *guc) > { > - return client->vaddr; > + i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); > } > > /* > * Initialise the process descriptor shared with the GuC firmware. 
> */ > -static void guc_proc_desc_init(struct intel_guc_client *client) > +static int guc_proc_desc_create(struct intel_guc *guc) > +{ > + const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc)); > + > + return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, > + &guc->proc_desc_vaddr); > +} > + > +static void guc_proc_desc_destroy(struct intel_guc *guc) > +{ > + i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); > +} > + > +static void guc_proc_desc_init(struct intel_guc *guc) > { > struct guc_process_desc *desc; > > - desc = memset(__get_process_desc(client), 0, sizeof(*desc)); > + desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); > > /* > * XXX: pDoorbell and WQVBaseAddress are pointers in process address > @@ -108,39 +109,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client) > desc->wq_base_addr = 0; > desc->db_base_addr = 0; > > - desc->stage_id = client->stage_id; > desc->wq_size_bytes = GUC_WQ_SIZE; > desc->wq_status = WQ_STATUS_ACTIVE; > - desc->priority = client->priority; > + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; > } > > -static void guc_proc_desc_fini(struct intel_guc_client *client) > +static void guc_proc_desc_fini(struct intel_guc *guc) > { > - struct guc_process_desc *desc; > - > - desc = __get_process_desc(client); > - memset(desc, 0, sizeof(*desc)); > + memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); > } > > static int guc_stage_desc_pool_create(struct intel_guc *guc) > { > u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * > GUC_MAX_STAGE_DESCRIPTORS); > - int ret; > - > - ret = intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, > - &guc->stage_desc_pool_vaddr); > - if (ret) > - return ret; > - > - ida_init(&guc->stage_ids); > > - return 0; > + return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, > + &guc->stage_desc_pool_vaddr); > } > > static void guc_stage_desc_pool_destroy(struct intel_guc *guc) > { > - 
ida_destroy(&guc->stage_ids); > i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); > } > > @@ -148,58 +137,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) > * Initialise/clear the stage descriptor shared with the GuC firmware. > * > * This descriptor tells the GuC where (in GGTT space) to find the important > - * data structures relating to this client (process descriptor, write queue, > + * data structures related to work submission (process descriptor, write queue, > * etc). > */ > -static void guc_stage_desc_init(struct intel_guc_client *client) > +static void guc_stage_desc_init(struct intel_guc *guc) > { > - struct intel_guc *guc = client->guc; > struct guc_stage_desc *desc; > - u32 gfx_addr; > > - desc = __get_stage_desc(client); > + /* we only use 1 stage desc, so hardcode it to 0 */ > + desc = __get_stage_desc(guc, 0); > memset(desc, 0, sizeof(*desc)); > > desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | > GUC_STAGE_DESC_ATTR_KERNEL; > - if (is_high_priority(client)) > - desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; > - desc->stage_id = client->stage_id; > - desc->priority = client->priority; > > - /* > - * The process descriptor and workqueue are all parts of the client > - * object, which the GuC will reference via the GGTT > - */ > - gfx_addr = intel_guc_ggtt_offset(guc, client->vma); > - desc->process_desc = gfx_addr; > - desc->wq_addr = gfx_addr + GUC_PD_SIZE; > - desc->wq_size = GUC_WQ_SIZE; > + desc->stage_id = 0; > + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; > > - desc->desc_private = ptr_to_u64(client); > + desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); > + desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); > + desc->wq_size = GUC_WQ_SIZE; > } > > -static void guc_stage_desc_fini(struct intel_guc_client *client) > +static void guc_stage_desc_fini(struct intel_guc *guc) > { > struct guc_stage_desc *desc; > > - desc = __get_stage_desc(client); > + desc = 
__get_stage_desc(guc, 0); > memset(desc, 0, sizeof(*desc)); > } > > /* Construct a Work Item and append it to the GuC's Work Queue */ > -static void guc_wq_item_append(struct intel_guc_client *client, > +static void guc_wq_item_append(struct intel_guc *guc, > u32 target_engine, u32 context_desc, > u32 ring_tail, u32 fence_id) > { > /* wqi_len is in DWords, and does not include the one-word header */ > const size_t wqi_size = sizeof(struct guc_wq_item); > const u32 wqi_len = wqi_size / sizeof(u32) - 1; > - struct guc_process_desc *desc = __get_process_desc(client); > + struct guc_process_desc *desc = guc->proc_desc_vaddr; > struct guc_wq_item *wqi; > u32 wq_off; > > - lockdep_assert_held(&client->wq_lock); > + lockdep_assert_held(&guc->wq_lock); > > /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we > * should not have the case where structure wqi is across page, neither > @@ -220,7 +200,7 @@ static void guc_wq_item_append(struct intel_guc_client *client, > GEM_BUG_ON(wq_off & (wqi_size - 1)); > > /* WQ starts from the page after process_desc */ This comment is now invalid. 
With that fixed: Reviewed-by: John Harrison <John.C.Harrison@Intel.com> > - wqi = client->vaddr + wq_off + GUC_PD_SIZE; > + wqi = guc->workqueue_vaddr + wq_off; > > /* Now fill in the 4-word work queue item */ > wqi->header = WQ_TYPE_INORDER | > @@ -238,12 +218,11 @@ static void guc_wq_item_append(struct intel_guc_client *client, > > static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) > { > - struct intel_guc_client *client = guc->execbuf_client; > struct intel_engine_cs *engine = rq->engine; > u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); > u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); > > - guc_wq_item_append(client, engine->guc_id, ctx_desc, > + guc_wq_item_append(guc, engine->guc_id, ctx_desc, > ring_tail, rq->fence.seqno); > } > > @@ -267,9 +246,8 @@ static void guc_submit(struct intel_engine_cs *engine, > struct i915_request **end) > { > struct intel_guc *guc = &engine->gt->uc.guc; > - struct intel_guc_client *client = guc->execbuf_client; > > - spin_lock(&client->wq_lock); > + spin_lock(&guc->wq_lock); > > do { > struct i915_request *rq = *out++; > @@ -278,7 +256,7 @@ static void guc_submit(struct intel_engine_cs *engine, > guc_add_request(guc, rq); > } while (out != end); > > - spin_unlock(&client->wq_lock); > + spin_unlock(&guc->wq_lock); > } > > static inline int rq_prio(const struct i915_request *rq) > @@ -529,125 +507,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine) > * path of guc_submit() above. > */ > > -/** > - * guc_client_alloc() - Allocate an intel_guc_client > - * @guc: the intel_guc structure > - * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW > - * The kernel client to replace ExecList submission is created with > - * NORMAL priority. Priority of a client for scheduler can be HIGH, > - * while a preemption context can use CRITICAL. > - * > - * Return: An intel_guc_client object if success, else NULL. 
> - */ > -static struct intel_guc_client * > -guc_client_alloc(struct intel_guc *guc, u32 priority) > -{ > - struct intel_guc_client *client; > - struct i915_vma *vma; > - void *vaddr; > - int ret; > - > - client = kzalloc(sizeof(*client), GFP_KERNEL); > - if (!client) > - return ERR_PTR(-ENOMEM); > - > - client->guc = guc; > - client->priority = priority; > - spin_lock_init(&client->wq_lock); > - > - ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, > - GFP_KERNEL); > - if (ret < 0) > - goto err_client; > - > - client->stage_id = ret; > - > - /* The first page is doorbell/proc_desc. Two followed pages are wq. */ > - vma = intel_guc_allocate_vma(guc, GUC_PD_SIZE + GUC_WQ_SIZE); > - if (IS_ERR(vma)) { > - ret = PTR_ERR(vma); > - goto err_id; > - } > - > - /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ > - client->vma = vma; > - > - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); > - if (IS_ERR(vaddr)) { > - ret = PTR_ERR(vaddr); > - goto err_vma; > - } > - client->vaddr = vaddr; > - > - DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n", > - priority, client, client->stage_id); > - > - return client; > - > -err_vma: > - i915_vma_unpin_and_release(&client->vma, 0); > -err_id: > - ida_simple_remove(&guc->stage_ids, client->stage_id); > -err_client: > - kfree(client); > - return ERR_PTR(ret); > -} > - > -static void guc_client_free(struct intel_guc_client *client) > -{ > - i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); > - ida_simple_remove(&client->guc->stage_ids, client->stage_id); > - kfree(client); > -} > - > -static int guc_clients_create(struct intel_guc *guc) > -{ > - struct intel_guc_client *client; > - > - GEM_BUG_ON(guc->execbuf_client); > - > - client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL); > - if (IS_ERR(client)) { > - DRM_ERROR("Failed to create GuC client for submission!\n"); > - return PTR_ERR(client); > - } > - guc->execbuf_client = client; > - > - return 0; > 
-} > - > -static void guc_clients_destroy(struct intel_guc *guc) > -{ > - struct intel_guc_client *client; > - > - client = fetch_and_zero(&guc->execbuf_client); > - if (client) > - guc_client_free(client); > -} > - > -static void __guc_client_enable(struct intel_guc_client *client) > -{ > - guc_proc_desc_init(client); > - guc_stage_desc_init(client); > -} > - > -static void __guc_client_disable(struct intel_guc_client *client) > -{ > - guc_stage_desc_fini(client); > - guc_proc_desc_fini(client); > -} > - > -static void guc_clients_enable(struct intel_guc *guc) > -{ > - __guc_client_enable(guc->execbuf_client); > -} > - > -static void guc_clients_disable(struct intel_guc *guc) > -{ > - if (guc->execbuf_client) > - __guc_client_disable(guc->execbuf_client); > -} > - > /* > * Set up the memory resources to be shared with the GuC (via the GGTT) > * at firmware loading time. > @@ -668,12 +527,20 @@ int intel_guc_submission_init(struct intel_guc *guc) > */ > GEM_BUG_ON(!guc->stage_desc_pool); > > - ret = guc_clients_create(guc); > + ret = guc_workqueue_create(guc); > if (ret) > goto err_pool; > > + ret = guc_proc_desc_create(guc); > + if (ret) > + goto err_workqueue; > + > + spin_lock_init(&guc->wq_lock); > + > return 0; > > +err_workqueue: > + guc_workqueue_destroy(guc); > err_pool: > guc_stage_desc_pool_destroy(guc); > return ret; > @@ -681,10 +548,11 @@ int intel_guc_submission_init(struct intel_guc *guc) > > void intel_guc_submission_fini(struct intel_guc *guc) > { > - guc_clients_destroy(guc); > - > - if (guc->stage_desc_pool) > + if (guc->stage_desc_pool) { > + guc_proc_desc_destroy(guc); > + guc_workqueue_destroy(guc); > guc_stage_desc_pool_destroy(guc); > + } > } > > static void guc_interrupts_capture(struct intel_gt *gt) > @@ -816,9 +684,8 @@ void intel_guc_submission_enable(struct intel_guc *guc) > sizeof(struct guc_wq_item) * > I915_NUM_ENGINES > GUC_WQ_SIZE); > > - GEM_BUG_ON(!guc->execbuf_client); > - > - guc_clients_enable(guc); > + 
guc_proc_desc_init(guc); > + guc_stage_desc_init(guc); > > /* Take over from manual control of ELSP (execlists) */ > guc_interrupts_capture(gt); > @@ -836,7 +703,9 @@ void intel_guc_submission_disable(struct intel_guc *guc) > GEM_BUG_ON(gt->awake); /* GT should be parked first */ > > guc_interrupts_release(gt); > - guc_clients_disable(guc); > + > + guc_stage_desc_fini(guc); > + guc_proc_desc_fini(guc); > } > > static bool __guc_submission_support(struct intel_guc *guc) > @@ -854,3 +723,8 @@ void intel_guc_submission_init_early(struct intel_guc *guc) > { > guc->submission_supported = __guc_submission_support(guc); > } > + > +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) > +{ > + return engine->set_default_submission == guc_set_default_submission; > +} > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h > index e2deb4e6f42a..e402a2932592 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h > @@ -6,48 +6,10 @@ > #ifndef _INTEL_GUC_SUBMISSION_H_ > #define _INTEL_GUC_SUBMISSION_H_ > > -#include <linux/spinlock.h> > +#include <linux/types.h> > > -#include "gt/intel_engine_types.h" > - > -#include "i915_gem.h" > -#include "i915_selftest.h" > - > -struct drm_i915_private; > - > -/* > - * This structure primarily describes the GEM object shared with the GuC. > - * The specs sometimes refer to this object as a "GuC context", but we use > - * the term "client" to avoid confusion with hardware contexts. This > - * GEM object is held for the entire lifetime of our interaction with > - * the GuC, being allocated before the GuC is loaded with its firmware. > - * Because there's no way to update the address used by the GuC after > - * initialisation, the shared object must stay pinned into the GGTT as > - * long as the GuC is in use. 
We also keep the first page (only) mapped > - * into kernel address space, as it includes shared data that must be > - * updated on every request submission. > - * > - * The single GEM object described here is actually made up of several > - * separate areas, as far as the GuC is concerned. The first page (kept > - * kmap'd) includes the "process descriptor" which holds sequence data for > - * the doorbell, and one cacheline which actually *is* the doorbell; a > - * write to this will "ring the doorbell" (i.e. send an interrupt to the > - * GuC). The subsequent pages of the client object constitute the work > - * queue (a circular array of work items), again described in the process > - * descriptor. Work queue pages are mapped momentarily as required. > - */ > -struct intel_guc_client { > - struct i915_vma *vma; > - void *vaddr; > - struct intel_guc *guc; > - > - /* bitmap of (host) engine ids */ > - u32 priority; > - u32 stage_id; > - > - /* Protects GuC client's WQ access */ > - spinlock_t wq_lock; > -}; > +struct intel_guc; > +struct intel_engine_cs; > > void intel_guc_submission_init_early(struct intel_guc *guc); > int intel_guc_submission_init(struct intel_guc *guc); > @@ -56,5 +18,6 @@ void intel_guc_submission_disable(struct intel_guc *guc); > void intel_guc_submission_fini(struct intel_guc *guc); > int intel_guc_preempt_work_create(struct intel_guc *guc); > void intel_guc_preempt_work_destroy(struct intel_guc *guc); > +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); > > #endif > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 5d5974e7f3ed..f32e7b016197 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1802,23 +1802,12 @@ static void i915_guc_log_info(struct seq_file *m, > static int i915_guc_info(struct seq_file *m, void *data) > { > struct drm_i915_private *dev_priv = node_to_i915(m->private); > - const struct intel_guc *guc = 
&dev_priv->gt.uc.guc; > - struct intel_guc_client *client = guc->execbuf_client; > > if (!USES_GUC(dev_priv)) > return -ENODEV; > > i915_guc_log_info(m, dev_priv); > > - if (!USES_GUC_SUBMISSION(dev_priv)) > - return 0; > - > - GEM_BUG_ON(!guc->execbuf_client); > - > - seq_printf(m, "\nGuC execbuf client @ %p:\n", client); > - seq_printf(m, "\tPriority %d, GuC stage index: %u\n", > - client->priority, > - client->stage_id); > /* Add more as required ... */ > > return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..d2d88d0bc9d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -21,6 +21,7 @@ #include "intel_reset.h" #include "uc/intel_guc.h" +#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -1070,6 +1071,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; + bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); @@ -1085,14 +1087,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->gt->uc.guc.execbuf_client) + if (!uses_guc) ret = intel_gt_reset_engine(engine); else ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->gt->uc.guc.execbuf_client ? "GuC " : "", + uses_guc ? 
"GuC " : "", engine->name, ret); goto out; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index b2d1766e689a..cd09c912e361 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -46,9 +46,13 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; - struct ida stage_ids; - struct intel_guc_client *execbuf_client; + struct i915_vma *workqueue; + void *workqueue_vaddr; + spinlock_t wq_lock; + + struct i915_vma *proc_desc; + void *proc_desc_vaddr; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 1e8e4af7d9ca..a6b733c146c9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -31,7 +31,6 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_PD_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) /* Work queue item header definitions */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0088c3417641..71788589f9fe 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -27,24 +27,13 @@ * code) matches the old submission model and will be updated as part of the * upgrade to the new flow. * - * GuC client: - * A intel_guc_client refers to a submission path through GuC. Currently, there - * is only one client, which is charged with all submissions to the GuC. This - * struct is the owner of a process descriptor and a workqueue (both of them - * inside a single gem object that contains all required pages for these - * elements). - * * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. 
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a - * guc_stage_desc (via the client's stage_id), so effectively only one - * gets used. This stage descriptor lets the GuC know about the workqueue and + * descriptors, and shares them with the GuC. Currently, we only use one + * descriptor. This stage descriptor lets the GuC know about the workqueue and * process descriptor. Theoretically, it also lets the GuC know about our HW * contexts (context ID, etc...), but we actually employ a kind of submission - * where the GuC uses the LRCA sent via the work item instead (the single - * guc_stage_desc associated to execbuf client contains information about the - * default kernel context only, but this is essentially unused). This is called + * where the GuC uses the LRCA sent via the work item instead. This is called * a "proxy" submission. * * The Scratch registers: @@ -71,33 +60,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static inline bool is_high_priority(struct intel_guc_client *client) +static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) { - return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || - client->priority == GUC_CLIENT_PRIORITY_HIGH); + struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; + + return &base[id]; } -static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) +static int guc_workqueue_create(struct intel_guc *guc) { - struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; - - return &base[client->stage_id]; + return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, + &guc->workqueue_vaddr); } -static inline struct guc_process_desc * -__get_process_desc(struct intel_guc_client *client) +static void guc_workqueue_destroy(struct intel_guc *guc) { - return client->vaddr; + i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); } /* * Initialise the 
process descriptor shared with the GuC firmware. */ -static void guc_proc_desc_init(struct intel_guc_client *client) +static int guc_proc_desc_create(struct intel_guc *guc) +{ + const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc)); + + return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, + &guc->proc_desc_vaddr); +} + +static void guc_proc_desc_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); +} + +static void guc_proc_desc_init(struct intel_guc *guc) { struct guc_process_desc *desc; - desc = memset(__get_process_desc(client), 0, sizeof(*desc)); + desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); /* * XXX: pDoorbell and WQVBaseAddress are pointers in process address @@ -108,39 +109,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client) desc->wq_base_addr = 0; desc->db_base_addr = 0; - desc->stage_id = client->stage_id; desc->wq_size_bytes = GUC_WQ_SIZE; desc->wq_status = WQ_STATUS_ACTIVE; - desc->priority = client->priority; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; } -static void guc_proc_desc_fini(struct intel_guc_client *client) +static void guc_proc_desc_fini(struct intel_guc *guc) { - struct guc_process_desc *desc; - - desc = __get_process_desc(client); - memset(desc, 0, sizeof(*desc)); + memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); } static int guc_stage_desc_pool_create(struct intel_guc *guc) { u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * GUC_MAX_STAGE_DESCRIPTORS); - int ret; - - ret = intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, - &guc->stage_desc_pool_vaddr); - if (ret) - return ret; - - ida_init(&guc->stage_ids); - return 0; + return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, + &guc->stage_desc_pool_vaddr); } static void guc_stage_desc_pool_destroy(struct intel_guc *guc) { - ida_destroy(&guc->stage_ids); i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); } @@ 
-148,58 +137,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) * Initialise/clear the stage descriptor shared with the GuC firmware. * * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures relating to this client (process descriptor, write queue, + * data structures related to work submission (process descriptor, write queue, * etc). */ -static void guc_stage_desc_init(struct intel_guc_client *client) +static void guc_stage_desc_init(struct intel_guc *guc) { - struct intel_guc *guc = client->guc; struct guc_stage_desc *desc; - u32 gfx_addr; - desc = __get_stage_desc(client); + /* we only use 1 stage desc, so hardcode it to 0 */ + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; - if (is_high_priority(client)) - desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; - desc->stage_id = client->stage_id; - desc->priority = client->priority; - /* - * The process descriptor and workqueue are all parts of the client - * object, which the GuC will reference via the GGTT - */ - gfx_addr = intel_guc_ggtt_offset(guc, client->vma); - desc->process_desc = gfx_addr; - desc->wq_addr = gfx_addr + GUC_PD_SIZE; - desc->wq_size = GUC_WQ_SIZE; + desc->stage_id = 0; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; - desc->desc_private = ptr_to_u64(client); + desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); + desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); + desc->wq_size = GUC_WQ_SIZE; } -static void guc_stage_desc_fini(struct intel_guc_client *client) +static void guc_stage_desc_fini(struct intel_guc *guc) { struct guc_stage_desc *desc; - desc = __get_stage_desc(client); + desc = __get_stage_desc(guc, 0); memset(desc, 0, sizeof(*desc)); } /* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct intel_guc_client *client, +static void guc_wq_item_append(struct 
intel_guc *guc, u32 target_engine, u32 context_desc, u32 ring_tail, u32 fence_id) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct guc_process_desc *desc = __get_process_desc(client); + struct guc_process_desc *desc = guc->proc_desc_vaddr; struct guc_wq_item *wqi; u32 wq_off; - lockdep_assert_held(&client->wq_lock); + lockdep_assert_held(&guc->wq_lock); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -220,7 +200,7 @@ static void guc_wq_item_append(struct intel_guc_client *client, GEM_BUG_ON(wq_off & (wqi_size - 1)); /* WQ starts from the page after process_desc */ - wqi = client->vaddr + wq_off + GUC_PD_SIZE; + wqi = guc->workqueue_vaddr + wq_off; /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | @@ -238,12 +218,11 @@ static void guc_wq_item_append(struct intel_guc_client *client, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - guc_wq_item_append(client, engine->guc_id, ctx_desc, + guc_wq_item_append(guc, engine->guc_id, ctx_desc, ring_tail, rq->fence.seqno); } @@ -267,9 +246,8 @@ static void guc_submit(struct intel_engine_cs *engine, struct i915_request **end) { struct intel_guc *guc = &engine->gt->uc.guc; - struct intel_guc_client *client = guc->execbuf_client; - spin_lock(&client->wq_lock); + spin_lock(&guc->wq_lock); do { struct i915_request *rq = *out++; @@ -278,7 +256,7 @@ static void guc_submit(struct intel_engine_cs *engine, guc_add_request(guc, rq); } while (out != end); - spin_unlock(&client->wq_lock); + spin_unlock(&guc->wq_lock); } static inline 
int rq_prio(const struct i915_request *rq) @@ -529,125 +507,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine) * path of guc_submit() above. */ -/** - * guc_client_alloc() - Allocate an intel_guc_client - * @guc: the intel_guc structure - * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. - * - * Return: An intel_guc_client object if success, else NULL. - */ -static struct intel_guc_client * -guc_client_alloc(struct intel_guc *guc, u32 priority) -{ - struct intel_guc_client *client; - struct i915_vma *vma; - void *vaddr; - int ret; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return ERR_PTR(-ENOMEM); - - client->guc = guc; - client->priority = priority; - spin_lock_init(&client->wq_lock); - - ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); - if (ret < 0) - goto err_client; - - client->stage_id = ret; - - /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - vma = intel_guc_allocate_vma(guc, GUC_PD_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_id; - } - - /* We'll keep just the first (doorbell/proc) page permanently kmap'd. 
*/ - client->vma = vma; - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - client->vaddr = vaddr; - - DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n", - priority, client, client->stage_id); - - return client; - -err_vma: - i915_vma_unpin_and_release(&client->vma, 0); -err_id: - ida_simple_remove(&guc->stage_ids, client->stage_id); -err_client: - kfree(client); - return ERR_PTR(ret); -} - -static void guc_client_free(struct intel_guc_client *client) -{ - i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP); - ida_simple_remove(&client->guc->stage_ids, client->stage_id); - kfree(client); -} - -static int guc_clients_create(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - GEM_BUG_ON(guc->execbuf_client); - - client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for submission!\n"); - return PTR_ERR(client); - } - guc->execbuf_client = client; - - return 0; -} - -static void guc_clients_destroy(struct intel_guc *guc) -{ - struct intel_guc_client *client; - - client = fetch_and_zero(&guc->execbuf_client); - if (client) - guc_client_free(client); -} - -static void __guc_client_enable(struct intel_guc_client *client) -{ - guc_proc_desc_init(client); - guc_stage_desc_init(client); -} - -static void __guc_client_disable(struct intel_guc_client *client) -{ - guc_stage_desc_fini(client); - guc_proc_desc_fini(client); -} - -static void guc_clients_enable(struct intel_guc *guc) -{ - __guc_client_enable(guc->execbuf_client); -} - -static void guc_clients_disable(struct intel_guc *guc) -{ - if (guc->execbuf_client) - __guc_client_disable(guc->execbuf_client); -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. 
@@ -668,12 +527,20 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = guc_clients_create(guc); + ret = guc_workqueue_create(guc); if (ret) goto err_pool; + ret = guc_proc_desc_create(guc); + if (ret) + goto err_workqueue; + + spin_lock_init(&guc->wq_lock); + return 0; +err_workqueue: + guc_workqueue_destroy(guc); err_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -681,10 +548,11 @@ int intel_guc_submission_init(struct intel_guc *guc) void intel_guc_submission_fini(struct intel_guc *guc) { - guc_clients_destroy(guc); - - if (guc->stage_desc_pool) + if (guc->stage_desc_pool) { + guc_proc_desc_destroy(guc); + guc_workqueue_destroy(guc); guc_stage_desc_pool_destroy(guc); + } } static void guc_interrupts_capture(struct intel_gt *gt) @@ -816,9 +684,8 @@ void intel_guc_submission_enable(struct intel_guc *guc) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - GEM_BUG_ON(!guc->execbuf_client); - - guc_clients_enable(guc); + guc_proc_desc_init(guc); + guc_stage_desc_init(guc); /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); @@ -836,7 +703,9 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(gt->awake); /* GT should be parked first */ guc_interrupts_release(gt); - guc_clients_disable(guc); + + guc_stage_desc_fini(guc); + guc_proc_desc_fini(guc); } static bool __guc_submission_support(struct intel_guc *guc) @@ -854,3 +723,8 @@ void intel_guc_submission_init_early(struct intel_guc *guc) { guc->submission_supported = __guc_submission_support(guc); } + +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == guc_set_default_submission; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index e2deb4e6f42a..e402a2932592 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -6,48 +6,10 @@ #ifndef _INTEL_GUC_SUBMISSION_H_ #define _INTEL_GUC_SUBMISSION_H_ -#include <linux/spinlock.h> +#include <linux/types.h> -#include "gt/intel_engine_types.h" - -#include "i915_gem.h" -#include "i915_selftest.h" - -struct drm_i915_private; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. 
- */ -struct intel_guc_client { - struct i915_vma *vma; - void *vaddr; - struct intel_guc *guc; - - /* bitmap of (host) engine ids */ - u32 priority; - u32 stage_id; - - /* Protects GuC client's WQ access */ - spinlock_t wq_lock; -}; +struct intel_guc; +struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); int intel_guc_submission_init(struct intel_guc *guc); @@ -56,5 +18,6 @@ void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); +bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5d5974e7f3ed..f32e7b016197 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1802,23 +1802,12 @@ static void i915_guc_log_info(struct seq_file *m, static int i915_guc_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_guc *guc = &dev_priv->gt.uc.guc; - struct intel_guc_client *client = guc->execbuf_client; if (!USES_GUC(dev_priv)) return -ENODEV; i915_guc_log_info(m, dev_priv); - if (!USES_GUC_SUBMISSION(dev_priv)) - return 0; - - GEM_BUG_ON(!guc->execbuf_client); - - seq_printf(m, "\nGuC execbuf client @ %p:\n", client); - seq_printf(m, "\tPriority %d, GuC stage index: %u\n", - client->priority, - client->stage_id); /* Add more as required ... */ return 0;
We now only use 1 client, without any plan to add more. The client also only holds information about the WQ and the process descriptor, so we can just move those into the intel_guc structure and always use stage_id 0. Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: John Harrison <John.C.Harrison@Intel.com> Cc: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/i915/gt/intel_reset.c | 6 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 8 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 - .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 272 +++++------------- .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 45 +-- drivers/gpu/drm/i915/i915_debugfs.c | 11 - 6 files changed, 87 insertions(+), 256 deletions(-)