diff mbox

[02/12] drm/i915/guc: Keep the ctx_pool_vaddr mapped, for easy access

Message ID 1490086977-9282-3-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com March 21, 2017, 9:02 a.m. UTC
The GuC context descriptor is large. If we build it in a local guc_desc
variable we risk overflowing the kernel stack, so keep the context pool
mapped and update the descriptor in place instead.

Also, Chris abhors scatterlists :)

v2: Rebased, helper function to retrieve the context descriptor,
s/ctx_pool_vma/ctx_pool/

v3: Zero out guc_context_desc before initialization

v4: Do not do arithmetic on void pointers

Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 93 +++++++++++++++---------------
 drivers/gpu/drm/i915/intel_guc_loader.c    |  2 +-
 drivers/gpu/drm/i915/intel_uc.h            |  3 +-
 3 files changed, 48 insertions(+), 50 deletions(-)

Comments

oscar.mateo@intel.com March 22, 2017, 9:42 a.m. UTC | #1
On 03/22/2017 02:45 AM, Chris Wilson wrote:
> On Tue, Mar 21, 2017 at 02:02:47AM -0700, Oscar Mateo wrote:
>> The GuC descriptor is big in size. If we use a local definition of
>> guc_desc we have a chance to overflow stack, so avoid it.
>>
>> Also, Chris abhors scatterlists :)
>>
>> v2: Rebased, helper function to retrieve the context descriptor,
>> s/ctx_pool_vma/ctx_pool/
>>
>> v3: Zero out guc_context_desc before initialization
>>
>> v4: Do not do arithmetic on void pointers
>>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 93 +++++++++++++++---------------
>>   drivers/gpu/drm/i915/intel_guc_loader.c    |  2 +-
>>   drivers/gpu/drm/i915/intel_uc.h            |  3 +-
>>   3 files changed, 48 insertions(+), 50 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 21dadc1..5870cec 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -133,6 +133,12 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
>>   	return intel_guc_send(guc, action, ARRAY_SIZE(action));
>>   }
>>   
>> +static struct guc_context_desc *__get_context_desc(struct i915_guc_client *client)
>> +{
>> +	return (struct guc_context_desc *)((char *)client->guc->ctx_pool_vaddr +
> We can use gccisms like using void * for arithmetic computations in the
> kernel, i.e.
> 	void *base = client->guc->ctx_pool_vaddr;
> 	return base + sizeof(struct guc_context_desc) * client->ctx_index;
> is just fine.
> -Chris

:_(
I got convinced by a previous review comment that arithmetic with void 
pointers was frowned upon.
Ok, thanks!
Chris Wilson March 22, 2017, 9:45 a.m. UTC | #2
On Tue, Mar 21, 2017 at 02:02:47AM -0700, Oscar Mateo wrote:
> The GuC descriptor is big in size. If we use a local definition of
> guc_desc we have a chance to overflow stack, so avoid it.
> 
> Also, Chris abhors scatterlists :)
> 
> v2: Rebased, helper function to retrieve the context descriptor,
> s/ctx_pool_vma/ctx_pool/
> 
> v3: Zero out guc_context_desc before initialization
> 
> v4: Do not do arithmetic on void pointers
> 
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 93 +++++++++++++++---------------
>  drivers/gpu/drm/i915/intel_guc_loader.c    |  2 +-
>  drivers/gpu/drm/i915/intel_uc.h            |  3 +-
>  3 files changed, 48 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 21dadc1..5870cec 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -133,6 +133,12 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
>  	return intel_guc_send(guc, action, ARRAY_SIZE(action));
>  }
>  
> +static struct guc_context_desc *__get_context_desc(struct i915_guc_client *client)
> +{
> +	return (struct guc_context_desc *)((char *)client->guc->ctx_pool_vaddr +

We can use gccisms such as arithmetic on void * pointers in the
kernel, e.g.
	void *base = client->guc->ctx_pool_vaddr;
	return base + sizeof(struct guc_context_desc) * client->ctx_index;
is just fine.
-Chris
Chris Wilson March 22, 2017, 4:57 p.m. UTC | #3
On Wed, Mar 22, 2017 at 02:42:27AM -0700, Oscar Mateo wrote:
> 
> 
> On 03/22/2017 02:45 AM, Chris Wilson wrote:
> >On Tue, Mar 21, 2017 at 02:02:47AM -0700, Oscar Mateo wrote:
> >>The GuC descriptor is big in size. If we use a local definition of
> >>guc_desc we have a chance to overflow stack, so avoid it.
> >>
> >>Also, Chris abhors scatterlists :)
> >>
> >>v2: Rebased, helper function to retrieve the context descriptor,
> >>s/ctx_pool_vma/ctx_pool/
> >>
> >>v3: Zero out guc_context_desc before initialization
> >>
> >>v4: Do not do arithmetic on void pointers
> >>
> >>Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> >>Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >>Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> >>---
> >>  drivers/gpu/drm/i915/i915_guc_submission.c | 93 +++++++++++++++---------------
> >>  drivers/gpu/drm/i915/intel_guc_loader.c    |  2 +-
> >>  drivers/gpu/drm/i915/intel_uc.h            |  3 +-
> >>  3 files changed, 48 insertions(+), 50 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >>index 21dadc1..5870cec 100644
> >>--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >>+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >>@@ -133,6 +133,12 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
> >>  	return intel_guc_send(guc, action, ARRAY_SIZE(action));
> >>  }
> >>+static struct guc_context_desc *__get_context_desc(struct i915_guc_client *client)
> >>+{
> >>+	return (struct guc_context_desc *)((char *)client->guc->ctx_pool_vaddr +
> >We can use gccisms like using void * for arithmetic computations in the
> >kernel, i.e.
> >	void *base = client->guc->ctx_pool_vaddr;
> >	return base + sizeof(struct guc_context_desc) * client->ctx_index;
> >is just fine.
> >-Chris
> 
> :_(
> I got convinced by a previous review comment that arithmetic with
> void pointers was frowned upon.
> Ok, thanks!

Joonas mentioned trying:

	struct guc_context_desc *base = client->guc->ctx_pool_vaddr;
	return base + client->ctx_index;

or
	return &base[client->ctx_index];

for even more idiomatic C.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 21dadc1..5870cec 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -133,6 +133,12 @@  static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
+static struct guc_context_desc *__get_context_desc(struct i915_guc_client *client)
+{
+	return (struct guc_context_desc *)((char *)client->guc->ctx_pool_vaddr +
+		sizeof(struct guc_context_desc) * client->ctx_index);
+}
+
 /*
  * Initialise, update, or clear doorbell data shared with the GuC
  *
@@ -142,21 +148,11 @@  static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index)
 
 static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id)
 {
-	struct sg_table *sg = client->guc->ctx_pool_vma->pages;
-	struct guc_context_desc desc;
-	size_t len;
+	struct guc_context_desc *desc;
 
 	/* Update the GuC's idea of the doorbell ID */
-	len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-				 sizeof(desc) * client->ctx_index);
-	if (len != sizeof(desc))
-		return -EFAULT;
-
-	desc.db_id = new_id;
-	len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-				   sizeof(desc) * client->ctx_index);
-	if (len != sizeof(desc))
-		return -EFAULT;
+	desc = __get_context_desc(client);
+	desc->db_id = new_id;
 
 	return 0;
 }
@@ -272,29 +268,28 @@  static void guc_proc_desc_init(struct intel_guc *guc,
  * data structures relating to this client (doorbell, process descriptor,
  * write queue, etc).
  */
-
 static void guc_ctx_desc_init(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx = client->owner;
-	struct guc_context_desc desc;
-	struct sg_table *sg;
+	struct guc_context_desc *desc;
 	unsigned int tmp;
 	u32 gfx_addr;
 
-	memset(&desc, 0, sizeof(desc));
+	desc = __get_context_desc(client);
+	memset(desc, 0, sizeof(*desc));
 
-	desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
-	desc.context_id = client->ctx_index;
-	desc.priority = client->priority;
-	desc.db_id = client->doorbell_id;
+	desc->attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
+	desc->context_id = client->ctx_index;
+	desc->priority = client->priority;
+	desc->db_id = client->doorbell_id;
 
 	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
 		struct intel_context *ce = &ctx->engine[engine->id];
 		uint32_t guc_engine_id = engine->guc_id;
-		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
+		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -319,49 +314,40 @@  static void guc_ctx_desc_init(struct intel_guc *guc,
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc.engines_used |= (1 << guc_engine_id);
+		desc->engines_used |= (1 << guc_engine_id);
 	}
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
-			client->engines, desc.engines_used);
-	WARN_ON(desc.engines_used == 0);
+			client->engines, desc->engines_used);
+	WARN_ON(desc->engines_used == 0);
 
 	/*
 	 * The doorbell, process descriptor, and workqueue are all parts
 	 * of the client object, which the GuC will reference via the GGTT
 	 */
 	gfx_addr = guc_ggtt_offset(client->vma);
-	desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
+	desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
 				client->doorbell_offset;
-	desc.db_trigger_cpu = (uintptr_t)__get_doorbell(client);
-	desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
-	desc.process_desc = gfx_addr + client->proc_desc_offset;
-	desc.wq_addr = gfx_addr + client->wq_offset;
-	desc.wq_size = client->wq_size;
+	desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client);
+	desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
+	desc->process_desc = gfx_addr + client->proc_desc_offset;
+	desc->wq_addr = gfx_addr + client->wq_offset;
+	desc->wq_size = client->wq_size;
 
 	/*
 	 * XXX: Take LRCs from an existing context if this is not an
 	 * IsKMDCreatedContext client
 	 */
-	desc.desc_private = (uintptr_t)client;
-
-	/* Pool context is pinned already */
-	sg = guc->ctx_pool_vma->pages;
-	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
+	desc->desc_private = (uintptr_t)client;
 }
 
 static void guc_ctx_desc_fini(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
-	struct guc_context_desc desc;
-	struct sg_table *sg;
-
-	memset(&desc, 0, sizeof(desc));
+	struct guc_context_desc *desc;
 
-	sg = guc->ctx_pool_vma->pages;
-	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
+	desc = __get_context_desc(client);
+	memset(desc, 0, sizeof(*desc));
 }
 
 /**
@@ -1025,6 +1011,7 @@  int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_vma *vma;
+	void *vaddr;
 
 	if (!HAS_GUC_SCHED(dev_priv))
 		return 0;
@@ -1036,14 +1023,21 @@  int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 	if (!i915.enable_guc_submission)
 		return 0; /* not enabled  */
 
-	if (guc->ctx_pool_vma)
+	if (guc->ctx_pool)
 		return 0; /* already allocated */
 
 	vma = intel_guc_allocate_vma(guc, gemsize);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	guc->ctx_pool_vma = vma;
+	guc->ctx_pool = vma;
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr))
+		goto err;
+
+	guc->ctx_pool_vaddr = vaddr;
+
 	ida_init(&guc->ctx_ids);
 	intel_guc_log_create(guc);
 	guc_addon_create(guc);
@@ -1218,9 +1212,12 @@  void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 	i915_vma_unpin_and_release(&guc->ads_vma);
 	i915_vma_unpin_and_release(&guc->log.vma);
 
-	if (guc->ctx_pool_vma)
+	if (guc->ctx_pool_vaddr) {
 		ida_destroy(&guc->ctx_ids);
-	i915_vma_unpin_and_release(&guc->ctx_pool_vma);
+		i915_gem_object_unpin_map(guc->ctx_pool->obj);
+	}
+
+	i915_vma_unpin_and_release(&guc->ctx_pool);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 2f270d0..1a6e478 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -156,7 +156,7 @@  static void guc_params_init(struct drm_i915_private *dev_priv)
 
 	/* If GuC submission is enabled, set up additional parameters here */
 	if (i915.enable_guc_submission) {
-		u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma);
+		u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool);
 		u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
 
 		pgs >>= PAGE_SHIFT;
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index c3a3843..77f7153 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -153,7 +153,8 @@  struct intel_guc {
 	bool interrupts_enabled;
 
 	struct i915_vma *ads_vma;
-	struct i915_vma *ctx_pool_vma;
+	struct i915_vma *ctx_pool;
+	void *ctx_pool_vaddr;
 	struct ida ctx_ids;
 
 	struct i915_guc_client *execbuf_client;