diff mbox

[6/7] drm/i915: Convert execlists_ctx_descriptor() for requests

Message ID 1435932578-32209-7-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala July 3, 2015, 2:09 p.m. UTC
Pass around requests to carry context deeper in callchain.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

Comments

yu.dai@intel.com July 8, 2015, 4:40 p.m. UTC | #1
On 07/03/2015 07:09 AM, Mika Kuoppala wrote:
> Pass around requests to carry context deeper in callchain.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++--------
>   1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index c3c029a..67ff460 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
>   	return lrca >> 12;
>   }
>   
> -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
> -					 struct drm_i915_gem_object *ctx_obj)
> +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
>   {

GuC patch series tries to utilize this function. However, changing from 
ring/context to request makes it unusable in the case where request is 
not needed (not available). This context descriptor has nothing to do 
with drm_i915_gem_request IMO. And it is waste of time to call it for 
each batch submission. This value is fixed when context is pinned. Maybe 
add a member ctx_desc into intel_context->engine; initialize the value 
within intel_lr_context_pin; then use it whenever needed.

Thanks,
Alex
> +	struct intel_engine_cs *ring = rq->ring;
>   	struct drm_device *dev = ring->dev;
> +	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
>   	uint64_t desc;
>   	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
>   
> @@ -299,21 +300,18 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
>   	struct intel_engine_cs *ring = rq0->ring;
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_i915_gem_object *ctx_obj0 = rq0->ctx->engine[ring->id].state;
> -	struct drm_i915_gem_object *ctx_obj1 = rq1 ?
> -		rq1->ctx->engine[ring->id].state : NULL;
>   	uint64_t temp = 0;
>   	uint32_t desc[4];
>   
>   	/* XXX: You must always write both descriptors in the order below. */
> -	if (ctx_obj1)
> -		temp = execlists_ctx_descriptor(ring, ctx_obj1);
> +	if (rq1)
> +		temp = execlists_ctx_descriptor(rq1);
>   	else
>   		temp = 0;
>   	desc[1] = (u32)(temp >> 32);
>   	desc[0] = (u32)temp;
>   
> -	temp = execlists_ctx_descriptor(ring, ctx_obj0);
> +	temp = execlists_ctx_descriptor(rq0);
>   	desc[3] = (u32)(temp >> 32);
>   	desc[2] = (u32)temp;
>
Dave Gordon July 8, 2015, 5:04 p.m. UTC | #2
On 08/07/15 17:40, Yu Dai wrote:
> On 07/03/2015 07:09 AM, Mika Kuoppala wrote:
>> Pass around requests to carry context deeper in callchain.
>>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++--------
>>   1 file changed, 6 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index c3c029a..67ff460 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct
>> drm_i915_gem_object *ctx_obj)
>>       return lrca >> 12;
>>   }
>> -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
>> -                     struct drm_i915_gem_object *ctx_obj)
>> +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request
>> *rq)
>>   {
>
> GuC patch series tries to utilize this function. However, changing from
> ring/context to request makes it unusable in the case where request is
> not needed (not available). This context descriptor has nothing to do
> with drm_i915_gem_request IMO. And it is waste of time to call it for
> each batch submission. This value is fixed when context is pinned. Maybe
> add a member ctx_desc into intel_context->engine; initialize the value
> within intel_lr_context_pin; then use it whenever needed.
>
> Thanks,
> Alex

Yes, I've effectively reversed this in the next GuC submission series, 
since we don't have a request during setup, and this function doesn't 
actually need one either; it's only being used as a handle for 
extracting the context and ring.

So in my version it will take an intel_context and a ring so it's up to 
the caller to extract those from the drm_i915_gem_request (rq->ctx) and 
(rq->ring) and pass them separately. Then the GuC can use it during 
setup as well as at runtime :)

.Dave.
Mika Kuoppala July 8, 2015, 5:26 p.m. UTC | #3
On Wed Jul 08 2015 at 18:04:44 +0100, Dave Gordon wrote:
> On 08/07/15 17:40, Yu Dai wrote:
> >On 07/03/2015 07:09 AM, Mika Kuoppala wrote:
> >>Pass around requests to carry context deeper in callchain.
> >>
> >>Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> >>---
> >>  drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++--------
> >>  1 file changed, 6 insertions(+), 8 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> >>b/drivers/gpu/drm/i915/intel_lrc.c
> >>index c3c029a..67ff460 100644
> >>--- a/drivers/gpu/drm/i915/intel_lrc.c
> >>+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>@@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct
> >>drm_i915_gem_object *ctx_obj)
> >>      return lrca >> 12;
> >>  }
> >>-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
> >>-                     struct drm_i915_gem_object *ctx_obj)
> >>+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request
> >>*rq)
> >>  {
> >
> >GuC patch series tries to utilize this function. However, changing from
> >ring/context to request makes it unusable in the case where request is
> >not needed (not available). This context descriptor has nothing to do
> >with drm_i915_gem_request IMO. And it is waste of time to call it for
> >each batch submission. This value is fixed when context is pinned. Maybe
> >add a member ctx_desc into intel_context->engine; initialize the value
> >within intel_lr_context_pin; then use it whenever needed.
> >
> >Thanks,
> >Alex
> 
> Yes, I've effectively reversed this in the next GuC submission
> series, since we don't have a request during setup, and this
> function doesn't actually need one either; it's only being used as a
> handle for extracting the context and ring.
> 
> So in my version it will take an intel_context and a ring so it's up
> to the caller to extract those from the drm_i915_gem_request
> (rq->ctx) and (rq->ring) and pass them separately. Then the GuC can
> use it during setup as well as at runtime :)
> 

I admit I went one step too far on this one in the series. The bspec says
the context ids created should be unique across all engines and all execlists.
I was concerned that our lrca only setup and resubmission for
pre-emption would break this rule. (and be the reason for skl hickups)

The bspec has a note about creating context id with ring/counter(seqno)/lrca
combination. Thus the request -> descriptor creation.

I didn't post that patch as it didn't cure the skl hang. But it made
debugging the context status queue easier as seqnos were part of
context_id.

-Mika
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c3c029a..67ff460 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -261,10 +261,11 @@  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
 	return lrca >> 12;
 }
 
-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
-					 struct drm_i915_gem_object *ctx_obj)
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
 {
+	struct intel_engine_cs *ring = rq->ring;
 	struct drm_device *dev = ring->dev;
+	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
 	uint64_t desc;
 	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
 
@@ -299,21 +300,18 @@  static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	struct intel_engine_cs *ring = rq0->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *ctx_obj0 = rq0->ctx->engine[ring->id].state;
-	struct drm_i915_gem_object *ctx_obj1 = rq1 ?
-		rq1->ctx->engine[ring->id].state : NULL;
 	uint64_t temp = 0;
 	uint32_t desc[4];
 
 	/* XXX: You must always write both descriptors in the order below. */
-	if (ctx_obj1)
-		temp = execlists_ctx_descriptor(ring, ctx_obj1);
+	if (rq1)
+		temp = execlists_ctx_descriptor(rq1);
 	else
 		temp = 0;
 	desc[1] = (u32)(temp >> 32);
 	desc[0] = (u32)temp;
 
-	temp = execlists_ctx_descriptor(ring, ctx_obj0);
+	temp = execlists_ctx_descriptor(rq0);
 	desc[3] = (u32)(temp >> 32);
 	desc[2] = (u32)temp;