diff mbox

[v8,6/8] drm/i915: create context image vma in kernel context

Message ID 20180529191618.19050-7-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin May 29, 2018, 7:16 p.m. UTC
We want to be able to modify other context images from the kernel
context in a following commit. To be able to do this we need to map
the context image into the kernel context's ppgtt.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c        | 19 ++++++++++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

Comments

Michel Thierry May 29, 2018, 8:26 p.m. UTC | #1
Hi,

On 5/29/2018 12:16 PM, Lionel Landwerlin wrote:
> We want to be able to modify other context images from the kernel
> context in a following commit. To be able to do this we need to map
> the context image into the kernel context's ppgtt.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.h |  1 +
>   drivers/gpu/drm/i915/intel_lrc.c        | 19 ++++++++++++++-----
>   2 files changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index f40d85448a28..9c313c2edb09 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -155,6 +155,7 @@ struct i915_gem_context {
>   	struct intel_context {
>   		struct i915_gem_context *gem_context;
>   		struct i915_vma *state;
> +		struct i915_vma *kernel_state; /**/
>   		struct intel_ring *ring;
>   		u32 *lrc_reg_state;
>   		u64 lrc_desc;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 7314e548fb4e..8a49323f6672 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2739,7 +2739,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   					    struct intel_context *ce)
>   {
>   	struct drm_i915_gem_object *ctx_obj;
> -	struct i915_vma *vma;
> +	struct i915_vma *ggtt_vma, *ppgtt_vma;
>   	uint32_t context_size;
>   	struct intel_ring *ring;
>   	struct i915_timeline *timeline;
> @@ -2762,9 +2762,17 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   		goto error_deref_obj;
>   	}
>   
> -	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> +	ggtt_vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
> +	if (IS_ERR(ggtt_vma)) {
> +		ret = PTR_ERR(ggtt_vma);
> +		goto error_deref_obj;
> +	}
> +
> +	ppgtt_vma = i915_vma_instance(ctx_obj,
> +				      &ctx->i915->kernel_context->ppgtt->base,
This is dangerous if someone decides to use 'aliasing ppgtt' 
(enable_ppgtt=1 is still a valid option).

You'll need something like this,
struct i915_hw_ppgtt *ppgtt = ctx->i915->kernel_context->ppgtt ?:
					ctx->i915->mm.aliasing_ppgtt;


> +				      NULL);
> +	if (IS_ERR(ppgtt_vma)) {
> +		ret = PTR_ERR(ppgtt_vma);
>   		goto error_deref_obj;
>   	}
>   
> @@ -2788,7 +2796,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   	}
>   
>   	ce->ring = ring;
> -	ce->state = vma;
> +	ce->state = ggtt_vma;
> +	ce->kernel_state = ppgtt_vma;
>   
>   	return 0;
>   
>
Lionel Landwerlin May 29, 2018, 9:35 p.m. UTC | #2
On 29/05/18 21:26, Michel Thierry wrote:
> Hi,
>
> On 5/29/2018 12:16 PM, Lionel Landwerlin wrote:
>> We want to be able to modify other context images from the kernel
>> context in a following commit. To be able to do this we need to map
>> the context image into the kernel context's ppgtt.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.h |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c        | 19 ++++++++++++++-----
>>   2 files changed, 15 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
>> b/drivers/gpu/drm/i915/i915_gem_context.h
>> index f40d85448a28..9c313c2edb09 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>> @@ -155,6 +155,7 @@ struct i915_gem_context {
>>       struct intel_context {
>>           struct i915_gem_context *gem_context;
>>           struct i915_vma *state;
>> +        struct i915_vma *kernel_state; /**/
>>           struct intel_ring *ring;
>>           u32 *lrc_reg_state;
>>           u64 lrc_desc;
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 7314e548fb4e..8a49323f6672 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2739,7 +2739,7 @@ static int 
>> execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>                           struct intel_context *ce)
>>   {
>>       struct drm_i915_gem_object *ctx_obj;
>> -    struct i915_vma *vma;
>> +    struct i915_vma *ggtt_vma, *ppgtt_vma;
>>       uint32_t context_size;
>>       struct intel_ring *ring;
>>       struct i915_timeline *timeline;
>> @@ -2762,9 +2762,17 @@ static int 
>> execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>           goto error_deref_obj;
>>       }
>>   -    vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
>> -    if (IS_ERR(vma)) {
>> -        ret = PTR_ERR(vma);
>> +    ggtt_vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
>> +    if (IS_ERR(ggtt_vma)) {
>> +        ret = PTR_ERR(ggtt_vma);
>> +        goto error_deref_obj;
>> +    }
>> +
>> +    ppgtt_vma = i915_vma_instance(ctx_obj,
>> + &ctx->i915->kernel_context->ppgtt->base,
> This is dangerous if someone decides to use 'aliasing ppgtt' 
> (enable_ppgtt=1 is still a valid option).
>
> You'll need something like this,
> struct i915_hw_ppgtt *ppgtt = ctx->i915->kernel_context->ppgtt ?:
>                     ctx->i915->mm.aliasing_ppgtt;

Thanks a lot of the tip, I'm very much new to this.
Will fix and see if they are tests that can exercise this in igt.

-
Lionel

>
>
>> +                      NULL);
>> +    if (IS_ERR(ppgtt_vma)) {
>> +        ret = PTR_ERR(ppgtt_vma);
>>           goto error_deref_obj;
>>       }
>>   @@ -2788,7 +2796,8 @@ static int 
>> execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>       }
>>         ce->ring = ring;
>> -    ce->state = vma;
>> +    ce->state = ggtt_vma;
>> +    ce->kernel_state = ppgtt_vma;
>>         return 0;
>>
>
Chris Wilson May 29, 2018, 10:08 p.m. UTC | #3
Quoting Lionel Landwerlin (2018-05-29 22:35:08)
> On 29/05/18 21:26, Michel Thierry wrote:
> > Hi,
> >
> > On 5/29/2018 12:16 PM, Lionel Landwerlin wrote:
> >> We want to be able to modify other context images from the kernel
> >> context in a following commit. To be able to do this we need to map
> >> the context image into the kernel context's ppgtt.
> >>
> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_gem_context.h |  1 +
> >>   drivers/gpu/drm/i915/intel_lrc.c        | 19 ++++++++++++++-----
> >>   2 files changed, 15 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
> >> b/drivers/gpu/drm/i915/i915_gem_context.h
> >> index f40d85448a28..9c313c2edb09 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> >> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> >> @@ -155,6 +155,7 @@ struct i915_gem_context {
> >>       struct intel_context {
> >>           struct i915_gem_context *gem_context;
> >>           struct i915_vma *state;
> >> +        struct i915_vma *kernel_state; /**/

It's debatable if we want to keep the pointer around.

We have one for the context state vma and ring vma, because we need to
keep a pointer for the object and so choose to keep the vma instead as
we use that more often than the drm_i915_gem_object and can derive it
from vma->obj.

For this piece of code, which should be run that often I don't see a lot
of advantage over using the rbtree search, and since the number of
objects in that tree will not be large (2 at most), quick.

/* Don't forget to declare the dependency tree! */
prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
			  &rq->i915->drm.struct_mutex);
if (prev) {
	err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
					       &prev->submit,
					       I915_FENCE_GFP);
	if (err < 0)
		return err;
}

vma = i915_vma_instance(ce->state->obj, my_vm, NULL);
if (IS_ERR(vma)) ...

err = i915_vma_pin(vma, 0, PIN_USER);
if (err) ...

err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unpin(vma);
if (err) ... /* not today, but tomorrow */

Now you can

cs = intel_ring_begin(rq, 4);
...
*cs++ = gen8_canonical_addr(vma->node.state + offset);
...
intel_ring_advance(rq, cs);

On error, you will have to submit the request since it has interacted
with the tracking.

Don't bother pushing this into an engine vfunc, if you don't intend to
have multiple implementations. It's only a SDM, nothing that appears the
need to be specialised. Although you might argue the context layout will
require it later? As for the SDM, Joonas might complain about the
proliferation, but I'm not sure if we have a good repeating pattern yet.
-Chris
Lionel Landwerlin May 30, 2018, 10:05 a.m. UTC | #4
On 29/05/18 23:08, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2018-05-29 22:35:08)
>> On 29/05/18 21:26, Michel Thierry wrote:
>>> Hi,
>>>
>>> On 5/29/2018 12:16 PM, Lionel Landwerlin wrote:
>>>> We want to be able to modify other context images from the kernel
>>>> context in a following commit. To be able to do this we need to map
>>>> the context image into the kernel context's ppgtt.
>>>>
>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_gem_context.h |  1 +
>>>>    drivers/gpu/drm/i915/intel_lrc.c        | 19 ++++++++++++++-----
>>>>    2 files changed, 15 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h
>>>> b/drivers/gpu/drm/i915/i915_gem_context.h
>>>> index f40d85448a28..9c313c2edb09 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>>> @@ -155,6 +155,7 @@ struct i915_gem_context {
>>>>        struct intel_context {
>>>>            struct i915_gem_context *gem_context;
>>>>            struct i915_vma *state;
>>>> +        struct i915_vma *kernel_state; /**/
> It's debatable if we want to keep the pointer around.
>
> We have one for the context state vma and ring vma, because we need to
> keep a pointer for the object and so choose to keep the vma instead as
> we use that more often than the drm_i915_gem_object and can derive it
> from vma->obj.
>
> For this piece of code, which should be run that often I don't see a lot
> of advantage over using the rbtree search, and since the number of
> objects in that tree will not be large (2 at most), quick.
>
> /* Don't forget to declare the dependency tree! */
> prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
> 			  &rq->i915->drm.struct_mutex);
> if (prev) {
> 	err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> 					       &prev->submit,
> 					       I915_FENCE_GFP);
> 	if (err < 0)
> 		return err;
> }

Thanks, this is done by the caller.

>
> vma = i915_vma_instance(ce->state->obj, my_vm, NULL);
> if (IS_ERR(vma)) ...
>
> err = i915_vma_pin(vma, 0, PIN_USER);
> if (err) ...
>
> err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> i915_vma_unpin(vma);
> if (err) ... /* not today, but tomorrow */
>
> Now you can
>
> cs = intel_ring_begin(rq, 4);
> ...
> *cs++ = gen8_canonical_addr(vma->node.state + offset);
> ...
> intel_ring_advance(rq, cs);
>
> On error, you will have to submit the request since it has interacted
> with the tracking.

Yep, done in the caller too.

> Don't bother pushing this into an engine vfunc, if you don't intend to 
> have multiple implementations. It's only a SDM, nothing that appears 
> the need to be specialised. Although you might argue the context 
> layout will require it later? As for the SDM, Joonas might complain 
> about the proliferation, but I'm not sure if we have a good repeating 
> pattern yet. -Chris
Hmm.. That's kind of useful to detect the ability to change powergating 
configs.

Thanks a lot,

-
Lionel
Chris Wilson May 30, 2018, 10:11 a.m. UTC | #5
Quoting Lionel Landwerlin (2018-05-30 11:05:25)
> On 29/05/18 23:08, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2018-05-29 22:35:08)
> >> On 29/05/18 21:26, Michel Thierry wrote:
> >>> Hi,
> >>>
> >>> On 5/29/2018 12:16 PM, Lionel Landwerlin wrote:
> >>>> We want to be able to modify other context images from the kernel
> >>>> context in a following commit. To be able to do this we need to map
> >>>> the context image into the kernel context's ppgtt.
> >>>>
> >>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>>> ---
> >>>>    drivers/gpu/drm/i915/i915_gem_context.h |  1 +
> >>>>    drivers/gpu/drm/i915/intel_lrc.c        | 19 ++++++++++++++-----
> >>>>    2 files changed, 15 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h
> >>>> b/drivers/gpu/drm/i915/i915_gem_context.h
> >>>> index f40d85448a28..9c313c2edb09 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> >>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> >>>> @@ -155,6 +155,7 @@ struct i915_gem_context {
> >>>>        struct intel_context {
> >>>>            struct i915_gem_context *gem_context;
> >>>>            struct i915_vma *state;
> >>>> +        struct i915_vma *kernel_state; /**/
> > It's debatable if we want to keep the pointer around.
> >
> > We have one for the context state vma and ring vma, because we need to
> > keep a pointer for the object and so choose to keep the vma instead as
> > we use that more often than the drm_i915_gem_object and can derive it
> > from vma->obj.
> >
> > For this piece of code, which should be run that often I don't see a lot
> > of advantage over using the rbtree search, and since the number of
> > objects in that tree will not be large (2 at most), quick.
> >
> > /* Don't forget to declare the dependency tree! */
> > prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
> >                         &rq->i915->drm.struct_mutex);
> > if (prev) {
> >       err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> >                                              &prev->submit,
> >                                              I915_FENCE_GFP);
> >       if (err < 0)
> >               return err;
> > }
> 
> Thanks, this is done by the caller.

It probably shouldn't be, because the fence is related to this write.
(And it should be i915_request_await_request, but that'll require an
export.) The await + move-to-active (hmm, this is before fence export
refactoring, oh well, needs that patch as well) are paired. Ideally we
do all awaits first, checking for errors before touching the global
state as then we can discard the request; but here I think the coupling
is very, very special because the context objects do not follow the
normal rules. So something to be very wary of.

> > vma = i915_vma_instance(ce->state->obj, my_vm, NULL);
> > if (IS_ERR(vma)) ...
> >
> > err = i915_vma_pin(vma, 0, PIN_USER);
> > if (err) ...
> >
> > err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> > i915_vma_unpin(vma);
> > if (err) ... /* not today, but tomorrow */
> >
> > Now you can
> >
> > cs = intel_ring_begin(rq, 4);
> > ...
> > *cs++ = gen8_canonical_addr(vma->node.state + offset);
> > ...
> > intel_ring_advance(rq, cs);
> >
> > On error, you will have to submit the request since it has interacted
> > with the tracking.
> 
> Yep, done in the caller too.

Yes, it has to be. I'm just stressing that we can't simply discard this
request part way through because it has manipulated global state.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index f40d85448a28..9c313c2edb09 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -155,6 +155,7 @@  struct i915_gem_context {
 	struct intel_context {
 		struct i915_gem_context *gem_context;
 		struct i915_vma *state;
+		struct i915_vma *kernel_state; /**/
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
 		u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7314e548fb4e..8a49323f6672 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2739,7 +2739,7 @@  static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_context *ce)
 {
 	struct drm_i915_gem_object *ctx_obj;
-	struct i915_vma *vma;
+	struct i915_vma *ggtt_vma, *ppgtt_vma;
 	uint32_t context_size;
 	struct intel_ring *ring;
 	struct i915_timeline *timeline;
@@ -2762,9 +2762,17 @@  static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_deref_obj;
 	}
 
-	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
+	ggtt_vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
+	if (IS_ERR(ggtt_vma)) {
+		ret = PTR_ERR(ggtt_vma);
+		goto error_deref_obj;
+	}
+
+	ppgtt_vma = i915_vma_instance(ctx_obj,
+				      &ctx->i915->kernel_context->ppgtt->base,
+				      NULL);
+	if (IS_ERR(ppgtt_vma)) {
+		ret = PTR_ERR(ppgtt_vma);
 		goto error_deref_obj;
 	}
 
@@ -2788,7 +2796,8 @@  static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 	}
 
 	ce->ring = ring;
-	ce->state = vma;
+	ce->state = ggtt_vma;
+	ce->kernel_state = ppgtt_vma;
 
 	return 0;