diff mbox series

[v3] drm/i915/execlists: Use coherent writes into the context image

Message ID 20180913193302.25446-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [v3] drm/i915/execlists: Use coherent writes into the context image | expand

Commit Message

Chris Wilson Sept. 13, 2018, 7:33 p.m. UTC
That we use a WB mapping for updating the RING_TAIL register inside the
context image even on !llc machines has been a source of consternation
for every reader. It appears to work on bsw+, but it may just have been
that we have been incredibly bad at detecting the errors.

v2: With extra enthusiasm.
v3: Drop force of map type for pinned default_state as by the time we
pin it, the map type is always WB and doesn't conflict with the earlier
use by ce->state.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         | 6 ++++++
 drivers/gpu/drm/i915/i915_gem.c         | 2 ++
 drivers/gpu/drm/i915/i915_perf.c        | 4 +++-
 drivers/gpu/drm/i915/intel_lrc.c        | 8 +++++---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
 5 files changed, 17 insertions(+), 5 deletions(-)

Comments

Tvrtko Ursulin Sept. 14, 2018, 8:14 a.m. UTC | #1
On 13/09/2018 20:33, Chris Wilson wrote:
> That we use a WB mapping for updating the RING_TAIL register inside the
> context image even on !llc machines has been a source of consternation
> for every reader. It appears to work on bsw+, but it may just have been
> that we have been incredibly bad at detecting the errors.
> 
> v2: With extra enthusiasm.
> v3: Drop force of map type for pinned default_state as by the time we
> pin it, the map type is always WB and doesn't conflict with the earlier
> use by ce->state.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         | 6 ++++++
>   drivers/gpu/drm/i915/i915_gem.c         | 2 ++
>   drivers/gpu/drm/i915/i915_perf.c        | 4 +++-
>   drivers/gpu/drm/i915/intel_lrc.c        | 8 +++++---
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
>   5 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7ea442033a57..5c833a45682d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3074,6 +3074,12 @@ enum i915_map_type {
>   	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
>   };
>   
> +static inline enum i915_map_type
> +i915_coherent_map_type(struct drm_i915_private *i915)
> +{
> +	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
> +}
> +
>   /**
>    * i915_gem_object_pin_map - return a contiguous mapping of the entire object
>    * @obj: the object to map into kernel address space
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 89834ce19acd..d6f2bbd6a0dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5417,6 +5417,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   	for_each_engine(engine, i915, id) {
>   		struct i915_vma *state;
>   
> +		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
> +
>   		state = to_intel_context(ctx, engine)->state;
>   		if (!state)
>   			continue;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3d7a052b4cca..90168ac845c2 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1735,13 +1735,15 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>   	/* Update all contexts now that we've stalled the submission. */
>   	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
>   		struct intel_context *ce = to_intel_context(ctx, engine);
> +		unsigned int map_type;
>   		u32 *regs;
>   
>   		/* OA settings will be set upon first use */
>   		if (!ce->state)
>   			continue;
>   
> -		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
> +		map_type = i915_coherent_map_type(dev_priv);

Local for line length only? Could move it out of the loop as well to 
cache it if you feel like it.

> +		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
>   		if (IS_ERR(regs))
>   			return PTR_ERR(regs);
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d7fcbba8e982..7b1f322f232b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1294,7 +1294,7 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
>   	 * on an active context (which by nature is already on the GPU).
>   	 */
>   	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> -		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> +		err = i915_gem_object_set_to_wc_domain(vma->obj, true);

I am still confused by this. The cache flushing effects of the old and new
calls seem the same due to the object being in the CPU write domain at this
point. What changes is that it will be marked differently from this point on.
Does that come into play later in the object's lifetime, and where?

>   		if (err)
>   			return err;
>   	}
> @@ -1322,7 +1322,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
>   	if (ret)
>   		goto err;
>   
> -	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
> +	vaddr = i915_gem_object_pin_map(ce->state->obj,
> +					i915_coherent_map_type(ctx->i915) |
> +					I915_MAP_OVERRIDE);

Override MAP_WB from populate_lr_context - OK I think.

>   	if (IS_ERR(vaddr)) {
>   		ret = PTR_ERR(vaddr);
>   		goto unpin_vma;
> @@ -2753,7 +2755,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>   		void *defaults;
>   
>   		defaults = i915_gem_object_pin_map(engine->default_state,
> -						   I915_MAP_WB);
> +						   I915_MAP_FORCE_WB);
>   		if (IS_ERR(defaults)) {
>   			ret = PTR_ERR(defaults);
>   			goto err_unpin_ctx;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index d0ef50bf930a..1eb68d77b66c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1288,7 +1288,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
>   		}
>   
>   		defaults = i915_gem_object_pin_map(engine->default_state,
> -						   I915_MAP_WB);
> +						   I915_MAP_FORCE_WB);
>   		if (IS_ERR(defaults)) {
>   			err = PTR_ERR(defaults);
>   			goto err_map;
> 

These two do not need to be changed AFAICT.

Regards,

Tvrtko
Chris Wilson Sept. 14, 2018, 8:21 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-09-14 09:14:54)
> 
> On 13/09/2018 20:33, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index d7fcbba8e982..7b1f322f232b 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -1294,7 +1294,7 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
> >        * on an active context (which by nature is already on the GPU).
> >        */
> >       if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> > -             err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> > +             err = i915_gem_object_set_to_wc_domain(vma->obj, true);
> 
> I am still confused by this. Cache flushing effects of the old and new 
> call seem the same due object being in CPU write domain at this point. 
> What changes is that it will be marked differently from this point one. 
> Does that come into play later in the objects lifetime and where?

No, just taking the opportunity to use a more correct domain now that it
exists and logically ties in with using WC.

> >               if (err)
> >                       return err;
> >       }
> > @@ -1322,7 +1322,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
> >       if (ret)
> >               goto err;
> >   
> > -     vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
> > +     vaddr = i915_gem_object_pin_map(ce->state->obj,
> > +                                     i915_coherent_map_type(ctx->i915) |
> > +                                     I915_MAP_OVERRIDE);
> 
> Override MAP_WB from populate_lr_context - OK I think.
> 
> >       if (IS_ERR(vaddr)) {
> >               ret = PTR_ERR(vaddr);
> >               goto unpin_vma;
> > @@ -2753,7 +2755,7 @@ populate_lr_context(struct i915_gem_context *ctx,
> >               void *defaults;
> >   
> >               defaults = i915_gem_object_pin_map(engine->default_state,
> > -                                                I915_MAP_WB);
> > +                                                I915_MAP_FORCE_WB);
> >               if (IS_ERR(defaults)) {
> >                       ret = PTR_ERR(defaults);
> >                       goto err_unpin_ctx;
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index d0ef50bf930a..1eb68d77b66c 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -1288,7 +1288,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
> >               }
> >   
> >               defaults = i915_gem_object_pin_map(engine->default_state,
> > -                                                I915_MAP_WB);
> > +                                                I915_MAP_FORCE_WB);
> >               if (IS_ERR(defaults)) {
> >                       err = PTR_ERR(defaults);
> >                       goto err_map;
> > 
> 
> These two do not need to be changed AFAICT.

I think we cannot rely on engine->default_state always being MAP_WB
already at this point, due to not having an idle cycle between creation
of engine->default_state on module_load and first use.

Having thought of that last night, I did mean to add a call to
__i915_gem_park() during init so we forced ourselves to idle.
-Chris
Tvrtko Ursulin Sept. 14, 2018, 9:12 a.m. UTC | #3
On 14/09/2018 09:21, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-14 09:14:54)
>>
>> On 13/09/2018 20:33, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index d7fcbba8e982..7b1f322f232b 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -1294,7 +1294,7 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
>>>         * on an active context (which by nature is already on the GPU).
>>>         */
>>>        if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
>>> -             err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>>> +             err = i915_gem_object_set_to_wc_domain(vma->obj, true);
>>
>> I am still confused by this. Cache flushing effects of the old and new
>> call seem the same due object being in CPU write domain at this point.
>> What changes is that it will be marked differently from this point one.
>> Does that come into play later in the objects lifetime and where?
> 
> No, just taking the opportunity to use a more correct domain now that it
> exists and logically ties in with using WC.

Ok.

>>>                if (err)
>>>                        return err;
>>>        }
>>> @@ -1322,7 +1322,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
>>>        if (ret)
>>>                goto err;
>>>    
>>> -     vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
>>> +     vaddr = i915_gem_object_pin_map(ce->state->obj,
>>> +                                     i915_coherent_map_type(ctx->i915) |
>>> +                                     I915_MAP_OVERRIDE);
>>
>> Override MAP_WB from populate_lr_context - OK I think.
>>
>>>        if (IS_ERR(vaddr)) {
>>>                ret = PTR_ERR(vaddr);
>>>                goto unpin_vma;
>>> @@ -2753,7 +2755,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>>>                void *defaults;
>>>    
>>>                defaults = i915_gem_object_pin_map(engine->default_state,
>>> -                                                I915_MAP_WB);
>>> +                                                I915_MAP_FORCE_WB);
>>>                if (IS_ERR(defaults)) {
>>>                        ret = PTR_ERR(defaults);
>>>                        goto err_unpin_ctx;
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> index d0ef50bf930a..1eb68d77b66c 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> @@ -1288,7 +1288,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
>>>                }
>>>    
>>>                defaults = i915_gem_object_pin_map(engine->default_state,
>>> -                                                I915_MAP_WB);
>>> +                                                I915_MAP_FORCE_WB);
>>>                if (IS_ERR(defaults)) {
>>>                        err = PTR_ERR(defaults);
>>>                        goto err_map;
>>>
>>
>> These two do not need to be changed AFAICT.
> 
> I think we cannot rely on engine->default_state always being MAP_WB
> already at this point, due to not having an idle cycle between creation
> of engine->default_state on module_load and first use.
 >
 > Having thought of that last night, I did mean to add a call to
 > __i915_gem_park() during init so we forced ourselves to idle.

I don't follow - all places where we map it use MAP_WB. Isn't the force flag
just there to override an existing, different mapping?

Regards,

Tvrtko
Chris Wilson Sept. 14, 2018, 9:17 a.m. UTC | #4
Quoting Tvrtko Ursulin (2018-09-14 10:12:15)
> 
> On 14/09/2018 09:21, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-09-14 09:14:54)
> >>
> >> On 13/09/2018 20:33, Chris Wilson wrote:
> >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>> index d7fcbba8e982..7b1f322f232b 100644
> >>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>> @@ -1294,7 +1294,7 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
> >>>         * on an active context (which by nature is already on the GPU).
> >>>         */
> >>>        if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> >>> -             err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> >>> +             err = i915_gem_object_set_to_wc_domain(vma->obj, true);
> >>
> >> I am still confused by this. Cache flushing effects of the old and new
> >> call seem the same due object being in CPU write domain at this point.
> >> What changes is that it will be marked differently from this point one.
> >> Does that come into play later in the objects lifetime and where?
> > 
> > No, just taking the opportunity to use a more correct domain now that it
> > exists and logically ties in with using WC.
> 
> Ok.
> 
> >>>                if (err)
> >>>                        return err;
> >>>        }
> >>> @@ -1322,7 +1322,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
> >>>        if (ret)
> >>>                goto err;
> >>>    
> >>> -     vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
> >>> +     vaddr = i915_gem_object_pin_map(ce->state->obj,
> >>> +                                     i915_coherent_map_type(ctx->i915) |
> >>> +                                     I915_MAP_OVERRIDE);
> >>
> >> Override MAP_WB from populate_lr_context - OK I think.
> >>
> >>>        if (IS_ERR(vaddr)) {
> >>>                ret = PTR_ERR(vaddr);
> >>>                goto unpin_vma;
> >>> @@ -2753,7 +2755,7 @@ populate_lr_context(struct i915_gem_context *ctx,
> >>>                void *defaults;
> >>>    
> >>>                defaults = i915_gem_object_pin_map(engine->default_state,
> >>> -                                                I915_MAP_WB);
> >>> +                                                I915_MAP_FORCE_WB);
> >>>                if (IS_ERR(defaults)) {
> >>>                        ret = PTR_ERR(defaults);
> >>>                        goto err_unpin_ctx;
> >>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >>> index d0ef50bf930a..1eb68d77b66c 100644
> >>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >>> @@ -1288,7 +1288,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
> >>>                }
> >>>    
> >>>                defaults = i915_gem_object_pin_map(engine->default_state,
> >>> -                                                I915_MAP_WB);
> >>> +                                                I915_MAP_FORCE_WB);
> >>>                if (IS_ERR(defaults)) {
> >>>                        err = PTR_ERR(defaults);
> >>>                        goto err_map;
> >>>
> >>
> >> These two do not need to be changed AFAICT.
> > 
> > I think we cannot rely on engine->default_state always being MAP_WB
> > already at this point, due to not having an idle cycle between creation
> > of engine->default_state on module_load and first use.
>  >
>  > Having thought of that last night, I did mean to add a call to
>  > __i915_gem_park() during init so we forced ourselves to idle.
> 
> I don't follow - all places where we map it use MAP_WB. Isn't force flag 
> just to override the existing different mapping?

Yes, but we may not have done the force from MAP_WC to MAP_WB in
i915_gem_unpark->intel_engines_unpark by this point, so
engine->default_state may still have a WC mapping on it. To be sure, we
need to validate that we can acquire that mapping on init.
-Chris
Tvrtko Ursulin Sept. 14, 2018, 9:25 a.m. UTC | #5
On 14/09/2018 10:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-14 10:12:15)
>>
>> On 14/09/2018 09:21, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-09-14 09:14:54)
>>>>
>>>> On 13/09/2018 20:33, Chris Wilson wrote:
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index d7fcbba8e982..7b1f322f232b 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -1294,7 +1294,7 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
>>>>>          * on an active context (which by nature is already on the GPU).
>>>>>          */
>>>>>         if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
>>>>> -             err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>>>>> +             err = i915_gem_object_set_to_wc_domain(vma->obj, true);
>>>>
>>>> I am still confused by this. Cache flushing effects of the old and new
>>>> call seem the same due object being in CPU write domain at this point.
>>>> What changes is that it will be marked differently from this point one.
>>>> Does that come into play later in the objects lifetime and where?
>>>
>>> No, just taking the opportunity to use a more correct domain now that it
>>> exists and logically ties in with using WC.
>>
>> Ok.
>>
>>>>>                 if (err)
>>>>>                         return err;
>>>>>         }
>>>>> @@ -1322,7 +1322,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
>>>>>         if (ret)
>>>>>                 goto err;
>>>>>     
>>>>> -     vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
>>>>> +     vaddr = i915_gem_object_pin_map(ce->state->obj,
>>>>> +                                     i915_coherent_map_type(ctx->i915) |
>>>>> +                                     I915_MAP_OVERRIDE);
>>>>
>>>> Override MAP_WB from populate_lr_context - OK I think.
>>>>
>>>>>         if (IS_ERR(vaddr)) {
>>>>>                 ret = PTR_ERR(vaddr);
>>>>>                 goto unpin_vma;
>>>>> @@ -2753,7 +2755,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>>>>>                 void *defaults;
>>>>>     
>>>>>                 defaults = i915_gem_object_pin_map(engine->default_state,
>>>>> -                                                I915_MAP_WB);
>>>>> +                                                I915_MAP_FORCE_WB);
>>>>>                 if (IS_ERR(defaults)) {
>>>>>                         ret = PTR_ERR(defaults);
>>>>>                         goto err_unpin_ctx;
>>>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>>> index d0ef50bf930a..1eb68d77b66c 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>>> @@ -1288,7 +1288,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
>>>>>                 }
>>>>>     
>>>>>                 defaults = i915_gem_object_pin_map(engine->default_state,
>>>>> -                                                I915_MAP_WB);
>>>>> +                                                I915_MAP_FORCE_WB);
>>>>>                 if (IS_ERR(defaults)) {
>>>>>                         err = PTR_ERR(defaults);
>>>>>                         goto err_map;
>>>>>
>>>>
>>>> These two do not need to be changed AFAICT.
>>>
>>> I think we cannot rely on engine->default_state always being MAP_WB
>>> already at this point, due to not having an idle cycle between creation
>>> of engine->default_state on module_load and first use.
>>   >
>>   > Having thought of that last night, I did mean to add a call to
>>   > __i915_gem_park() during init so we forced ourselves to idle.
>>
>> I don't follow - all places where we map it use MAP_WB. Isn't force flag
>> just to override the existing different mapping?
> 
> Yes, but we may not have done the force from MAP_WC to MAP_WB in
> i915_gem_unpark->intel_engines_unpark by this point, so
> engine->default_state may still have a WC mapping on it. To be sure, we
> need to validate that we can acquire that mapping on init.

Okay, I missed the fact that we just transfer the engine->default_state object
ownership from ce->state->obj. Somehow I assumed it was a dedicated buffer.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7ea442033a57..5c833a45682d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3074,6 +3074,12 @@  enum i915_map_type {
 	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
 };
 
+static inline enum i915_map_type
+i915_coherent_map_type(struct drm_i915_private *i915)
+{
+	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+}
+
 /**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
  * @obj: the object to map into kernel address space
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 89834ce19acd..d6f2bbd6a0dc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5417,6 +5417,8 @@  static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *state;
 
+		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
+
 		state = to_intel_context(ctx, engine)->state;
 		if (!state)
 			continue;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3d7a052b4cca..90168ac845c2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1735,13 +1735,15 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
 		struct intel_context *ce = to_intel_context(ctx, engine);
+		unsigned int map_type;
 		u32 *regs;
 
 		/* OA settings will be set upon first use */
 		if (!ce->state)
 			continue;
 
-		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+		map_type = i915_coherent_map_type(dev_priv);
+		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
 		if (IS_ERR(regs))
 			return PTR_ERR(regs);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fcbba8e982..7b1f322f232b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1294,7 +1294,7 @@  static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	 * on an active context (which by nature is already on the GPU).
 	 */
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+		err = i915_gem_object_set_to_wc_domain(vma->obj, true);
 		if (err)
 			return err;
 	}
@@ -1322,7 +1322,9 @@  __execlists_context_pin(struct intel_engine_cs *engine,
 	if (ret)
 		goto err;
 
-	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+	vaddr = i915_gem_object_pin_map(ce->state->obj,
+					i915_coherent_map_type(ctx->i915) |
+					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
 		goto unpin_vma;
@@ -2753,7 +2755,7 @@  populate_lr_context(struct i915_gem_context *ctx,
 		void *defaults;
 
 		defaults = i915_gem_object_pin_map(engine->default_state,
-						   I915_MAP_WB);
+						   I915_MAP_FORCE_WB);
 		if (IS_ERR(defaults)) {
 			ret = PTR_ERR(defaults);
 			goto err_unpin_ctx;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d0ef50bf930a..1eb68d77b66c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1288,7 +1288,7 @@  alloc_context_vma(struct intel_engine_cs *engine)
 		}
 
 		defaults = i915_gem_object_pin_map(engine->default_state,
-						   I915_MAP_WB);
+						   I915_MAP_FORCE_WB);
 		if (IS_ERR(defaults)) {
 			err = PTR_ERR(defaults);
 			goto err_map;