diff mbox

[2/2] drm/i915: Drop i915_gem_obj_is_pinned() from set-cache-level

Message ID 1444127996-23561-2-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Oct. 6, 2015, 10:39 a.m. UTC
Since the removal of the pin-ioctl, we only care about not changing the
cache level on buffers pinned to the hardware as indicated by
obj->pin_display. So we can safely replace i915_gem_obj_is_pinned()
here with a plain obj->pin_display check. During rebinding, we will do
sanity checks in case vma->pin_count is erroneously set.

At the same time, we can micro-optimise GTT mmap() behaviour since we
only need to relinquish the mmaps before Sandybridge.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

Comments

Daniel Vetter Oct. 6, 2015, 11:28 a.m. UTC | #1
On Tue, Oct 06, 2015 at 11:39:56AM +0100, Chris Wilson wrote:
> Since the remove of the pin-ioctl, we only care about not changing the
> cache level on buffers pinned to the hardware as indicated by
> obj->pin_display. So we can safely replace i915_gem_object_is_pinned()
> here with a plain obj->pin_display check. During rebinding, we will check
> sanity checks in case vma->pin_count is erroneously set.
> 
> At the same time, we can micro-optimise GTT mmap() behaviour since we
> only need to relinquish the mmaps before Sandybridge.

Actual condition is !LLC so would need to be updated (and split out imo).
 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++----------------
>  1 file changed, 24 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d4a3bdf0c5b6..2b8ed7a2faab 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct i915_vma *vma, *next;
> +	bool bound = false;
>  	int ret = 0;
>  
>  	if (obj->cache_level == cache_level)
>  		goto out;
>  
> -	if (i915_gem_obj_is_pinned(obj)) {
> -		DRM_DEBUG("can not change the cache level of pinned objects\n");
> -		return -EBUSY;
> -	}
> -
>  	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> +		if (!drm_mm_node_allocated(&vma->node))
> +			continue;
> +
> +		if (vma->pin_count) {
> +			DRM_DEBUG("can not change the cache level of pinned objects\n");
> +			return -EBUSY;
> +		}
> +
>  		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
>  			ret = i915_vma_unbind(vma);
>  			if (ret)
>  				return ret;
> -		}
> +		} else
> +			bound = true;
>  	}
>  
> -	if (i915_gem_obj_bound_any(obj)) {
> +	if (bound) {
>  		ret = i915_gem_object_wait_rendering(obj, false);
>  		if (ret)
>  			return ret;

Shouldn't the below be split out into a separate patch? And maybe for
paranoia keep calling finish_gtt but restrict it to !LLC && snooped like
you do below.
-Daniel

>  
> -		i915_gem_object_finish_gtt(obj);
> -
>  		/* Before SandyBridge, you could not use tiling or fence
>  		 * registers with snooped memory, so relinquish any fences
>  		 * currently pointing to our region in the aperture.
> @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  				return ret;
>  		}
>  
> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
> -			if (drm_mm_node_allocated(&vma->node)) {
> -				ret = i915_vma_bind(vma, cache_level,
> -						    PIN_UPDATE);
> -				if (ret)
> -					return ret;
> -			}
> +		/* Access to snoopable pages through the GTT is incoherent. */
> +		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
> +			i915_gem_release_mmap(obj);
> +
> +		list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +			if (!drm_mm_node_allocated(&vma->node))
> +				continue;
> +
> +			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
> +			if (ret)
> +				return ret;
> +		}
>  	}
>  
>  	list_for_each_entry(vma, &obj->vma_list, vma_link)
> -- 
> 2.6.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 6, 2015, 11:41 a.m. UTC | #2
On Tue, Oct 06, 2015 at 01:28:07PM +0200, Daniel Vetter wrote:
> On Tue, Oct 06, 2015 at 11:39:56AM +0100, Chris Wilson wrote:
> > Since the remove of the pin-ioctl, we only care about not changing the
> > cache level on buffers pinned to the hardware as indicated by
> > obj->pin_display. So we can safely replace i915_gem_object_is_pinned()
> > here with a plain obj->pin_display check. During rebinding, we will check
> > sanity checks in case vma->pin_count is erroneously set.
> > 
> > At the same time, we can micro-optimise GTT mmap() behaviour since we
> > only need to relinquish the mmaps before Sandybridge.
> 
> Actual condition is !LLC so would need to be updated (and split out imo).
>  
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++----------------
> >  1 file changed, 24 insertions(+), 16 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index d4a3bdf0c5b6..2b8ed7a2faab 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  {
> >  	struct drm_device *dev = obj->base.dev;
> >  	struct i915_vma *vma, *next;
> > +	bool bound = false;
> >  	int ret = 0;
> >  
> >  	if (obj->cache_level == cache_level)
> >  		goto out;
> >  
> > -	if (i915_gem_obj_is_pinned(obj)) {
> > -		DRM_DEBUG("can not change the cache level of pinned objects\n");
> > -		return -EBUSY;
> > -	}
> > -
> >  	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> > +		if (!drm_mm_node_allocated(&vma->node))
> > +			continue;
> > +
> > +		if (vma->pin_count) {
> > +			DRM_DEBUG("can not change the cache level of pinned objects\n");
> > +			return -EBUSY;
> > +		}
> > +
> >  		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
> >  			ret = i915_vma_unbind(vma);
> >  			if (ret)
> >  				return ret;
> > -		}
> > +		} else
> > +			bound = true;
> >  	}
> >  
> > -	if (i915_gem_obj_bound_any(obj)) {
> > +	if (bound) {
> >  		ret = i915_gem_object_wait_rendering(obj, false);
> >  		if (ret)
> >  			return ret;
> 
> Shouldn't the below be split out into a separate patch? And maybe for
> paranoia keep calling finish_gtt but restrict it to !LLC && snooped like
> you do below.

Hmm, I don't have a finish-gtt. The serialisation is based on
release-mmaps (we have to be sure that any concurrent access is
prohibited). So the question is: is i915_gem_release_mmap() a sufficient
barrier and if not, why not. In release-mmap we are revoking the CPU's PTE,
but that can be ordered with the memory accesses, but before we continue
we should be sure that they have been revoked. Paranoia says we should
be moving the mb() we have from outside of release-mmaps into
release-mmaps.
-Chris
Tvrtko Ursulin Oct. 7, 2015, 3:57 p.m. UTC | #3
Hi,

On 06/10/15 11:39, Chris Wilson wrote:
> Since the remove of the pin-ioctl, we only care about not changing the
> cache level on buffers pinned to the hardware as indicated by
> obj->pin_display. So we can safely replace i915_gem_object_is_pinned()

i915_gem_obj_is_pinned

> here with a plain obj->pin_display check. During rebinding, we will check
> sanity checks in case vma->pin_count is erroneously set.

"do sanity checks" or something.

> At the same time, we can micro-optimise GTT mmap() behaviour since we
> only need to relinquish the mmaps before Sandybridge.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++----------------
>   1 file changed, 24 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d4a3bdf0c5b6..2b8ed7a2faab 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   {
>   	struct drm_device *dev = obj->base.dev;
>   	struct i915_vma *vma, *next;
> +	bool bound = false;
>   	int ret = 0;
>
>   	if (obj->cache_level == cache_level)
>   		goto out;
>
> -	if (i915_gem_obj_is_pinned(obj)) {
> -		DRM_DEBUG("can not change the cache level of pinned objects\n");
> -		return -EBUSY;
> -	}
> -
>   	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> +		if (!drm_mm_node_allocated(&vma->node))
> +			continue;
> +
> +		if (vma->pin_count) {
> +			DRM_DEBUG("can not change the cache level of pinned objects\n");
> +			return -EBUSY;
> +		}
> +

But this is the same as i915_gem_obj_is_pinned; where is the
obj->pin_display change the commit message talks about?

>   		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
>   			ret = i915_vma_unbind(vma);
>   			if (ret)
>   				return ret;
> -		}
> +		} else
> +			bound = true;
>   	}
>
> -	if (i915_gem_obj_bound_any(obj)) {
> +	if (bound) {
>   		ret = i915_gem_object_wait_rendering(obj, false);
>   		if (ret)
>   			return ret;
>
> -		i915_gem_object_finish_gtt(obj);
> -
>   		/* Before SandyBridge, you could not use tiling or fence
>   		 * registers with snooped memory, so relinquish any fences
>   		 * currently pointing to our region in the aperture.
> @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   				return ret;
>   		}
>
> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
> -			if (drm_mm_node_allocated(&vma->node)) {
> -				ret = i915_vma_bind(vma, cache_level,
> -						    PIN_UPDATE);
> -				if (ret)
> -					return ret;
> -			}
> +		/* Access to snoopable pages through the GTT is incoherent. */
> +		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
> +			i915_gem_release_mmap(obj);

I don't fully understand this one - but my question is this. Previously
userspace would lose mappings on cache level changes any time; after
this only on !LLC when turning on caching mode. So this means userspace
needs to know about this change and modify its behavior? Or what
exactly would happen in practice?

Regards,

Tvrtko
Chris Wilson Oct. 7, 2015, 4:19 p.m. UTC | #4
On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 06/10/15 11:39, Chris Wilson wrote:
> >Since the remove of the pin-ioctl, we only care about not changing the
> >cache level on buffers pinned to the hardware as indicated by
> >obj->pin_display. So we can safely replace i915_gem_object_is_pinned()
> 
> i915_gem_obj_is_pinned

What? That's not the normal prefix, who named that monstrosity!

> 
> >here with a plain obj->pin_display check. During rebinding, we will check
> >sanity checks in case vma->pin_count is erroneously set.
> 
> "do sanity checks" or something.
> 
> >At the same time, we can micro-optimise GTT mmap() behaviour since we
> >only need to relinquish the mmaps before Sandybridge.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++----------------
> >  1 file changed, 24 insertions(+), 16 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index d4a3bdf0c5b6..2b8ed7a2faab 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  {
> >  	struct drm_device *dev = obj->base.dev;
> >  	struct i915_vma *vma, *next;
> >+	bool bound = false;
> >  	int ret = 0;
> >
> >  	if (obj->cache_level == cache_level)
> >  		goto out;
> >
> >-	if (i915_gem_obj_is_pinned(obj)) {
> >-		DRM_DEBUG("can not change the cache level of pinned objects\n");
> >-		return -EBUSY;
> >-	}
> >-
> >  	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> >+		if (!drm_mm_node_allocated(&vma->node))
> >+			continue;
> >+
> >+		if (vma->pin_count) {
> >+			DRM_DEBUG("can not change the cache level of pinned objects\n");
> >+			return -EBUSY;
> >+		}
> >+
> 
> But this is the same as i915_gem_obj_is_pinned, where is the
> obj->pin_display change commit message talks about?

Right here. The difference is that we are only iterating the vma list
once rather than 3x.

> >  		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
> >  			ret = i915_vma_unbind(vma);
> >  			if (ret)
> >  				return ret;
> >-		}
> >+		} else
> >+			bound = true;
> >  	}
> >
> >-	if (i915_gem_obj_bound_any(obj)) {
> >+	if (bound) {
> >  		ret = i915_gem_object_wait_rendering(obj, false);
> >  		if (ret)
> >  			return ret;
> >
> >-		i915_gem_object_finish_gtt(obj);
> >-
> >  		/* Before SandyBridge, you could not use tiling or fence
> >  		 * registers with snooped memory, so relinquish any fences
> >  		 * currently pointing to our region in the aperture.
> >@@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  				return ret;
> >  		}
> >
> >-		list_for_each_entry(vma, &obj->vma_list, vma_link)
> >-			if (drm_mm_node_allocated(&vma->node)) {
> >-				ret = i915_vma_bind(vma, cache_level,
> >-						    PIN_UPDATE);
> >-				if (ret)
> >-					return ret;
> >-			}
> >+		/* Access to snoopable pages through the GTT is incoherent. */
> >+		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
> >+			i915_gem_release_mmap(obj);
> 
> Don't fully understand this one - but my question is this.
> Previously userspace would lose mappings on cache level changes any
> time, after this only on !LLC when turning on caching mode. So this
> means userspace needs to know about this change and modify it's
> behavior? Or what exactly would happen in practice?

No. Userspace has no knowledge of the kernel handling the PTEs; its
mapping is persistent (i.e. the obj->mmap_offset inside the dev->mapping).
Otoh, we are improving the situation so even if userspace tries to avoid
set-cache-level nothing is lost.
-Chris
Tvrtko Ursulin Oct. 8, 2015, 9:32 a.m. UTC | #5
On 07/10/15 17:19, Chris Wilson wrote:
> On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote:
>>
>> Hi,
>>
>> On 06/10/15 11:39, Chris Wilson wrote:
>>> Since the remove of the pin-ioctl, we only care about not changing the
>>> cache level on buffers pinned to the hardware as indicated by
>>> obj->pin_display. So we can safely replace i915_gem_object_is_pinned()

[snip]

>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index d4a3bdf0c5b6..2b8ed7a2faab 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>>>   {
>>>   	struct drm_device *dev = obj->base.dev;
>>>   	struct i915_vma *vma, *next;
>>> +	bool bound = false;
>>>   	int ret = 0;
>>>
>>>   	if (obj->cache_level == cache_level)
>>>   		goto out;
>>>
>>> -	if (i915_gem_obj_is_pinned(obj)) {
>>> -		DRM_DEBUG("can not change the cache level of pinned objects\n");
>>> -		return -EBUSY;
>>> -	}
>>> -
>>>   	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
>>> +		if (!drm_mm_node_allocated(&vma->node))
>>> +			continue;
>>> +
>>> +		if (vma->pin_count) {
>>> +			DRM_DEBUG("can not change the cache level of pinned objects\n");
>>> +			return -EBUSY;
>>> +		}
>>> +
>>
>> But this is the same as i915_gem_obj_is_pinned, where is the
>> obj->pin_display change commit message talks about?
>
> Right here. The difference is that we are only iterating the vma list
> once rather than 3x.

That's true, but the commit says it is going to use obj->pin_display for
something and then doesn't use it at all. Riddles in patches are not
that hot. :)

>>>   		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
>>>   			ret = i915_vma_unbind(vma);
>>>   			if (ret)
>>>   				return ret;
>>> -		}
>>> +		} else
>>> +			bound = true;
>>>   	}
>>>
>>> -	if (i915_gem_obj_bound_any(obj)) {
>>> +	if (bound) {
>>>   		ret = i915_gem_object_wait_rendering(obj, false);
>>>   		if (ret)
>>>   			return ret;
>>>
>>> -		i915_gem_object_finish_gtt(obj);
>>> -
>>>   		/* Before SandyBridge, you could not use tiling or fence
>>>   		 * registers with snooped memory, so relinquish any fences
>>>   		 * currently pointing to our region in the aperture.
>>> @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>>>   				return ret;
>>>   		}
>>>
>>> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
>>> -			if (drm_mm_node_allocated(&vma->node)) {
>>> -				ret = i915_vma_bind(vma, cache_level,
>>> -						    PIN_UPDATE);
>>> -				if (ret)
>>> -					return ret;
>>> -			}
>>> +		/* Access to snoopable pages through the GTT is incoherent. */
>>> +		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
>>> +			i915_gem_release_mmap(obj);
>>
>> Don't fully understand this one - but my question is this.
>> Previously userspace would lose mappings on cache level changes any
>> time, after this only on !LLC when turning on caching mode. So this
>> means userspace needs to know about this change and modify it's
>> behavior? Or what exactly would happen in practice?
>
> No. Userspace has no knowledge of the kernel handling the PTEs, its
> mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping).
> Otoh, we are improving the situation so even if userspace tries to avoid
> set-cache-level nothing is lost.

Hm, so if a VMA is re-bound in this process and it could have gotten a
new GGTT address, why is it not necessary to always release mmaps and so
update the CPU PTEs?

Also what about Sandy Bridge? Commit message mentions it and the code 
doesn't?

Regards,

Tvrtko
Chris Wilson Oct. 8, 2015, 9:46 a.m. UTC | #6
On Thu, Oct 08, 2015 at 10:32:39AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/15 17:19, Chris Wilson wrote:
> >On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote:
> >>
> >>Hi,
> >>
> >>On 06/10/15 11:39, Chris Wilson wrote:
> >>>Since the remove of the pin-ioctl, we only care about not changing the
> >>>cache level on buffers pinned to the hardware as indicated by
> >>>obj->pin_display. So we can safely replace i915_gem_object_is_pinned()
> 
> [snip]
> 
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>index d4a3bdf0c5b6..2b8ed7a2faab 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>@@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >>>  {
> >>>  	struct drm_device *dev = obj->base.dev;
> >>>  	struct i915_vma *vma, *next;
> >>>+	bool bound = false;
> >>>  	int ret = 0;
> >>>
> >>>  	if (obj->cache_level == cache_level)
> >>>  		goto out;
> >>>
> >>>-	if (i915_gem_obj_is_pinned(obj)) {
> >>>-		DRM_DEBUG("can not change the cache level of pinned objects\n");
> >>>-		return -EBUSY;
> >>>-	}
> >>>-
> >>>  	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> >>>+		if (!drm_mm_node_allocated(&vma->node))
> >>>+			continue;
> >>>+
> >>>+		if (vma->pin_count) {
> >>>+			DRM_DEBUG("can not change the cache level of pinned objects\n");
> >>>+			return -EBUSY;
> >>>+		}
> >>>+
> >>
> >>But this is the same as i915_gem_obj_is_pinned, where is the
> >>obj->pin_display change commit message talks about?
> >
> >Right here. The difference is that we are only iterating the vma list
> >once rather than 3x.
> 
> Thats true, but the commit says it is going to use obj->pin_display
> for something and then doesn't use it at all. Riddles in patches are
> not that hot. :)

I was trying to explain what the actual rules pertaining to
rebinding the vma were. We can rebind anything that isn't pinned and the
only thing pinned here can be obj->pin_display.
 
> >>>  		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
> >>>  			ret = i915_vma_unbind(vma);
> >>>  			if (ret)
> >>>  				return ret;
> >>>-		}
> >>>+		} else
> >>>+			bound = true;
> >>>  	}
> >>>
> >>>-	if (i915_gem_obj_bound_any(obj)) {
> >>>+	if (bound) {
> >>>  		ret = i915_gem_object_wait_rendering(obj, false);
> >>>  		if (ret)
> >>>  			return ret;
> >>>
> >>>-		i915_gem_object_finish_gtt(obj);
> >>>-
> >>>  		/* Before SandyBridge, you could not use tiling or fence
> >>>  		 * registers with snooped memory, so relinquish any fences
> >>>  		 * currently pointing to our region in the aperture.
> >>>@@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >>>  				return ret;
> >>>  		}
> >>>
> >>>-		list_for_each_entry(vma, &obj->vma_list, vma_link)
> >>>-			if (drm_mm_node_allocated(&vma->node)) {
> >>>-				ret = i915_vma_bind(vma, cache_level,
> >>>-						    PIN_UPDATE);
> >>>-				if (ret)
> >>>-					return ret;
> >>>-			}
> >>>+		/* Access to snoopable pages through the GTT is incoherent. */
> >>>+		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
> >>>+			i915_gem_release_mmap(obj);
> >>
> >>Don't fully understand this one - but my question is this.
> >>Previously userspace would lose mappings on cache level changes any
> >>time, after this only on !LLC when turning on caching mode. So this
> >>means userspace needs to know about this change and modify it's
> >>behavior? Or what exactly would happen in practice?
> >
> >No. Userspace has no knowledge of the kernel handling the PTEs, its
> >mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping).
> >Otoh, we are improving the situation so even if userspace tries to avoid
> >set-cache-level nothing is lost.
> 
> Hm so if a VMA is re-bound in this process and it could have gotten
> a new GGTT address, why it is not necessary to always release mmaps
> and so to update CPU PTEs?

The VMA are not moved by this function, only the PTEs are rewritten. The
GTT ignores the bits we are changing on llc architectures. On !llc using
the GTT to access snoopable PTE is verboten and does cause machine hangs.
 
> Also what about Sandy Bridge? Commit message mentions it and the
> code doesn't?

Age of patch and lack of !llc snb+ at the time, and the state of my mind
when I think about llc.
-Chris
Tvrtko Ursulin Oct. 9, 2015, 10:17 a.m. UTC | #7
On 08/10/15 10:46, Chris Wilson wrote:
> On Thu, Oct 08, 2015 at 10:32:39AM +0100, Tvrtko Ursulin wrote:
>>
>> On 07/10/15 17:19, Chris Wilson wrote:
>>> On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote:
>>>>
>>>> Hi,
>>>>
>>>> On 06/10/15 11:39, Chris Wilson wrote:
>>>>> Since the remove of the pin-ioctl, we only care about not changing the
>>>>> cache level on buffers pinned to the hardware as indicated by
>>>>> obj->pin_display. So we can safely replace i915_gem_object_is_pinned()
>>
>> [snip]
>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>>>> index d4a3bdf0c5b6..2b8ed7a2faab 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>>> @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>>>>>   {
>>>>>   	struct drm_device *dev = obj->base.dev;
>>>>>   	struct i915_vma *vma, *next;
>>>>> +	bool bound = false;
>>>>>   	int ret = 0;
>>>>>
>>>>>   	if (obj->cache_level == cache_level)
>>>>>   		goto out;
>>>>>
>>>>> -	if (i915_gem_obj_is_pinned(obj)) {
>>>>> -		DRM_DEBUG("can not change the cache level of pinned objects\n");
>>>>> -		return -EBUSY;
>>>>> -	}
>>>>> -
>>>>>   	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
>>>>> +		if (!drm_mm_node_allocated(&vma->node))
>>>>> +			continue;
>>>>> +
>>>>> +		if (vma->pin_count) {
>>>>> +			DRM_DEBUG("can not change the cache level of pinned objects\n");
>>>>> +			return -EBUSY;
>>>>> +		}
>>>>> +
>>>>
>>>> But this is the same as i915_gem_obj_is_pinned, where is the
>>>> obj->pin_display change commit message talks about?
>>>
>>> Right here. The difference is that we are only iterating the vma list
>>> once rather than 3x.
>>
>> Thats true, but the commit says it is going to use obj->pin_display
>> for something and then doesn't use it at all. Riddles in patches are
>> not that hot. :)
>
> I was trying to explain what the actual rules pertaining to the
> rebinding the vma was. We can rebind anything that isn't pinned and the
> only thing pinned here can be obj->pin_display.

Okay, but the commit message says it is going to use obj->pin_display.

>>>>>   		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
>>>>>   			ret = i915_vma_unbind(vma);
>>>>>   			if (ret)
>>>>>   				return ret;
>>>>> -		}
>>>>> +		} else
>>>>> +			bound = true;
>>>>>   	}
>>>>>
>>>>> -	if (i915_gem_obj_bound_any(obj)) {
>>>>> +	if (bound) {
>>>>>   		ret = i915_gem_object_wait_rendering(obj, false);
>>>>>   		if (ret)
>>>>>   			return ret;
>>>>>
>>>>> -		i915_gem_object_finish_gtt(obj);
>>>>> -
>>>>>   		/* Before SandyBridge, you could not use tiling or fence
>>>>>   		 * registers with snooped memory, so relinquish any fences
>>>>>   		 * currently pointing to our region in the aperture.
>>>>> @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>>>>>   				return ret;
>>>>>   		}
>>>>>
>>>>> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
>>>>> -			if (drm_mm_node_allocated(&vma->node)) {
>>>>> -				ret = i915_vma_bind(vma, cache_level,
>>>>> -						    PIN_UPDATE);
>>>>> -				if (ret)
>>>>> -					return ret;
>>>>> -			}
>>>>> +		/* Access to snoopable pages through the GTT is incoherent. */
>>>>> +		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
>>>>> +			i915_gem_release_mmap(obj);
>>>>
>>>> Don't fully understand this one - but my question is this.
>>>> Previously userspace would lose mappings on cache level changes any
>>>> time, after this only on !LLC when turning on caching mode. So this
>>>> means userspace needs to know about this change and modify it's
>>>> behavior? Or what exactly would happen in practice?
>>>
>>> No. Userspace has no knowledge of the kernel handling the PTEs, its
>>> mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping).
>>> Otoh, we are improving the situation so even if userspace tries to avoid
>>> set-cache-level nothing is lost.
>>
>> Hm so if a VMA is re-bound in this process and it could have gotten
>> a new GGTT address, why it is not necessary to always release mmaps
>> and so to update CPU PTEs?
>
> The VMA are not moved by this function, only the PTEs are rewritten. The
> GTT ignores the bits we are changing on llc architectures. On !llc using
> the GTT to access snoopable PTE is verboten and does cause machine hangs.

How come they are not moved when they can be unbound and then bound again?

Regards,

Tvrtko
Chris Wilson Oct. 9, 2015, 10:34 a.m. UTC | #8
On Fri, Oct 09, 2015 at 11:17:19AM +0100, Tvrtko Ursulin wrote:
> >>>>>-		list_for_each_entry(vma, &obj->vma_list, vma_link)
> >>>>>-			if (drm_mm_node_allocated(&vma->node)) {
> >>>>>-				ret = i915_vma_bind(vma, cache_level,
> >>>>>-						    PIN_UPDATE);
> >>>>>-				if (ret)
> >>>>>-					return ret;
> >>>>>-			}
> >>>>>+		/* Access to snoopable pages through the GTT is incoherent. */
> >>>>>+		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
> >>>>>+			i915_gem_release_mmap(obj);
> >>>>
> >>>>Don't fully understand this one - but my question is this.
> >>>>Previously userspace would lose mappings on cache level changes any
> >>>>time, after this only on !LLC when turning on caching mode. So this
> >>>>means userspace needs to know about this change and modify it's
> >>>>behavior? Or what exactly would happen in practice?
> >>>
> >>>No. Userspace has no knowledge of the kernel handling the PTEs, its
> >>>mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping).
> >>>Otoh, we are improving the situation so even if userspace tries to avoid
> >>>set-cache-level nothing is lost.
> >>
> >>Hm so if a VMA is re-bound in this process and it could have gotten
> >>a new GGTT address, why it is not necessary to always release mmaps
> >>and so to update CPU PTEs?
> >
> >The VMA are not moved by this function, only the PTEs are rewritten. The
> >GTT ignores the bits we are changing on llc architectures. On !llc using
> >the GTT to access snoopable PTE is verboten and does cause machine hangs.
> 
> How come they are not moved when they can be unbound and then bound again?

The only relevant vma here are rebound with PIN_UPDATE. If we have to
unbind any due to subsequent placement errors, the behaviour doesn't
change in this patch. So I'm not understanding your concern and can't
address it adequately. :(
-Chris
Tvrtko Ursulin Oct. 9, 2015, 12:01 p.m. UTC | #9
On 09/10/15 11:34, Chris Wilson wrote:
> On Fri, Oct 09, 2015 at 11:17:19AM +0100, Tvrtko Ursulin wrote:
>>>>>>> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
>>>>>>> -			if (drm_mm_node_allocated(&vma->node)) {
>>>>>>> -				ret = i915_vma_bind(vma, cache_level,
>>>>>>> -						    PIN_UPDATE);
>>>>>>> -				if (ret)
>>>>>>> -					return ret;
>>>>>>> -			}
>>>>>>> +		/* Access to snoopable pages through the GTT is incoherent. */
>>>>>>> +		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
>>>>>>> +			i915_gem_release_mmap(obj);
>>>>>>
>>>>>> Don't fully understand this one - but my question is this.
>>>>>> Previously userspace would lose mappings on cache level changes any
>>>>>> time, after this only on !LLC when turning on caching mode. So this
>>>>>> means userspace needs to know about this change and modify it's
>>>>>> behavior? Or what exactly would happen in practice?
>>>>>
>>>>> No. Userspace has no knowledge of the kernel handling the PTEs, its
>>>>> mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping).
>>>>> Otoh, we are improving the situation so even if userspace tries to avoid
>>>>> set-cache-level nothing is lost.
>>>>
>>>> Hm so if a VMA is re-bound in this process and it could have gotten
>>>> a new GGTT address, why it is not necessary to always release mmaps
>>>> and so to update CPU PTEs?
>>>
>>> The VMA are not moved by this function, only the PTEs are rewritten. The
>>> GTT ignores the bits we are changing on llc architectures. On !llc using
>>> the GTT to access snoopable PTE is verboten and does cause machine hangs.
>>
>> How come they are not moved when they can be unbound and then bound again?
>
> The only relevant vma here are rebound with PIN_UPDATE. If we have to
> unbind any due to subsequent placement errors, the behaviour doesn't
> change in this patch. So I'm not understanding your concern and can't
> address it adequately. :(

I started to understand how this works after a chat on IRC. Before, I had
completely wrong assumptions.

(This also demonstrates this code should really have a good high level 
comment.)

Unless I missed something, it really looks like the behaviour is unchanged;
just a trip to the fault handler is avoided if not needed.

But I still think you need to improve the commit message to be clearer 
on pin_display (un)usage and SandyBridge referencing.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d4a3bdf0c5b6..2b8ed7a2faab 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3629,31 +3629,34 @@  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct i915_vma *vma, *next;
+	bool bound = false;
 	int ret = 0;
 
 	if (obj->cache_level == cache_level)
 		goto out;
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		DRM_DEBUG("can not change the cache level of pinned objects\n");
-		return -EBUSY;
-	}
-
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (vma->pin_count) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
 		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
-		}
+		} else
+			bound = true;
 	}
 
-	if (i915_gem_obj_bound_any(obj)) {
+	if (bound) {
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
 
-		i915_gem_object_finish_gtt(obj);
-
 		/* Before SandyBridge, you could not use tiling or fence
 		 * registers with snooped memory, so relinquish any fences
 		 * currently pointing to our region in the aperture.
@@ -3664,13 +3667,18 @@  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				return ret;
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
-			if (drm_mm_node_allocated(&vma->node)) {
-				ret = i915_vma_bind(vma, cache_level,
-						    PIN_UPDATE);
-				if (ret)
-					return ret;
-			}
+		/* Access to snoopable pages through the GTT is incoherent. */
+		if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev))
+			i915_gem_release_mmap(obj);
+
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)