Message ID | 1444127996-23561-2-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Oct 06, 2015 at 11:39:56AM +0100, Chris Wilson wrote: > Since the remove of the pin-ioctl, we only care about not changing the > cache level on buffers pinned to the hardware as indicated by > obj->pin_display. So we can safely replace i915_gem_object_is_pinned() > here with a plain obj->pin_display check. During rebinding, we will check > sanity checks in case vma->pin_count is erroneously set. > > At the same time, we can micro-optimise GTT mmap() behaviour since we > only need to relinquish the mmaps before Sandybridge. Actual condition is !LLC so would need to be updated (and split out imo). > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++---------------- > 1 file changed, 24 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d4a3bdf0c5b6..2b8ed7a2faab 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > { > struct drm_device *dev = obj->base.dev; > struct i915_vma *vma, *next; > + bool bound = false; > int ret = 0; > > if (obj->cache_level == cache_level) > goto out; > > - if (i915_gem_obj_is_pinned(obj)) { > - DRM_DEBUG("can not change the cache level of pinned objects\n"); > - return -EBUSY; > - } > - > list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { > + if (!drm_mm_node_allocated(&vma->node)) > + continue; > + > + if (vma->pin_count) { > + DRM_DEBUG("can not change the cache level of pinned objects\n"); > + return -EBUSY; > + } > + > if (!i915_gem_valid_gtt_space(vma, cache_level)) { > ret = i915_vma_unbind(vma); > if (ret) > return ret; > - } > + } else > + bound = true; > } > > - if (i915_gem_obj_bound_any(obj)) { > + if (bound) { > ret = i915_gem_object_wait_rendering(obj, false); > if (ret) > return ret; Shouldn't the below be split out into a separate 
patch? And maybe for paranoia keep calling finish_gtt but restrict it to !LLC && snooped like you do below. -Daniel > > - i915_gem_object_finish_gtt(obj); > - > /* Before SandyBridge, you could not use tiling or fence > * registers with snooped memory, so relinquish any fences > * currently pointing to our region in the aperture. > @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > return ret; > } > > - list_for_each_entry(vma, &obj->vma_list, vma_link) > - if (drm_mm_node_allocated(&vma->node)) { > - ret = i915_vma_bind(vma, cache_level, > - PIN_UPDATE); > - if (ret) > - return ret; > - } > + /* Access to snoopable pages through the GTT is incoherent. */ > + if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) > + i915_gem_release_mmap(obj); > + > + list_for_each_entry(vma, &obj->vma_list, vma_link) { > + if (!drm_mm_node_allocated(&vma->node)) > + continue; > + > + ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); > + if (ret) > + return ret; > + } > } > > list_for_each_entry(vma, &obj->vma_list, vma_link) > -- > 2.6.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, Oct 06, 2015 at 01:28:07PM +0200, Daniel Vetter wrote: > On Tue, Oct 06, 2015 at 11:39:56AM +0100, Chris Wilson wrote: > > Since the remove of the pin-ioctl, we only care about not changing the > > cache level on buffers pinned to the hardware as indicated by > > obj->pin_display. So we can safely replace i915_gem_object_is_pinned() > > here with a plain obj->pin_display check. During rebinding, we will check > > sanity checks in case vma->pin_count is erroneously set. > > > > At the same time, we can micro-optimise GTT mmap() behaviour since we > > only need to relinquish the mmaps before Sandybridge. > > Actual condition is !LLC so would need to be updated (and split out imo). > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++---------------- > > 1 file changed, 24 insertions(+), 16 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index d4a3bdf0c5b6..2b8ed7a2faab 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > > { > > struct drm_device *dev = obj->base.dev; > > struct i915_vma *vma, *next; > > + bool bound = false; > > int ret = 0; > > > > if (obj->cache_level == cache_level) > > goto out; > > > > - if (i915_gem_obj_is_pinned(obj)) { > > - DRM_DEBUG("can not change the cache level of pinned objects\n"); > > - return -EBUSY; > > - } > > - > > list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { > > + if (!drm_mm_node_allocated(&vma->node)) > > + continue; > > + > > + if (vma->pin_count) { > > + DRM_DEBUG("can not change the cache level of pinned objects\n"); > > + return -EBUSY; > > + } > > + > > if (!i915_gem_valid_gtt_space(vma, cache_level)) { > > ret = i915_vma_unbind(vma); > > if (ret) > > return ret; > > - } > > + } else > > + bound = true; > > } > > > > - if 
(i915_gem_obj_bound_any(obj)) { > > + if (bound) { > > ret = i915_gem_object_wait_rendering(obj, false); > > if (ret) > > return ret; > > Shouldn't the below be split out into a separate patch? And maybe for > paranoia keep calling finish_gtt but restrict it to !LLC && snooped like > you do below. Hmm, I don't have a finish-gtt. The serialisation is based on release-mmaps (we have to be sure that any concurrent access is prohibited). So the question is: is i915_gem_release_mmap() a sufficient barrier and if not, why not. In release-mmap we are revoking the CPU's PTE, but that can be ordered with the memory accesses, but before we continue we should be sure that they have been revoked. Paranoia says we should be moving the mb() we have from outside of release-mmaps into release-mmaps. -Chris
Hi, On 06/10/15 11:39, Chris Wilson wrote: > Since the remove of the pin-ioctl, we only care about not changing the > cache level on buffers pinned to the hardware as indicated by > obj->pin_display. So we can safely replace i915_gem_object_is_pinned() i915_gem_obj_is_pinned > here with a plain obj->pin_display check. During rebinding, we will check > sanity checks in case vma->pin_count is erroneously set. "do sanity checks" or something. > At the same time, we can micro-optimise GTT mmap() behaviour since we > only need to relinquish the mmaps before Sandybridge. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++---------------- > 1 file changed, 24 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d4a3bdf0c5b6..2b8ed7a2faab 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > { > struct drm_device *dev = obj->base.dev; > struct i915_vma *vma, *next; > + bool bound = false; > int ret = 0; > > if (obj->cache_level == cache_level) > goto out; > > - if (i915_gem_obj_is_pinned(obj)) { > - DRM_DEBUG("can not change the cache level of pinned objects\n"); > - return -EBUSY; > - } > - > list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { > + if (!drm_mm_node_allocated(&vma->node)) > + continue; > + > + if (vma->pin_count) { > + DRM_DEBUG("can not change the cache level of pinned objects\n"); > + return -EBUSY; > + } > + But this is the same as i915_gem_obj_is_pinned, where is the obj->pin_display change commit message talks about? 
> if (!i915_gem_valid_gtt_space(vma, cache_level)) { > ret = i915_vma_unbind(vma); > if (ret) > return ret; > - } > + } else > + bound = true; > } > > - if (i915_gem_obj_bound_any(obj)) { > + if (bound) { > ret = i915_gem_object_wait_rendering(obj, false); > if (ret) > return ret; > > - i915_gem_object_finish_gtt(obj); > - > /* Before SandyBridge, you could not use tiling or fence > * registers with snooped memory, so relinquish any fences > * currently pointing to our region in the aperture. > @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > return ret; > } > > - list_for_each_entry(vma, &obj->vma_list, vma_link) > - if (drm_mm_node_allocated(&vma->node)) { > - ret = i915_vma_bind(vma, cache_level, > - PIN_UPDATE); > - if (ret) > - return ret; > - } > + /* Access to snoopable pages through the GTT is incoherent. */ > + if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) > + i915_gem_release_mmap(obj); Don't fully understand this one - but my question is this. Previously userspace would lose mappings on cache level changes any time, after this only on !LLC when turning on caching mode. So this means userspace needs to know about this change and modify it's behavior? Or what exactly would happen in practice? Regards, Tvrtko
On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote: > > Hi, > > On 06/10/15 11:39, Chris Wilson wrote: > >Since the remove of the pin-ioctl, we only care about not changing the > >cache level on buffers pinned to the hardware as indicated by > >obj->pin_display. So we can safely replace i915_gem_object_is_pinned() > > i915_gem_obj_is_pinned What? That's not the normal prefix, who named that monstrosity! > > >here with a plain obj->pin_display check. During rebinding, we will check > >sanity checks in case vma->pin_count is erroneously set. > > "do sanity checks" or something. > > >At the same time, we can micro-optimise GTT mmap() behaviour since we > >only need to relinquish the mmaps before Sandybridge. > > > >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >--- > > drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++---------------- > > 1 file changed, 24 insertions(+), 16 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > >index d4a3bdf0c5b6..2b8ed7a2faab 100644 > >--- a/drivers/gpu/drm/i915/i915_gem.c > >+++ b/drivers/gpu/drm/i915/i915_gem.c > >@@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > > { > > struct drm_device *dev = obj->base.dev; > > struct i915_vma *vma, *next; > >+ bool bound = false; > > int ret = 0; > > > > if (obj->cache_level == cache_level) > > goto out; > > > >- if (i915_gem_obj_is_pinned(obj)) { > >- DRM_DEBUG("can not change the cache level of pinned objects\n"); > >- return -EBUSY; > >- } > >- > > list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { > >+ if (!drm_mm_node_allocated(&vma->node)) > >+ continue; > >+ > >+ if (vma->pin_count) { > >+ DRM_DEBUG("can not change the cache level of pinned objects\n"); > >+ return -EBUSY; > >+ } > >+ > > But this is the same as i915_gem_obj_is_pinned, where is the > obj->pin_display change commit message talks about? Right here. 
The difference is that we are only iterating the vma list once rather than 3x. > > if (!i915_gem_valid_gtt_space(vma, cache_level)) { > > ret = i915_vma_unbind(vma); > > if (ret) > > return ret; > >- } > >+ } else > >+ bound = true; > > } > > > >- if (i915_gem_obj_bound_any(obj)) { > >+ if (bound) { > > ret = i915_gem_object_wait_rendering(obj, false); > > if (ret) > > return ret; > > > >- i915_gem_object_finish_gtt(obj); > >- > > /* Before SandyBridge, you could not use tiling or fence > > * registers with snooped memory, so relinquish any fences > > * currently pointing to our region in the aperture. > >@@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > > return ret; > > } > > > >- list_for_each_entry(vma, &obj->vma_list, vma_link) > >- if (drm_mm_node_allocated(&vma->node)) { > >- ret = i915_vma_bind(vma, cache_level, > >- PIN_UPDATE); > >- if (ret) > >- return ret; > >- } > >+ /* Access to snoopable pages through the GTT is incoherent. */ > >+ if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) > >+ i915_gem_release_mmap(obj); > > Don't fully understand this one - but my question is this. > Previously userspace would lose mappings on cache level changes any > time, after this only on !LLC when turning on caching mode. So this > means userspace needs to know about this change and modify it's > behavior? Or what exactly would happen in practice? No. Userspace has no knowledge of the kernel handling the PTEs, its mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping). Otoh, we are improving the situation so even if userspace tries to avoid set-cache-level nothing is lost. -Chris
On 07/10/15 17:19, Chris Wilson wrote: > On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote: >> >> Hi, >> >> On 06/10/15 11:39, Chris Wilson wrote: >>> Since the remove of the pin-ioctl, we only care about not changing the >>> cache level on buffers pinned to the hardware as indicated by >>> obj->pin_display. So we can safely replace i915_gem_object_is_pinned() [snip] >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >>> index d4a3bdf0c5b6..2b8ed7a2faab 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem.c >>> +++ b/drivers/gpu/drm/i915/i915_gem.c >>> @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, >>> { >>> struct drm_device *dev = obj->base.dev; >>> struct i915_vma *vma, *next; >>> + bool bound = false; >>> int ret = 0; >>> >>> if (obj->cache_level == cache_level) >>> goto out; >>> >>> - if (i915_gem_obj_is_pinned(obj)) { >>> - DRM_DEBUG("can not change the cache level of pinned objects\n"); >>> - return -EBUSY; >>> - } >>> - >>> list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { >>> + if (!drm_mm_node_allocated(&vma->node)) >>> + continue; >>> + >>> + if (vma->pin_count) { >>> + DRM_DEBUG("can not change the cache level of pinned objects\n"); >>> + return -EBUSY; >>> + } >>> + >> >> But this is the same as i915_gem_obj_is_pinned, where is the >> obj->pin_display change commit message talks about? > > Right here. The difference is that we are only iterating the vma list > once rather than 3x. Thats true, but the commit says it is going to use obj->pin_display for something and then doesn't use it at all. Riddles in patches are not that hot. 
:) >>> if (!i915_gem_valid_gtt_space(vma, cache_level)) { >>> ret = i915_vma_unbind(vma); >>> if (ret) >>> return ret; >>> - } >>> + } else >>> + bound = true; >>> } >>> >>> - if (i915_gem_obj_bound_any(obj)) { >>> + if (bound) { >>> ret = i915_gem_object_wait_rendering(obj, false); >>> if (ret) >>> return ret; >>> >>> - i915_gem_object_finish_gtt(obj); >>> - >>> /* Before SandyBridge, you could not use tiling or fence >>> * registers with snooped memory, so relinquish any fences >>> * currently pointing to our region in the aperture. >>> @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, >>> return ret; >>> } >>> >>> - list_for_each_entry(vma, &obj->vma_list, vma_link) >>> - if (drm_mm_node_allocated(&vma->node)) { >>> - ret = i915_vma_bind(vma, cache_level, >>> - PIN_UPDATE); >>> - if (ret) >>> - return ret; >>> - } >>> + /* Access to snoopable pages through the GTT is incoherent. */ >>> + if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) >>> + i915_gem_release_mmap(obj); >> >> Don't fully understand this one - but my question is this. >> Previously userspace would lose mappings on cache level changes any >> time, after this only on !LLC when turning on caching mode. So this >> means userspace needs to know about this change and modify it's >> behavior? Or what exactly would happen in practice? > > No. Userspace has no knowledge of the kernel handling the PTEs, its > mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping). > Otoh, we are improving the situation so even if userspace tries to avoid > set-cache-level nothing is lost. Hm so if a VMA is re-bound in this process and it could have gotten a new GGTT address, why it is not necessary to always release mmaps and so to update CPU PTEs? Also what about Sandy Bridge? Commit message mentions it and the code doesn't? Regards, Tvrtko
On Thu, Oct 08, 2015 at 10:32:39AM +0100, Tvrtko Ursulin wrote: > > On 07/10/15 17:19, Chris Wilson wrote: > >On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote: > >> > >>Hi, > >> > >>On 06/10/15 11:39, Chris Wilson wrote: > >>>Since the remove of the pin-ioctl, we only care about not changing the > >>>cache level on buffers pinned to the hardware as indicated by > >>>obj->pin_display. So we can safely replace i915_gem_object_is_pinned() > > [snip] > > >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > >>>index d4a3bdf0c5b6..2b8ed7a2faab 100644 > >>>--- a/drivers/gpu/drm/i915/i915_gem.c > >>>+++ b/drivers/gpu/drm/i915/i915_gem.c > >>>@@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > >>> { > >>> struct drm_device *dev = obj->base.dev; > >>> struct i915_vma *vma, *next; > >>>+ bool bound = false; > >>> int ret = 0; > >>> > >>> if (obj->cache_level == cache_level) > >>> goto out; > >>> > >>>- if (i915_gem_obj_is_pinned(obj)) { > >>>- DRM_DEBUG("can not change the cache level of pinned objects\n"); > >>>- return -EBUSY; > >>>- } > >>>- > >>> list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { > >>>+ if (!drm_mm_node_allocated(&vma->node)) > >>>+ continue; > >>>+ > >>>+ if (vma->pin_count) { > >>>+ DRM_DEBUG("can not change the cache level of pinned objects\n"); > >>>+ return -EBUSY; > >>>+ } > >>>+ > >> > >>But this is the same as i915_gem_obj_is_pinned, where is the > >>obj->pin_display change commit message talks about? > > > >Right here. The difference is that we are only iterating the vma list > >once rather than 3x. > > Thats true, but the commit says it is going to use obj->pin_display > for something and then doesn't use it at all. Riddles in patches are > not that hot. :) I was trying to explain what the actual rules pertaining to the rebinding the vma was. We can rebind anything that isn't pinned and the only thing pinned here can be obj->pin_display. 
> >>> if (!i915_gem_valid_gtt_space(vma, cache_level)) { > >>> ret = i915_vma_unbind(vma); > >>> if (ret) > >>> return ret; > >>>- } > >>>+ } else > >>>+ bound = true; > >>> } > >>> > >>>- if (i915_gem_obj_bound_any(obj)) { > >>>+ if (bound) { > >>> ret = i915_gem_object_wait_rendering(obj, false); > >>> if (ret) > >>> return ret; > >>> > >>>- i915_gem_object_finish_gtt(obj); > >>>- > >>> /* Before SandyBridge, you could not use tiling or fence > >>> * registers with snooped memory, so relinquish any fences > >>> * currently pointing to our region in the aperture. > >>>@@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > >>> return ret; > >>> } > >>> > >>>- list_for_each_entry(vma, &obj->vma_list, vma_link) > >>>- if (drm_mm_node_allocated(&vma->node)) { > >>>- ret = i915_vma_bind(vma, cache_level, > >>>- PIN_UPDATE); > >>>- if (ret) > >>>- return ret; > >>>- } > >>>+ /* Access to snoopable pages through the GTT is incoherent. */ > >>>+ if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) > >>>+ i915_gem_release_mmap(obj); > >> > >>Don't fully understand this one - but my question is this. > >>Previously userspace would lose mappings on cache level changes any > >>time, after this only on !LLC when turning on caching mode. So this > >>means userspace needs to know about this change and modify it's > >>behavior? Or what exactly would happen in practice? > > > >No. Userspace has no knowledge of the kernel handling the PTEs, its > >mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping). > >Otoh, we are improving the situation so even if userspace tries to avoid > >set-cache-level nothing is lost. > > Hm so if a VMA is re-bound in this process and it could have gotten > a new GGTT address, why it is not necessary to always release mmaps > and so to update CPU PTEs? The VMA are not moved by this function, only the PTEs are rewritten. The GTT ignores the bits we are changing on llc architectures. 
On !llc, using the GTT to access snoopable PTEs is verboten and does cause machine hangs. > Also what about Sandy Bridge? Commit message mentions it and the > code doesn't? Age of patch and lack of !llc snb+ at the time, and the state of my mind when I think about llc. -Chris
On 08/10/15 10:46, Chris Wilson wrote: > On Thu, Oct 08, 2015 at 10:32:39AM +0100, Tvrtko Ursulin wrote: >> >> On 07/10/15 17:19, Chris Wilson wrote: >>> On Wed, Oct 07, 2015 at 04:57:25PM +0100, Tvrtko Ursulin wrote: >>>> >>>> Hi, >>>> >>>> On 06/10/15 11:39, Chris Wilson wrote: >>>>> Since the remove of the pin-ioctl, we only care about not changing the >>>>> cache level on buffers pinned to the hardware as indicated by >>>>> obj->pin_display. So we can safely replace i915_gem_object_is_pinned() >> >> [snip] >> >>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >>>>> index d4a3bdf0c5b6..2b8ed7a2faab 100644 >>>>> --- a/drivers/gpu/drm/i915/i915_gem.c >>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c >>>>> @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, >>>>> { >>>>> struct drm_device *dev = obj->base.dev; >>>>> struct i915_vma *vma, *next; >>>>> + bool bound = false; >>>>> int ret = 0; >>>>> >>>>> if (obj->cache_level == cache_level) >>>>> goto out; >>>>> >>>>> - if (i915_gem_obj_is_pinned(obj)) { >>>>> - DRM_DEBUG("can not change the cache level of pinned objects\n"); >>>>> - return -EBUSY; >>>>> - } >>>>> - >>>>> list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { >>>>> + if (!drm_mm_node_allocated(&vma->node)) >>>>> + continue; >>>>> + >>>>> + if (vma->pin_count) { >>>>> + DRM_DEBUG("can not change the cache level of pinned objects\n"); >>>>> + return -EBUSY; >>>>> + } >>>>> + >>>> >>>> But this is the same as i915_gem_obj_is_pinned, where is the >>>> obj->pin_display change commit message talks about? >>> >>> Right here. The difference is that we are only iterating the vma list >>> once rather than 3x. >> >> Thats true, but the commit says it is going to use obj->pin_display >> for something and then doesn't use it at all. Riddles in patches are >> not that hot. :) > > I was trying to explain what the actual rules pertaining to the > rebinding the vma was. 
We can rebind anything that isn't pinned and the > only thing pinned here can be obj->pin_display. Okay but the commit message says the is going to use obj->pin_display. >>>>> if (!i915_gem_valid_gtt_space(vma, cache_level)) { >>>>> ret = i915_vma_unbind(vma); >>>>> if (ret) >>>>> return ret; >>>>> - } >>>>> + } else >>>>> + bound = true; >>>>> } >>>>> >>>>> - if (i915_gem_obj_bound_any(obj)) { >>>>> + if (bound) { >>>>> ret = i915_gem_object_wait_rendering(obj, false); >>>>> if (ret) >>>>> return ret; >>>>> >>>>> - i915_gem_object_finish_gtt(obj); >>>>> - >>>>> /* Before SandyBridge, you could not use tiling or fence >>>>> * registers with snooped memory, so relinquish any fences >>>>> * currently pointing to our region in the aperture. >>>>> @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, >>>>> return ret; >>>>> } >>>>> >>>>> - list_for_each_entry(vma, &obj->vma_list, vma_link) >>>>> - if (drm_mm_node_allocated(&vma->node)) { >>>>> - ret = i915_vma_bind(vma, cache_level, >>>>> - PIN_UPDATE); >>>>> - if (ret) >>>>> - return ret; >>>>> - } >>>>> + /* Access to snoopable pages through the GTT is incoherent. */ >>>>> + if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) >>>>> + i915_gem_release_mmap(obj); >>>> >>>> Don't fully understand this one - but my question is this. >>>> Previously userspace would lose mappings on cache level changes any >>>> time, after this only on !LLC when turning on caching mode. So this >>>> means userspace needs to know about this change and modify it's >>>> behavior? Or what exactly would happen in practice? >>> >>> No. Userspace has no knowledge of the kernel handling the PTEs, its >>> mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping). >>> Otoh, we are improving the situation so even if userspace tries to avoid >>> set-cache-level nothing is lost. 
>> >> Hm so if a VMA is re-bound in this process and it could have gotten >> a new GGTT address, why it is not necessary to always release mmaps >> and so to update CPU PTEs? > > The VMA are not moved by this function, only the PTEs are rewritten. The > GTT ignores the bits we are changing on llc architectures. On !llc using > the GTT to access snoopable PTE is verboten and does cause machine hangs. How come they are not moved when they can be unbound and then bound again? Regards, Tvrtko
On Fri, Oct 09, 2015 at 11:17:19AM +0100, Tvrtko Ursulin wrote: > >>>>>- list_for_each_entry(vma, &obj->vma_list, vma_link) > >>>>>- if (drm_mm_node_allocated(&vma->node)) { > >>>>>- ret = i915_vma_bind(vma, cache_level, > >>>>>- PIN_UPDATE); > >>>>>- if (ret) > >>>>>- return ret; > >>>>>- } > >>>>>+ /* Access to snoopable pages through the GTT is incoherent. */ > >>>>>+ if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) > >>>>>+ i915_gem_release_mmap(obj); > >>>> > >>>>Don't fully understand this one - but my question is this. > >>>>Previously userspace would lose mappings on cache level changes any > >>>>time, after this only on !LLC when turning on caching mode. So this > >>>>means userspace needs to know about this change and modify it's > >>>>behavior? Or what exactly would happen in practice? > >>> > >>>No. Userspace has no knowledge of the kernel handling the PTEs, its > >>>mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping). > >>>Otoh, we are improving the situation so even if userspace tries to avoid > >>>set-cache-level nothing is lost. > >> > >>Hm so if a VMA is re-bound in this process and it could have gotten > >>a new GGTT address, why it is not necessary to always release mmaps > >>and so to update CPU PTEs? > > > >The VMA are not moved by this function, only the PTEs are rewritten. The > >GTT ignores the bits we are changing on llc architectures. On !llc using > >the GTT to access snoopable PTE is verboten and does cause machine hangs. > > How come they are not moved when they can be unbound and then bound again? The only relevant vma here are rebound with PIN_UPDATE. If we have to unbind any due to subsequent placement errors, the behaviour doesn't change in this patch. So I'm not understanding your concern and can't address it adequately. :( -Chris
On 09/10/15 11:34, Chris Wilson wrote: > On Fri, Oct 09, 2015 at 11:17:19AM +0100, Tvrtko Ursulin wrote: >>>>>>> - list_for_each_entry(vma, &obj->vma_list, vma_link) >>>>>>> - if (drm_mm_node_allocated(&vma->node)) { >>>>>>> - ret = i915_vma_bind(vma, cache_level, >>>>>>> - PIN_UPDATE); >>>>>>> - if (ret) >>>>>>> - return ret; >>>>>>> - } >>>>>>> + /* Access to snoopable pages through the GTT is incoherent. */ >>>>>>> + if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) >>>>>>> + i915_gem_release_mmap(obj); >>>>>> >>>>>> Don't fully understand this one - but my question is this. >>>>>> Previously userspace would lose mappings on cache level changes any >>>>>> time, after this only on !LLC when turning on caching mode. So this >>>>>> means userspace needs to know about this change and modify it's >>>>>> behavior? Or what exactly would happen in practice? >>>>> >>>>> No. Userspace has no knowledge of the kernel handling the PTEs, its >>>>> mapping is persistent (i.e. the obj->mmap_offset inside the dev->mappping). >>>>> Otoh, we are improving the situation so even if userspace tries to avoid >>>>> set-cache-level nothing is lost. >>>> >>>> Hm so if a VMA is re-bound in this process and it could have gotten >>>> a new GGTT address, why it is not necessary to always release mmaps >>>> and so to update CPU PTEs? >>> >>> The VMA are not moved by this function, only the PTEs are rewritten. The >>> GTT ignores the bits we are changing on llc architectures. On !llc using >>> the GTT to access snoopable PTE is verboten and does cause machine hangs. >> >> How come they are not moved when they can be unbound and then bound again? > > The only relevant vma here are rebound with PIN_UPDATE. If we have to > unbind any due to subsequent placement errors, the behaviour doesn't > change in this patch. So I'm not understanding your concern and can't > address it adequately. :( I started to understand how this works after a chat on IRC. Before I had a completely wrong assumptions. 
(This also demonstrates that this code should really have a good high-level comment.) Unless I missed something, it really looks like the behaviour is unchanged; just a trip to the fault handler is avoided if not needed. But I still think you need to improve the commit message to be clearer on pin_display (un)usage and SandyBridge referencing. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d4a3bdf0c5b6..2b8ed7a2faab 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3629,31 +3629,34 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct i915_vma *vma, *next; + bool bound = false; int ret = 0; if (obj->cache_level == cache_level) goto out; - if (i915_gem_obj_is_pinned(obj)) { - DRM_DEBUG("can not change the cache level of pinned objects\n"); - return -EBUSY; - } - list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + + if (vma->pin_count) { + DRM_DEBUG("can not change the cache level of pinned objects\n"); + return -EBUSY; + } + if (!i915_gem_valid_gtt_space(vma, cache_level)) { ret = i915_vma_unbind(vma); if (ret) return ret; - } + } else + bound = true; } - if (i915_gem_obj_bound_any(obj)) { + if (bound) { ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; - i915_gem_object_finish_gtt(obj); - /* Before SandyBridge, you could not use tiling or fence * registers with snooped memory, so relinquish any fences * currently pointing to our region in the aperture. @@ -3664,13 +3667,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return ret; } - list_for_each_entry(vma, &obj->vma_list, vma_link) - if (drm_mm_node_allocated(&vma->node)) { - ret = i915_vma_bind(vma, cache_level, - PIN_UPDATE); - if (ret) - return ret; - } + /* Access to snoopable pages through the GTT is incoherent. */ + if (cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) + i915_gem_release_mmap(obj); + + list_for_each_entry(vma, &obj->vma_list, vma_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + + ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); + if (ret) + return ret; + } } list_for_each_entry(vma, &obj->vma_list, vma_link)
Since the removal of the pin-ioctl, we only care about not changing the cache level on buffers pinned to the hardware as indicated by obj->pin_display. So we can safely replace i915_gem_obj_is_pinned() here with a plain obj->pin_display check. During rebinding, we will do sanity checks in case vma->pin_count is erroneously set. At the same time, we can micro-optimise GTT mmap() behaviour since we only need to relinquish the mmaps before Sandybridge. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-)