[v2,1/2] i965/gen9: Pass alignment as function parameter in drm_intel_gem_bo_alloc_internal()
diff mbox

Message ID 1434998822-11855-1-git-send-email-anuj.phogat@gmail.com
State New
Headers show

Commit Message

Anuj Phogat June 22, 2015, 6:47 p.m. UTC
and use it to initialize the align variable in drm_intel_bo.

In case of YF/YS tiled buffers libdrm need not know about the tiling
format because these buffers don't have hardware support to be tiled
or detiled through a fenced region. But, libdrm still need to know
about buffer alignment restrictions because kernel uses it when
resolving the relocation.

Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
So, use the passed alignment value in this function. Note that we continue
ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
the previous behavior.

V2: Add a condition to avoid allocation from cache. (Ben)

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Cc: Ben Widawsky <ben@bwidawsk.net>
---
 intel/intel_bufmgr_gem.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

Comments

Daniel Vetter June 22, 2015, 7:51 p.m. UTC | #1
On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> and use it to initialize the align variable in drm_intel_bo.
> 
> In case of YF/YS tiled buffers libdrm need not know about the tiling
> format because these buffers don't have hardware support to be tiled
> or detiled through a fenced region. But, libdrm still need to know
> about buffer alignment restrictions because kernel uses it when
> resolving the relocation.
> 
> Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> So, use the passed alignment value in this function. Note that we continue
> ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> the previous behavior.
> 
> V2: Add a condition to avoid allocation from cache. (Ben)

This will hurt badly since some programs love to cycle through textures.
You can still allocate from the cache, you only need to update the
alignement constraint. The kernel will move the buffer on the next execbuf
if it's misplaced.
-Daniel

> 
> Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
> Cc: Ben Widawsky <ben@bwidawsk.net>
> ---
>  intel/intel_bufmgr_gem.c | 20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 60c06fc..60f494e 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -660,7 +660,8 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
>  				unsigned long size,
>  				unsigned long flags,
>  				uint32_t tiling_mode,
> -				unsigned long stride)
> +				unsigned long stride,
> +				unsigned int alignment)
>  {
>  	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
>  	drm_intel_bo_gem *bo_gem;
> @@ -700,9 +701,14 @@ retry:
>  			 */
>  			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
>  					      bucket->head.prev, head);
> -			DRMLISTDEL(&bo_gem->head);
> -			alloc_from_cache = true;
> +			if (alignment > 0 && bo_gem->bo.align != alignment) {
> +				alloc_from_cache = false;
> +			} else {
> +				alloc_from_cache = true;
> +				DRMLISTDEL(&bo_gem->head);
> +			}
>  		} else {
> +			assert(alignment == 0);
>  			/* For non-render-target BOs (where we're probably
>  			 * going to map it first thing in order to fill it
>  			 * with data), check if the last BO in the cache is
> @@ -759,6 +765,7 @@ retry:
>  			return NULL;
>  		}
>  		bo_gem->bo.bufmgr = bufmgr;
> +		bo_gem->bo.align = alignment;
>  
>  		bo_gem->tiling_mode = I915_TILING_NONE;
>  		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> @@ -802,7 +809,8 @@ drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>  {
>  	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
>  					       BO_ALLOC_FOR_RENDER,
> -					       I915_TILING_NONE, 0);
> +					       I915_TILING_NONE, 0,
> +					       alignment);
>  }
>  
>  static drm_intel_bo *
> @@ -812,7 +820,7 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
>  		       unsigned int alignment)
>  {
>  	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
> -					       I915_TILING_NONE, 0);
> +					       I915_TILING_NONE, 0, 0);
>  }
>  
>  static drm_intel_bo *
> @@ -864,7 +872,7 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>  		stride = 0;
>  
>  	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
> -					       tiling, stride);
> +					       tiling, stride, 0);
>  }
>  
>  static drm_intel_bo *
> -- 
> 1.9.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson June 22, 2015, 8:04 p.m. UTC | #2
On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> > and use it to initialize the align variable in drm_intel_bo.
> > 
> > In case of YF/YS tiled buffers libdrm need not know about the tiling
> > format because these buffers don't have hardware support to be tiled
> > or detiled through a fenced region. But, libdrm still need to know
> > about buffer alignment restrictions because kernel uses it when
> > resolving the relocation.
> > 
> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> > So, use the passed alignment value in this function. Note that we continue
> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> > the previous behavior.
> > 
> > V2: Add a condition to avoid allocation from cache. (Ben)
> 
> This will hurt badly since some programs love to cycle through textures.
> You can still allocate from the cache, you only need to update the
> alignement constraint. The kernel will move the buffer on the next execbuf
> if it's misplaced.

For llc, using fresh pages just puts memory and aperture pressure (plus
a small amount of interface pressure) on the system by allocating more bo.

For !llc, it is far better to move an object in the GTT to match a
change in alignment than it is to allocate fresh pages (and deallocate
stale pages).
-Chris
Anuj Phogat June 23, 2015, 11:44 p.m. UTC | #3
On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
>> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
>> > and use it to initialize the align variable in drm_intel_bo.
>> >
>> > In case of YF/YS tiled buffers libdrm need not know about the tiling
>> > format because these buffers don't have hardware support to be tiled
>> > or detiled through a fenced region. But, libdrm still need to know
>> > about buffer alignment restrictions because kernel uses it when
>> > resolving the relocation.
>> >
>> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
>> > So, use the passed alignment value in this function. Note that we continue
>> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
>> > the previous behavior.
>> >
>> > V2: Add a condition to avoid allocation from cache. (Ben)
>>
>> This will hurt badly since some programs love to cycle through textures.
>> You can still allocate from the cache, you only need to update the
>> alignement constraint. The kernel will move the buffer on the next execbuf
>> if it's misplaced.
>
> For llc, using fresh pages just puts memory and aperture pressure (plus
> a small amount of interface pressure) on the system by allocating more bo.
>
> For !llc, it is far better to move an object in the GTT to match a
> change in alignment than it is to allocate fresh pages (and deallocate
> stale pages).
Could you please explain this and point me to what you want to be
changed in this patch?

> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson June 24, 2015, 7:28 a.m. UTC | #4
On Tue, Jun 23, 2015 at 04:44:52PM -0700, Anuj Phogat wrote:
> On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
> >> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> >> > and use it to initialize the align variable in drm_intel_bo.
> >> >
> >> > In case of YF/YS tiled buffers libdrm need not know about the tiling
> >> > format because these buffers don't have hardware support to be tiled
> >> > or detiled through a fenced region. But, libdrm still need to know
> >> > about buffer alignment restrictions because kernel uses it when
> >> > resolving the relocation.
> >> >
> >> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> >> > So, use the passed alignment value in this function. Note that we continue
> >> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> >> > the previous behavior.
> >> >
> >> > V2: Add a condition to avoid allocation from cache. (Ben)
> >>
> >> This will hurt badly since some programs love to cycle through textures.
> >> You can still allocate from the cache, you only need to update the
> >> alignement constraint. The kernel will move the buffer on the next execbuf
> >> if it's misplaced.
> >
> > For llc, using fresh pages just puts memory and aperture pressure (plus
> > a small amount of interface pressure) on the system by allocating more bo.
> >
> > For !llc, it is far better to move an object in the GTT to match a
> > change in alignment than it is to allocate fresh pages (and deallocate
> > stale pages).
> Could you please explain this and point me to what you want to be
> changed in this patch?

You can reuse the cached bo if the alignment mismatches. You can
experiment with searching for a better match, but it's unlikely to be
useful (numbers talk here).

Reusing the cache and keeping the working set small (with zero turnover)
is vital on !llc. Moving a GTT binding is far cheaper than preparing the
backing store for access with the GPU.
-Chris
Daniel Vetter June 24, 2015, 8:24 a.m. UTC | #5
On Wed, Jun 24, 2015 at 08:28:13AM +0100, Chris Wilson wrote:
> On Tue, Jun 23, 2015 at 04:44:52PM -0700, Anuj Phogat wrote:
> > On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
> > >> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> > >> > and use it to initialize the align variable in drm_intel_bo.
> > >> >
> > >> > In case of YF/YS tiled buffers libdrm need not know about the tiling
> > >> > format because these buffers don't have hardware support to be tiled
> > >> > or detiled through a fenced region. But, libdrm still need to know
> > >> > about buffer alignment restrictions because kernel uses it when
> > >> > resolving the relocation.
> > >> >
> > >> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> > >> > So, use the passed alignment value in this function. Note that we continue
> > >> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> > >> > the previous behavior.
> > >> >
> > >> > V2: Add a condition to avoid allocation from cache. (Ben)
> > >>
> > >> This will hurt badly since some programs love to cycle through textures.
> > >> You can still allocate from the cache, you only need to update the
> > >> alignement constraint. The kernel will move the buffer on the next execbuf
> > >> if it's misplaced.
> > >
> > > For llc, using fresh pages just puts memory and aperture pressure (plus
> > > a small amount of interface pressure) on the system by allocating more bo.
> > >
> > > For !llc, it is far better to move an object in the GTT to match a
> > > change in alignment than it is to allocate fresh pages (and deallocate
> > > stale pages).
> > Could you please explain this and point me to what you want to be
> > changed in this patch?
> 
> You can reuse the cached bo if the alignment mismatches. You can
> experiment with searching for a better match, but it's unlikely to be
> useful (numbers talk here).
> 
> Reusing the cache and keeping the working set small (with zero turnover)
> is vital on !llc. Moving a GTT binding is far cheaper than preparing the
> backing store for access with the GPU.

In other words Chris just supported my suggestion to reuse from the bo
cache and just update the alignment with his own experience: It's a lot
faster than allocating new bo, especially on machines lacking llc (like
vlv/chv).
-Daniel
Ben Widawsky June 25, 2015, 6:01 p.m. UTC | #6
On Wed, Jun 24, 2015 at 08:28:13AM +0100, Chris Wilson wrote:
> On Tue, Jun 23, 2015 at 04:44:52PM -0700, Anuj Phogat wrote:
> > On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
> > >> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> > >> > and use it to initialize the align variable in drm_intel_bo.
> > >> >
> > >> > In case of YF/YS tiled buffers libdrm need not know about the tiling
> > >> > format because these buffers don't have hardware support to be tiled
> > >> > or detiled through a fenced region. But, libdrm still need to know
> > >> > about buffer alignment restrictions because kernel uses it when
> > >> > resolving the relocation.
> > >> >
> > >> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> > >> > So, use the passed alignment value in this function. Note that we continue
> > >> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> > >> > the previous behavior.
> > >> >
> > >> > V2: Add a condition to avoid allocation from cache. (Ben)
> > >>
> > >> This will hurt badly since some programs love to cycle through textures.
> > >> You can still allocate from the cache, you only need to update the
> > >> alignement constraint. The kernel will move the buffer on the next execbuf
> > >> if it's misplaced.
> > >
> > > For llc, using fresh pages just puts memory and aperture pressure (plus
> > > a small amount of interface pressure) on the system by allocating more bo.
> > >
> > > For !llc, it is far better to move an object in the GTT to match a
> > > change in alignment than it is to allocate fresh pages (and deallocate
> > > stale pages).
> > Could you please explain this and point me to what you want to be
> > changed in this patch?
> 
> You can reuse the cached bo if the alignment mismatches. You can
> experiment with searching for a better match, but it's unlikely to be
> useful (numbers talk here).

There's no "better" match, there's only match or no match. It seems pretty
intuitive to me that trying to find a match would be better though, I'm
curious why you think it's unlikely. Even though we don't specifically get the
alignment back from the kernel after the relocation, we can certainly use the
presumed offset as a guess when trying to find a buffer in the cache.

> 
> Reusing the cache and keeping the working set small (with zero turnover)
> is vital on !llc. Moving a GTT binding is far cheaper than preparing the
> backing store for access with the GPU.
> -Chris
> 

FWIW, none of the buffers we're talking about here should ever have a GTT
binding as (according to Anuj, I didn't check) there is no way to get fenced
detiling.

Anuj, I think what Chris is saying is, move the bo->align = XXX outside of the
alloc_from_cache block, and do it unconditionally. Leave the cache searching
part alone. If the kernel has to move it, it will. As I questioned above, I
think it does make sense to try to find a buffer with the right alignment in the
cache.

> -- 
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson June 25, 2015, 6:11 p.m. UTC | #7
On Thu, Jun 25, 2015 at 11:01:44AM -0700, Ben Widawsky wrote:
> On Wed, Jun 24, 2015 at 08:28:13AM +0100, Chris Wilson wrote:
> > On Tue, Jun 23, 2015 at 04:44:52PM -0700, Anuj Phogat wrote:
> > > On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
> > > >> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> > > >> > and use it to initialize the align variable in drm_intel_bo.
> > > >> >
> > > >> > In case of YF/YS tiled buffers libdrm need not know about the tiling
> > > >> > format because these buffers don't have hardware support to be tiled
> > > >> > or detiled through a fenced region. But, libdrm still need to know
> > > >> > about buffer alignment restrictions because kernel uses it when
> > > >> > resolving the relocation.
> > > >> >
> > > >> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> > > >> > So, use the passed alignment value in this function. Note that we continue
> > > >> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> > > >> > the previous behavior.
> > > >> >
> > > >> > V2: Add a condition to avoid allocation from cache. (Ben)
> > > >>
> > > >> This will hurt badly since some programs love to cycle through textures.
> > > >> You can still allocate from the cache, you only need to update the
> > > >> alignement constraint. The kernel will move the buffer on the next execbuf
> > > >> if it's misplaced.
> > > >
> > > > For llc, using fresh pages just puts memory and aperture pressure (plus
> > > > a small amount of interface pressure) on the system by allocating more bo.
> > > >
> > > > For !llc, it is far better to move an object in the GTT to match a
> > > > change in alignment than it is to allocate fresh pages (and deallocate
> > > > stale pages).
> > > Could you please explain this and point me to what you want to be
> > > changed in this patch?
> > 
> > You can reuse the cached bo if the alignment mismatches. You can
> > experiment with searching for a better match, but it's unlikely to be
> > useful (numbers talk here).
> 
> There's no "better" match, there's only match or no match. It seems pretty
> intuitive to me that trying to find a match would be better though, I'm
> curious why you think it's unlikely. Even though we don't specifically get the
> alignment back from the kernel after the relocation, we can certainly use the
> presumed offset as a guess when trying to find a buffer in the cache.

I was thinking in terms of cycles saved, the cost of walking the cache
for an exact match vs returning the first available buffer + the cost of
any relocation, and whether there would be a measurable difference. I
would have thought two patches, do the naive search that doesn't require
any changes to the cached allocation strategy then a second
demonstrating the perf improvement from being a bit smarter in buffer
reuse would have been your ideal anyway.
-Chris
Ben Widawsky June 25, 2015, 7:17 p.m. UTC | #8
On Thu, Jun 25, 2015 at 07:11:21PM +0100, Chris Wilson wrote:
> On Thu, Jun 25, 2015 at 11:01:44AM -0700, Ben Widawsky wrote:
> > On Wed, Jun 24, 2015 at 08:28:13AM +0100, Chris Wilson wrote:
> > > On Tue, Jun 23, 2015 at 04:44:52PM -0700, Anuj Phogat wrote:
> > > > On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > > On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
> > > > >> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
> > > > >> > and use it to initialize the align variable in drm_intel_bo.
> > > > >> >
> > > > >> > In case of YF/YS tiled buffers libdrm need not know about the tiling
> > > > >> > format because these buffers don't have hardware support to be tiled
> > > > >> > or detiled through a fenced region. But, libdrm still need to know
> > > > >> > about buffer alignment restrictions because kernel uses it when
> > > > >> > resolving the relocation.
> > > > >> >
> > > > >> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
> > > > >> > So, use the passed alignment value in this function. Note that we continue
> > > > >> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
> > > > >> > the previous behavior.
> > > > >> >
> > > > >> > V2: Add a condition to avoid allocation from cache. (Ben)
> > > > >>
> > > > >> This will hurt badly since some programs love to cycle through textures.
> > > > >> You can still allocate from the cache, you only need to update the
> > > > >> alignement constraint. The kernel will move the buffer on the next execbuf
> > > > >> if it's misplaced.
> > > > >
> > > > > For llc, using fresh pages just puts memory and aperture pressure (plus
> > > > > a small amount of interface pressure) on the system by allocating more bo.
> > > > >
> > > > > For !llc, it is far better to move an object in the GTT to match a
> > > > > change in alignment than it is to allocate fresh pages (and deallocate
> > > > > stale pages).
> > > > Could you please explain this and point me to what you want to be
> > > > changed in this patch?
> > > 
> > > You can reuse the cached bo if the alignment mismatches. You can
> > > experiment with searching for a better match, but it's unlikely to be
> > > useful (numbers talk here).
> > 
> > There's no "better" match, there's only match or no match. It seems pretty
> > intuitive to me that trying to find a match would be better though, I'm
> > curious why you think it's unlikely. Even though we don't specifically get the
> > alignment back from the kernel after the relocation, we can certainly use the
> > presumed offset as a guess when trying to find a buffer in the cache.
> 
> I was thinking in terms of cycles saved, the cost of walking the cache
> for an exact match vs returning the first available buffer + the cost of
> any relocation, and whether there would be a measurable difference. I
> would have thought two patches, do the naive search that doesn't require
> any changes to the cached allocation strategy then a second
> demonstrating the perf improvement from being a bit smarter in buffer
> reuse would have been your ideal anyway.
> -Chris

That's exactly what I suggested Anuj do :-)
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
Anuj Phogat June 26, 2015, 8:43 p.m. UTC | #9
On Thu, Jun 25, 2015 at 12:17 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Thu, Jun 25, 2015 at 07:11:21PM +0100, Chris Wilson wrote:
>> On Thu, Jun 25, 2015 at 11:01:44AM -0700, Ben Widawsky wrote:
>> > On Wed, Jun 24, 2015 at 08:28:13AM +0100, Chris Wilson wrote:
>> > > On Tue, Jun 23, 2015 at 04:44:52PM -0700, Anuj Phogat wrote:
>> > > > On Mon, Jun 22, 2015 at 1:04 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > > > > On Mon, Jun 22, 2015 at 09:51:08PM +0200, Daniel Vetter wrote:
>> > > > >> On Mon, Jun 22, 2015 at 11:47:02AM -0700, Anuj Phogat wrote:
>> > > > >> > and use it to initialize the align variable in drm_intel_bo.
>> > > > >> >
>> > > > >> > In case of YF/YS tiled buffers libdrm need not know about the tiling
>> > > > >> > format because these buffers don't have hardware support to be tiled
>> > > > >> > or detiled through a fenced region. But, libdrm still need to know
>> > > > >> > about buffer alignment restrictions because kernel uses it when
>> > > > >> > resolving the relocation.
>> > > > >> >
>> > > > >> > Mesa uses drm_intel_gem_bo_alloc_for_render() to allocate Yf/Ys buffers.
>> > > > >> > So, use the passed alignment value in this function. Note that we continue
>> > > > >> > ignoring the alignment value passed to drm_intel_gem_bo_alloc() to follow
>> > > > >> > the previous behavior.
>> > > > >> >
>> > > > >> > V2: Add a condition to avoid allocation from cache. (Ben)
>> > > > >>
>> > > > >> This will hurt badly since some programs love to cycle through textures.
>> > > > >> You can still allocate from the cache, you only need to update the
>> > > > >> alignement constraint. The kernel will move the buffer on the next execbuf
>> > > > >> if it's misplaced.
>> > > > >
>> > > > > For llc, using fresh pages just puts memory and aperture pressure (plus
>> > > > > a small amount of interface pressure) on the system by allocating more bo.
>> > > > >
>> > > > > For !llc, it is far better to move an object in the GTT to match a
>> > > > > change in alignment than it is to allocate fresh pages (and deallocate
>> > > > > stale pages).
>> > > > Could you please explain this and point me to what you want to be
>> > > > changed in this patch?
>> > >
>> > > You can reuse the cached bo if the alignment mismatches. You can
>> > > experiment with searching for a better match, but it's unlikely to be
>> > > useful (numbers talk here).
>> >
>> > There's no "better" match, there's only match or no match. It seems pretty
>> > intuitive to me that trying to find a match would be better though, I'm
>> > curious why you think it's unlikely. Even though we don't specifically get the
>> > alignment back from the kernel after the relocation, we can certainly use the
>> > presumed offset as a guess when trying to find a buffer in the cache.
>>
>> I was thinking in terms of cycles saved, the cost of walking the cache
>> for an exact match vs returning the first available buffer + the cost of
>> any relocation, and whether there would be a measurable difference. I
>> would have thought two patches, do the naive search that doesn't require
>> any changes to the cached allocation strategy then a second
>> demonstrating the perf improvement from being a bit smarter in buffer
>> reuse would have been your ideal anyway.
>> -Chris
>
> That's exactly what I suggested Anuj do :-)
Thanks for helping me to find the right fix. I'll send out an updated patch
doing the naive search with passed alignment and a new patch adding
up the alignment in aperture size.
>>
>> --
>> Chris Wilson, Intel Open Source Technology Centre

Patch
diff mbox

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 60c06fc..60f494e 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -660,7 +660,8 @@  drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 				unsigned long size,
 				unsigned long flags,
 				uint32_t tiling_mode,
-				unsigned long stride)
+				unsigned long stride,
+				unsigned int alignment)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
 	drm_intel_bo_gem *bo_gem;
@@ -700,9 +701,14 @@  retry:
 			 */
 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
 					      bucket->head.prev, head);
-			DRMLISTDEL(&bo_gem->head);
-			alloc_from_cache = true;
+			if (alignment > 0 && bo_gem->bo.align != alignment) {
+				alloc_from_cache = false;
+			} else {
+				alloc_from_cache = true;
+				DRMLISTDEL(&bo_gem->head);
+			}
 		} else {
+			assert(alignment == 0);
 			/* For non-render-target BOs (where we're probably
 			 * going to map it first thing in order to fill it
 			 * with data), check if the last BO in the cache is
@@ -759,6 +765,7 @@  retry:
 			return NULL;
 		}
 		bo_gem->bo.bufmgr = bufmgr;
+		bo_gem->bo.align = alignment;
 
 		bo_gem->tiling_mode = I915_TILING_NONE;
 		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
@@ -802,7 +809,8 @@  drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
 {
 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
 					       BO_ALLOC_FOR_RENDER,
-					       I915_TILING_NONE, 0);
+					       I915_TILING_NONE, 0,
+					       alignment);
 }
 
 static drm_intel_bo *
@@ -812,7 +820,7 @@  drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
 		       unsigned int alignment)
 {
 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
-					       I915_TILING_NONE, 0);
+					       I915_TILING_NONE, 0, 0);
 }
 
 static drm_intel_bo *
@@ -864,7 +872,7 @@  drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
 		stride = 0;
 
 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
-					       tiling, stride);
+					       tiling, stride, 0);
 }
 
 static drm_intel_bo *