
[1/2] intel: Defer setting madv on the bo cache

Message ID 1429024125-7859-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson April 14, 2015, 3:08 p.m. UTC
Convert the bo-cache into 2 phases:

1. A two-second cache of recently used buffers, kept untouched.
2. A two-second cache of older buffers, marked for eviction.

This helps reduce ioctl traffic during rapid turnover of the working set. The
issue is that even a two-second window is enough for an application to
fill all of memory with inactive buffers (and we would then rely on the
oom-killer to identify the right culprit).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
---
 intel/intel_bufmgr_gem.c | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)
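For context, a minimal sketch of the two-phase aging policy described above (illustrative only; the enum and helper names below are made up and are not part of the patch):

enum bo_cache_phase {
	BO_CACHE_FRESH,		/* idle < 2s: left WILLNEED, reusable without an ioctl */
	BO_CACHE_DONTNEED,	/* idle >= 2s: marked (or staying) DONTNEED, kernel may reclaim the pages */
	BO_CACHE_EXPIRED,	/* already DONTNEED and idle >= 4s: freed outright */
};

static enum bo_cache_phase
bo_cache_classify(time_t now, time_t free_time, bool marked_dontneed)
{
	time_t idle = now - free_time;

	if (idle < 2)
		return BO_CACHE_FRESH;
	if (!marked_dontneed || idle < 4)
		return BO_CACHE_DONTNEED;
	return BO_CACHE_EXPIRED;
}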

Comments

Ben Widawsky April 14, 2015, 7:38 p.m. UTC | #1
On Tue, Apr 14, 2015 at 04:08:44PM +0100, Chris Wilson wrote:
> Convert the bo-cache into 2 phases:
> 
> 1. A two-second cache of recently used buffers, kept untouched.
> 2. A two-second cache of older buffers, marked for eviction.
> 
> This helps reduce ioctl traffic during rapid turnover of the working set. The
> issue is that even a two-second window is enough for an application to
> fill all of memory with inactive buffers (and we would then rely on the
> oom-killer to identify the right culprit).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> ---
>  intel/intel_bufmgr_gem.c | 42 +++++++++++++++++++++++++++++++++++-------
>  1 file changed, 35 insertions(+), 7 deletions(-)
> 
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index f5d6130..ecbf8ee 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -211,6 +211,11 @@ struct _drm_intel_bo_gem {
>  	bool used_as_reloc_target;
>  
>  	/**
> +	 * Are we keeping this buffer around in semi-permanent cache?
> +	 */
> +	bool dontneed;
> +
> +	/**
>  	 * Boolean of whether we have encountered an error whilst building the relocation tree.
>  	 */
>  	bool has_error;
> @@ -714,7 +719,8 @@ retry:
>  		}
>  
>  		if (alloc_from_cache) {
> -			if (!drm_intel_gem_bo_madvise_internal
> +			if (bo_gem->dontneed &&
> +			    !drm_intel_gem_bo_madvise_internal
>  			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
>  				drm_intel_gem_bo_free(&bo_gem->bo);
>  				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
> @@ -1150,6 +1156,20 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
>  #endif
>  }
>  
> +static inline void __splice(const drmMMListHead *list,
> +			    drmMMListHead *prev,
> +			    drmMMListHead *next)
> +{
> +	drmMMListHead *first = list->next;
> +	drmMMListHead *last = list->prev;
> +
> +	first->prev = prev;
> +	prev->next = first;
> +
> +	last->next = next;
> +	next->prev = last;
> +}
> +

Seems worth adding to libdrm_lists.h
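If it did move there, a splice helper could follow the existing libdrm_lists.h macro style; a rough sketch (the DRMLISTSPLICE name and exact form are hypothetical, not what this patch adds):

/* Move every entry of __list so it sits between __prev and __next.
 * __list itself is left dangling and must be reinitialised before reuse. */
#define DRMLISTSPLICE(__list, __prev, __next)			\
	do {							\
		if (!DRMLISTEMPTY(__list)) {			\
			(__list)->next->prev = (__prev);	\
			(__prev)->next = (__list)->next;	\
			(__list)->prev->next = (__next);	\
			(__next)->prev = (__list)->prev;	\
		}						\
	} while (0)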

>  /** Frees all cached buffers significantly older than @time. */
>  static void
>  drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
> @@ -1162,19 +1182,29 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
>  	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
>  		struct drm_intel_gem_bo_bucket *bucket =
>  		    &bufmgr_gem->cache_bucket[i];
> +		drmMMListHead madv;
>  
> +		DRMINITLISTHEAD(&madv);
>  		while (!DRMLISTEMPTY(&bucket->head)) {
>  			drm_intel_bo_gem *bo_gem;
>  
>  			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
>  					      bucket->head.next, head);
> -			if (time - bo_gem->free_time <= 1)
> +			if (time - bo_gem->free_time < 2*(1+bo_gem->dontneed))

Something somewhere will complain about mixing bools and ints, I'd guess.

Also, perhaps for perf bisection, do a patch before this one that just changes the
expiration time to 2 (or 4), then add the extra dontneed state on top?
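For what it's worth, one way to keep the thresholds explicit and avoid arithmetic on the bool would be a tiny helper along these lines (name made up; same 2s/4s behaviour as 2*(1+dontneed)):

static bool bo_cache_expired(time_t now, time_t free_time, bool dontneed)
{
	/* 2s for buffers still WILLNEED, 4s once marked DONTNEED. */
	const time_t limit = dontneed ? 4 : 2;

	return now - free_time >= limit;
}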

>  				break;
>  
>  			DRMLISTDEL(&bo_gem->head);
> -
> -			drm_intel_gem_bo_free(&bo_gem->bo);
> +			if (bo_gem->dontneed) {
> +				drm_intel_gem_bo_free(&bo_gem->bo);
> +			} else {
> +				drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
> +								  I915_MADV_DONTNEED);
> +				bo_gem->dontneed = 1;
> +				DRMLISTADDTAIL(&bo_gem->head, &madv);
> +			}
>  		}

Maybe add a comment here?
/* Objects which have elapsed 2s of disuse should go to the top of the bucket. */

> +		if (!DRMLISTEMPTY(&madv))
> +			__splice(&madv, &bucket->head, bucket->head.next);


>  	}
>  
>  	bufmgr_gem->time = time;
> @@ -1285,9 +1315,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
>  
>  	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
>  	/* Put the buffer into our internal cache for reuse if we can. */
> -	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
> -	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
> -					      I915_MADV_DONTNEED)) {
> +	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL) {
>  		bo_gem->free_time = time;
>  
>  		bo_gem->name = NULL;

In general, it looks fine to me. Like I said above wrt adding the patch before
this, I am curious how much difference you see just messing with the expiration
times versus the two state model.
Chris Wilson April 14, 2015, 8:14 p.m. UTC | #2
On Tue, Apr 14, 2015 at 12:38:56PM -0700, Ben Widawsky wrote:
> >  /** Frees all cached buffers significantly older than @time. */
> >  static void
> >  drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
> > @@ -1162,19 +1182,29 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
> >  	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >  		struct drm_intel_gem_bo_bucket *bucket =
> >  		    &bufmgr_gem->cache_bucket[i];
> > +		drmMMListHead madv;
> >  
> > +		DRMINITLISTHEAD(&madv);
> >  		while (!DRMLISTEMPTY(&bucket->head)) {
> >  			drm_intel_bo_gem *bo_gem;
> >  
> >  			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
> >  					      bucket->head.next, head);
> > -			if (time - bo_gem->free_time <= 1)
> > +			if (time - bo_gem->free_time < 2*(1+bo_gem->dontneed))
> 
> Something somewhere will complain about mixing bools and ints, I'd guess.
> 
> Also, perhaps for perf bisection, do a patch before this one that just changes the
> expiration time to 2 (or 4), then add the extra dontneed state on top?

It is already 2, since it uses <= and the patch just converts it to <
to be consistent after the multiplication.
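Spelled out, with an integral time_t:

	time - free_time <= 1    admits exactly the same values as    time - free_time < 2

so the WILLNEED expiry stays at two seconds, and the multiplication only extends it to < 4 once the buffer has been marked DONTNEED.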
 
> >  				break;
> >  
> >  			DRMLISTDEL(&bo_gem->head);
> > -
> > -			drm_intel_gem_bo_free(&bo_gem->bo);
> > +			if (bo_gem->dontneed) {
> > +				drm_intel_gem_bo_free(&bo_gem->bo);
> > +			} else {
> > +				drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
> > +								  I915_MADV_DONTNEED);
> > +				bo_gem->dontneed = 1;
> > +				DRMLISTADDTAIL(&bo_gem->head, &madv);
> > +			}
> >  		}
> 
> Maybe add a comment here?
> /* Objects which have elapsed 2s of disuse should go to the top of the bucket. */
> 
> > +		if (!DRMLISTEMPTY(&madv))
> > +			__splice(&madv, &bucket->head, bucket->head.next);
> 
> 
> >  	}
> >  
> >  	bufmgr_gem->time = time;
> > @@ -1285,9 +1315,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
> >  
> >  	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
> >  	/* Put the buffer into our internal cache for reuse if we can. */
> > -	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
> > -	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
> > -					      I915_MADV_DONTNEED)) {
> > +	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL) {
> >  		bo_gem->free_time = time;
> >  
> >  		bo_gem->name = NULL;
> 
> In general, it looks fine to me. Like I said above wrt adding the patch before
> this, I am curious how much difference you see just messing with the expiration
> times versus the two state model.

The issue I was looking at was simply struct_mutex contention in madvise
(then busy). For busy we can tweak the ordering to reduce the contention
slightly, but madvise is trickier (though madvise is a candidate for one
of the first per-object locks). This just moves the contention elsewhere,
but madvise/busy calls tend to dominate overall and trimming them reduces
the intersection.
-Chris

Patch

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index f5d6130..ecbf8ee 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -211,6 +211,11 @@  struct _drm_intel_bo_gem {
 	bool used_as_reloc_target;
 
 	/**
+	 * Are we keeping this buffer around in semi-permanent cache?
+	 */
+	bool dontneed;
+
+	/**
 	 * Boolean of whether we have encountered an error whilst building the relocation tree.
 	 */
 	bool has_error;
@@ -714,7 +719,8 @@  retry:
 		}
 
 		if (alloc_from_cache) {
-			if (!drm_intel_gem_bo_madvise_internal
+			if (bo_gem->dontneed &&
+			    !drm_intel_gem_bo_madvise_internal
 			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
 				drm_intel_gem_bo_free(&bo_gem->bo);
 				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
@@ -1150,6 +1156,20 @@  drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
 #endif
 }
 
+static inline void __splice(const drmMMListHead *list,
+			    drmMMListHead *prev,
+			    drmMMListHead *next)
+{
+	drmMMListHead *first = list->next;
+	drmMMListHead *last = list->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
 /** Frees all cached buffers significantly older than @time. */
 static void
 drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
@@ -1162,19 +1182,29 @@  drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
 		struct drm_intel_gem_bo_bucket *bucket =
 		    &bufmgr_gem->cache_bucket[i];
+		drmMMListHead madv;
 
+		DRMINITLISTHEAD(&madv);
 		while (!DRMLISTEMPTY(&bucket->head)) {
 			drm_intel_bo_gem *bo_gem;
 
 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
 					      bucket->head.next, head);
-			if (time - bo_gem->free_time <= 1)
+			if (time - bo_gem->free_time < 2*(1+bo_gem->dontneed))
 				break;
 
 			DRMLISTDEL(&bo_gem->head);
-
-			drm_intel_gem_bo_free(&bo_gem->bo);
+			if (bo_gem->dontneed) {
+				drm_intel_gem_bo_free(&bo_gem->bo);
+			} else {
+				drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
+								  I915_MADV_DONTNEED);
+				bo_gem->dontneed = 1;
+				DRMLISTADDTAIL(&bo_gem->head, &madv);
+			}
 		}
+		if (!DRMLISTEMPTY(&madv))
+			__splice(&madv, &bucket->head, bucket->head.next);
 	}
 
 	bufmgr_gem->time = time;
@@ -1285,9 +1315,7 @@  drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
 
 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
 	/* Put the buffer into our internal cache for reuse if we can. */
-	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
-	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
-					      I915_MADV_DONTNEED)) {
+	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL) {
 		bo_gem->free_time = time;
 
 		bo_gem->name = NULL;