Message ID | 1430816040-25285-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Sorry. I know I asked some questions last time around, but I am having trouble finding the response. On Tue, May 05, 2015 at 09:53:55AM +0100, Chris Wilson wrote: > Convert the bo-cache into 2 phases: > > 1. A two second cache of recently used buffers, keep untouched. > 2. A two second cache of older buffers, marked for eviction. > > This helps reduce ioctl traffic on a rapid turnover in working sets. The > issue is that even a 2 second window is enough for an application to > fill all of memory with inactive buffers (and we would rely on the > oom-killer identifying the right culprit). > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > Cc: Ben Widawsky <benjamin.widawsky@intel.com> > --- > intel/intel_bufmgr_gem.c | 27 ++++++++++++++++++++------- > 1 file changed, 20 insertions(+), 7 deletions(-) > > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c > index 60c06fc..eeb9acf 100644 > --- a/intel/intel_bufmgr_gem.c > +++ b/intel/intel_bufmgr_gem.c > @@ -216,6 +216,11 @@ struct _drm_intel_bo_gem { > bool used_as_reloc_target; > > /** > + * Are we keeping this buffer around in semi-permenant cache? > + */ > + bool dontneed; > + > + /** > * Boolean of whether we have encountered an error whilst building the relocation tree. > */ > bool has_error; > @@ -719,7 +724,8 @@ retry: > } > > if (alloc_from_cache) { > - if (!drm_intel_gem_bo_madvise_internal > + if (bo_gem->dontneed && > + !drm_intel_gem_bo_madvise_internal Why wouldn't you set dontneed = false? Just a thought, but it seems like a nicer solution here would just be a state variable tracking what we last sent with the madv ioctl. enum { WILLNEED DONTNEED } With that, it might be cool for the curious to put a histogram/hysteresis thing. Every time you hit this condition to see how many times we flip flop. > (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { > drm_intel_gem_bo_free(&bo_gem->bo); > drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, > @@ -1166,19 +1172,28 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) > for (i = 0; i < bufmgr_gem->num_buckets; i++) { > struct drm_intel_gem_bo_bucket *bucket = > &bufmgr_gem->cache_bucket[i]; > + drmMMListHead madv; > > + DRMINITLISTHEAD(&madv); > while (!DRMLISTEMPTY(&bucket->head)) { > drm_intel_bo_gem *bo_gem; > > bo_gem = DRMLISTENTRY(drm_intel_bo_gem, > bucket->head.next, head); > - if (time - bo_gem->free_time <= 1) > + if (time - bo_gem->free_time < 2*(1+bo_gem->dontneed)) > break; > > DRMLISTDEL(&bo_gem->head); > - > - drm_intel_gem_bo_free(&bo_gem->bo); > + if (bo_gem->dontneed) { > + drm_intel_gem_bo_free(&bo_gem->bo); > + } else { > + drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, > + I915_MADV_DONTNEED); > + bo_gem->dontneed = 1; > + DRMLISTADDTAIL(&bo_gem->head, &madv); > + } > } Again with above, how about? assert(!bo_gem->dontneed); bo_gem->dontneed = true; Also I think some comments here explaining what you're doing overall would bce nice since I surely won't remember it in >= 1 minutes. > + DRMLISTJOIN(&madv, &bucket->head); > } > > bufmgr_gem->time = time; > @@ -1289,9 +1304,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) > > bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); > /* Put the buffer into our internal cache for reuse if we can. */ > - if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && > - drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, > - I915_MADV_DONTNEED)) { > + if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL) { > bo_gem->free_time = time; > > bo_gem->name = NULL; Just to be clear, you're reducing IOCTL traffic by deferring an IOCTL until you cleanup the BO cache. I think I said it last time, overall the idea seems fine to me.
On Fri, Jun 05, 2015 at 09:34:23AM -0700, Ben Widawsky wrote: > Sorry. I know I asked some questions last time around, but I am having trouble > finding the response. You thought the existing code used a one second timeout and so wanted a separated patch to convert from age <= 1 to age < 2. Other than that you should have pointed out that dontneed should be moved into madvise_internal() so that it is kept in sync with the kernel value to avoid EFAULT embarrassment. > Just to be clear, you're reducing IOCTL traffic by deferring an IOCTL until you > cleanup the BO cache. Right and thereby reducing ioctls. > I think I said it last time, overall the idea seems fine to me. The change is marginal except when there is a lot of mutex contention (e.g. other clients clflushing) and then the effect is to push the contention to until it is unavoidable. -Chris
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 60c06fc..eeb9acf 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -216,6 +216,11 @@ struct _drm_intel_bo_gem { bool used_as_reloc_target; /** + * Are we keeping this buffer around in semi-permenant cache? + */ + bool dontneed; + + /** * Boolean of whether we have encountered an error whilst building the relocation tree. */ bool has_error; @@ -719,7 +724,8 @@ retry: } if (alloc_from_cache) { - if (!drm_intel_gem_bo_madvise_internal + if (bo_gem->dontneed && + !drm_intel_gem_bo_madvise_internal (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { drm_intel_gem_bo_free(&bo_gem->bo); drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, @@ -1166,19 +1172,28 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) for (i = 0; i < bufmgr_gem->num_buckets; i++) { struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; + drmMMListHead madv; + DRMINITLISTHEAD(&madv); while (!DRMLISTEMPTY(&bucket->head)) { drm_intel_bo_gem *bo_gem; bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head); - if (time - bo_gem->free_time <= 1) + if (time - bo_gem->free_time < 2*(1+bo_gem->dontneed)) break; DRMLISTDEL(&bo_gem->head); - - drm_intel_gem_bo_free(&bo_gem->bo); + if (bo_gem->dontneed) { + drm_intel_gem_bo_free(&bo_gem->bo); + } else { + drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, + I915_MADV_DONTNEED); + bo_gem->dontneed = 1; + DRMLISTADDTAIL(&bo_gem->head, &madv); + } } + DRMLISTJOIN(&madv, &bucket->head); } bufmgr_gem->time = time; @@ -1289,9 +1304,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); /* Put the buffer into our internal cache for reuse if we can. */ - if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && - drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, - I915_MADV_DONTNEED)) { + if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL) { bo_gem->free_time = time; bo_gem->name = NULL;
Convert the bo-cache into 2 phases: 1. A two second cache of recently used buffers, keep untouched. 2. A two second cache of older buffers, marked for eviction. This helps reduce ioctl traffic on a rapid turnover in working sets. The issue is that even a 2 second window is enough for an application to fill all of memory with inactive buffers (and we would rely on the oom-killer identifying the right culprit). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ben Widawsky <benjamin.widawsky@intel.com> --- intel/intel_bufmgr_gem.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-)