@@ -68,40 +68,7 @@
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
-struct drm_intel_gem_bo_bucket {
- drmMMListHead head;
- unsigned long size;
-};
-
-/* Only cache objects up to 64MB. Bigger than that, and the rounding of the
- * size makes many operations fail that wouldn't otherwise.
- */
-#define DRM_INTEL_GEM_BO_BUCKETS 14
-typedef struct _drm_intel_bufmgr_gem {
- drm_intel_bufmgr bufmgr;
-
- int fd;
-
- int max_relocs;
-
- pthread_mutex_t lock;
-
- struct drm_i915_gem_exec_object *exec_objects;
- struct drm_i915_gem_exec_object2 *exec2_objects;
- drm_intel_bo **exec_bos;
- int exec_size;
- int exec_count;
-
- /** Array of lists of cached gem objects of power-of-two sizes */
- struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
-
- uint64_t gtt_size;
- int available_fences;
- int pci_device;
- int gen;
- char bo_reuse;
- char fenced_relocs;
-} drm_intel_bufmgr_gem;
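+/* Only cache objects up to 64MB. */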
+#define BO_CACHE_MAX_SIZE (64 * 1024 * 1024)
#define DRM_INTEL_RELOC_FENCE (1<<0)
@@ -150,7 +117,8 @@ struct _drm_intel_bo_gem {
void *gtt_virtual;
/** BO cache list */
- drmMMListHead head;
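+ /* Typed links, so the libdrm list macros (DRMLISTDEL and
+ * friends) can chain bo_gems directly instead of going through
+ * DRMLISTENTRY().
+ */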
+ drm_intel_bo_gem *next;
+ drm_intel_bo_gem *prev;
/**
* Boolean of whether this BO and its children have been included in
@@ -190,6 +158,37 @@ struct _drm_intel_bo_gem {
int reloc_tree_fences;
};
+typedef struct _drm_intel_bufmgr_gem {
+ drm_intel_bufmgr bufmgr;
+
+ int fd;
+
+ int max_relocs;
+
+ pthread_mutex_t lock;
+
+ struct drm_i915_gem_exec_object *exec_objects;
+ struct drm_i915_gem_exec_object2 *exec2_objects;
+ drm_intel_bo **exec_bos;
+ int exec_size;
+ int exec_count;
+
+ /**
+ * Head of the BO cache list. It would be nice to store just a
+ * pair of pointers here rather than a whole bo_gem, but the
+ * libdrm list macros are sadly not the Linux list macros, so
+ * the head has to share the entries' type.
+ */
+ drm_intel_bo_gem bo_cache;
+
+ uint64_t gtt_size;
+ int available_fences;
+ int pci_device;
+ int gen;
+ char bo_reuse;
+ char fenced_relocs;
+} drm_intel_bufmgr_gem;
+
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
@@ -279,23 +278,6 @@ drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
return i;
}
-static struct drm_intel_gem_bo_bucket *
-drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
- unsigned long size)
-{
- int i;
-
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
- struct drm_intel_gem_bo_bucket *bucket =
- &bufmgr_gem->cache_bucket[i];
- if (bucket->size >= size) {
- return bucket;
- }
- }
-
- return NULL;
-}
-
static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
@@ -529,23 +511,84 @@ drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
/* drop the oldest entries that have been purged by the kernel */
static void
-drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
- struct drm_intel_gem_bo_bucket *bucket)
+drm_intel_gem_bo_cache_purge(drm_intel_bufmgr_gem *bufmgr_gem)
{
- while (!DRMLISTEMPTY(&bucket->head)) {
- drm_intel_bo_gem *bo_gem;
+ while (!DRMLISTEMPTY(&bufmgr_gem->bo_cache)) {
+ drm_intel_bo_gem *bo_gem = bufmgr_gem->bo_cache.next;
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
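+ /* madvise_internal returns the kernel's "retained" flag, so
+ * stop at the first BO whose pages are still around; newer
+ * entries are presumably still resident too.
+ */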
if (drm_intel_gem_bo_madvise_internal
(bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
break;
- DRMLISTDEL(&bo_gem->head);
+ DRMLISTDEL(bo_gem);
drm_intel_gem_bo_free(&bo_gem->bo);
}
}
+/* For non-render-target BOs (which we're probably going to map right
+ * away in order to fill them with data), check whether the oldest
+ * BO of matching size in the cache is unbusy, and reuse it only in
+ * that case. Otherwise, allocating a new buffer is probably faster
+ * than waiting for the GPU to finish with the old one.
+ */
+static drm_intel_bo_gem *
+find_cached_bo_for_cpu(drm_intel_bufmgr *bufmgr, unsigned long size)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+ drm_intel_bo_gem *bo_gem, *temp;
+
+retry:
+ DRMLISTFOREACHSAFE(bo_gem, temp, &bufmgr_gem->bo_cache) {
+ if (bo_gem->bo.size != size)
+ continue;
+
+ if (drm_intel_gem_bo_busy(&bo_gem->bo))
+ return NULL;
+
+ DRMLISTDEL(bo_gem);
+
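+ /* WILLNEED reports whether the kernel kept the pages while
+ * this BO sat in the cache; if it didn't, the contents are
+ * gone, so free the BO, prune other purged entries, and rescan.
+ */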
+ if (!drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
+ I915_MADV_WILLNEED)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ drm_intel_gem_bo_cache_purge(bufmgr_gem);
+ goto retry;
+ } else {
+ return bo_gem;
+ }
+ }
+
+ return NULL;
+}
+
+/* Reuse render-target BOs from the tail (MRU end) of the list, as
+ * they will likely still be hot in the GPU cache and in the aperture
+ * for us.
+ */
+static drm_intel_bo_gem *
+find_cached_bo_for_render(drm_intel_bufmgr *bufmgr, unsigned long size)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+ drm_intel_bo_gem *bo_gem, *temp;
+
+retry:
+ DRMLISTFOREACHSAFEREVERSE(bo_gem, temp, &bufmgr_gem->bo_cache) {
+ if (bo_gem->bo.size != size)
+ continue;
+
+ DRMLISTDEL(bo_gem);
+
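+ /* Same purged-BO handling as find_cached_bo_for_cpu(). */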
+ if (!drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
+ I915_MADV_WILLNEED)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ drm_intel_gem_bo_cache_purge(bufmgr_gem);
+ goto retry;
+ } else {
+ return bo_gem;
+ }
+ }
+
+ return NULL;
+}
+
static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
const char *name,
@@ -553,83 +596,35 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
unsigned long flags)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
- drm_intel_bo_gem *bo_gem;
+ drm_intel_bo_gem *bo_gem = NULL;
unsigned int page_size = getpagesize();
int ret;
- struct drm_intel_gem_bo_bucket *bucket;
- int alloc_from_cache;
- unsigned long bo_size;
int for_render = 0;
if (flags & BO_ALLOC_FOR_RENDER)
for_render = 1;
- /* Round the allocated size up to a power of two number of pages. */
- bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
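+ /* With no bucket rounding, cache hits require an exact size match,
+ * so just round up to the page granularity GEM allocates in anyway.
+ */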
+ size = ALIGN(size, page_size);
- /* If we don't have caching at this size, don't actually round the
- * allocation up.
- */
- if (bucket == NULL) {
- bo_size = size;
- if (bo_size < page_size)
- bo_size = page_size;
- } else {
- bo_size = bucket->size;
- }
-
- pthread_mutex_lock(&bufmgr_gem->lock);
- /* Get a buffer out of the cache if available */
-retry:
- alloc_from_cache = 0;
- if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
- if (for_render) {
- /* Allocate new render-target BOs from the tail (MRU)
- * of the list, as it will likely be hot in the GPU
- * cache and in the aperture for us.
- */
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.prev, head);
- DRMLISTDEL(&bo_gem->head);
- alloc_from_cache = 1;
- } else {
- /* For non-render-target BOs (where we're probably
- * going to map it first thing in order to fill it
- * with data), check if the last BO in the cache is
- * unbusy, and only reuse in that case. Otherwise,
- * allocating a new buffer is probably faster than
- * waiting for the GPU to finish.
- */
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
- alloc_from_cache = 1;
- DRMLISTDEL(&bo_gem->head);
- }
- }
-
- if (alloc_from_cache) {
- if (!drm_intel_gem_bo_madvise_internal
- (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
- drm_intel_gem_bo_free(&bo_gem->bo);
- drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
- bucket);
- goto retry;
- }
- }
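+ /* Only BOs below the cache cap are ever marked reusable, so
+ * don't bother walking the cache for larger allocations.
+ */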
+ if (size < BO_CACHE_MAX_SIZE) {
+ pthread_mutex_lock(&bufmgr_gem->lock);
+ if (for_render)
+ bo_gem = find_cached_bo_for_render(bufmgr, size);
+ else
+ bo_gem = find_cached_bo_for_cpu(bufmgr, size);
+ pthread_mutex_unlock(&bufmgr_gem->lock);
}
- pthread_mutex_unlock(&bufmgr_gem->lock);
- if (!alloc_from_cache) {
+ if (!bo_gem) {
struct drm_i915_gem_create create;
bo_gem = calloc(1, sizeof(*bo_gem));
if (!bo_gem)
return NULL;
- bo_gem->bo.size = bo_size;
+ bo_gem->bo.size = size;
memset(&create, 0, sizeof(create));
- create.size = bo_size;
+ create.size = size;
do {
ret = ioctl(bufmgr_gem->fd,
@@ -653,7 +648,8 @@ retry:
bo_gem->has_error = 0;
bo_gem->tiling_mode = I915_TILING_NONE;
bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
- bo_gem->reusable = 1;
+ if (size < BO_CACHE_MAX_SIZE)
+ bo_gem->reusable = 1;
drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
@@ -820,24 +816,16 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
- int i;
-
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
- struct drm_intel_gem_bo_bucket *bucket =
- &bufmgr_gem->cache_bucket[i];
-
- while (!DRMLISTEMPTY(&bucket->head)) {
- drm_intel_bo_gem *bo_gem;
+ while (!DRMLISTEMPTY(&bufmgr_gem->bo_cache)) {
+ drm_intel_bo_gem *bo_gem;
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- if (time - bo_gem->free_time <= 1)
- break;
+ bo_gem = bufmgr_gem->bo_cache.next;
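+ /* The cache is in free-time order, so everything after the
+ * first sufficiently recent entry is recent too.
+ */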
+ if (time - bo_gem->free_time <= 1)
+ break;
- DRMLISTDEL(&bo_gem->head);
+ DRMLISTDEL(bo_gem);
- drm_intel_gem_bo_free(&bo_gem->bo);
- }
+ drm_intel_gem_bo_free(&bo_gem->bo);
}
}
@@ -846,7 +834,6 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
- struct drm_intel_gem_bo_bucket *bucket;
uint32_t tiling_mode;
int i;
@@ -872,10 +859,9 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
bo_gem->relocs = NULL;
}
- bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
/* Put the buffer into our internal cache for reuse if we can. */
tiling_mode = I915_TILING_NONE;
- if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
+ if (bufmgr_gem->bo_reuse && bo_gem->reusable &&
drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
I915_MADV_DONTNEED)) {
@@ -884,7 +870,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
bo_gem->name = NULL;
bo_gem->validate_index = -1;
- DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
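+ /* The tail is the MRU end: render-target reuse scans from the
+ * tail, CPU-map reuse from the head (LRU).
+ */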
+ DRMLISTADDTAIL(bo_gem, &bufmgr_gem->bo_cache);
drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time);
} else {
@@ -1241,7 +1227,7 @@ static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
- int i;
+ drm_intel_bo_gem *bo_gem;
free(bufmgr_gem->exec2_objects);
free(bufmgr_gem->exec_objects);
@@ -1250,18 +1236,12 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
pthread_mutex_destroy(&bufmgr_gem->lock);
/* Free any cached buffer objects we were going to reuse */
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
- struct drm_intel_gem_bo_bucket *bucket =
- &bufmgr_gem->cache_bucket[i];
- drm_intel_bo_gem *bo_gem;
- while (!DRMLISTEMPTY(&bucket->head)) {
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- DRMLISTDEL(&bo_gem->head);
+ while (!DRMLISTEMPTY(&bufmgr_gem->bo_cache)) {
+ bo_gem = bufmgr_gem->bo_cache.next;
+ DRMLISTDEL(bo_gem);
- drm_intel_gem_bo_free(&bo_gem->bo);
- }
+ drm_intel_gem_bo_free(&bo_gem->bo);
}
free(bufmgr);
@@ -1723,9 +1703,10 @@ drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
/**
* Enables unlimited caching of buffer objects for reuse.
*
- * This is potentially very memory expensive, as the cache at each bucket
- * size is only bounded by how many buffers of that size we've managed to have
- * in flight at once.
+ * This is potentially very memory expensive, as the cache at each BO
+ * size is bounded only by how many buffers of that size we've managed
+ * to have in flight at once. However, the kernel's memory-pressure
+ * handler will throw out cached BOs if required.
*/
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
@@ -1985,8 +1966,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
drm_intel_bufmgr_gem *bufmgr_gem;
struct drm_i915_gem_get_aperture aperture;
drm_i915_getparam_t gp;
- int ret, i;
- unsigned long size;
+ int ret;
int exec2 = 0, has_bsd = 0;
bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
@@ -2112,11 +2092,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
drm_intel_gem_get_pipe_from_crtc_id;
bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
- /* Initialize the linked lists for BO reuse cache. */
- for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
- DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
- bufmgr_gem->cache_bucket[i].size = size;
- }
+ DRMINITLISTHEAD(&bufmgr_gem->bo_cache);
return &bufmgr_gem->bufmgr;
}