@@ -704,6 +704,71 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
}
}
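+/* Look for a reusable BO in the cache bucket selected for the requested
+ * size. On success the BO is unlinked from the bucket and returned with
+ * the requested tiling applied; NULL means no suitable cached BO was
+ * found and the caller should allocate a fresh one.
+ */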
+static drm_intel_bo_gem *
+drm_intel_gem_bo_cached_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
+ unsigned long size,
+ uint32_t tiling_mode,
+ unsigned long stride,
+ unsigned long alignment,
+ bool for_render)
+{
+ struct drm_intel_gem_bo_bucket *bucket =
+ drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
+
+	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
+		drm_intel_bo_gem *bo_gem;
+retry:
+ bo_gem = NULL;
+ if (for_render) {
+ /* Allocate new render-target BOs from the tail (MRU)
+ * of the list, as it will likely be hot in the GPU
+ * cache and in the aperture for us.
+ */
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->head.prev, head);
+ DRMLISTDEL(&bo_gem->head);
+ bo_gem->bo.align = alignment;
+ } else {
+ assert(alignment == 0);
+ /* For non-render-target BOs (where we're probably
+ * going to map it first thing in order to fill it
+ * with data), check if the last BO in the cache is
+ * unbusy, and only reuse in that case. Otherwise,
+ * allocating a new buffer is probably faster than
+ * waiting for the GPU to finish.
+ */
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->head.next, head);
+ if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
+ DRMLISTDEL(&bo_gem->head);
+ } else {
+ bo_gem = NULL;
+ }
+ }
+
+ if (bo_gem) {
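+			/* The kernel may have discarded the BO's backing
+			 * pages while it sat unused in the cache; madvise
+			 * WILLNEED reports whether they were retained. If
+			 * not, the BO is useless, so free it and purge the
+			 * bucket of other discarded BOs.
+			 */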
+ if (!drm_intel_gem_bo_madvise_internal
+ (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
+ bucket);
+ return NULL;
+ }
+
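+			/* The cached BO may carry a different tiling mode or
+			 * stride; if the kernel refuses to switch it, drop
+			 * this BO and retry with another cached one.
+			 */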
+			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
+								 tiling_mode,
+								 stride)) {
+				drm_intel_gem_bo_free(&bo_gem->bo);
+				if (DRMLISTEMPTY(&bucket->head))
+					return NULL;
+				goto retry;
+			}
+ }
+
+ return bo_gem;
+ }
+
+ return NULL;
+}
+
static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
const char *name,
@@ -715,81 +780,21 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
drm_intel_bo_gem *bo_gem;
- unsigned int page_size = getpagesize();
int ret;
- struct drm_intel_gem_bo_bucket *bucket;
- bool alloc_from_cache;
- unsigned long bo_size;
bool for_render = false;
if (flags & BO_ALLOC_FOR_RENDER)
for_render = true;
- /* Round the allocated size up to a power of two number of pages. */
- bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
-
- /* If we don't have caching at this size, don't actually round the
- * allocation up.
- */
- if (bucket == NULL) {
- bo_size = size;
- if (bo_size < page_size)
- bo_size = page_size;
- } else {
- bo_size = bucket->size;
- }
+	/* First, align the requested size to a page boundary. */
+ size = ALIGN(size, getpagesize());
pthread_mutex_lock(&bufmgr_gem->lock);
/* Get a buffer out of the cache if available */
-retry:
- alloc_from_cache = false;
- if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
- if (for_render) {
- /* Allocate new render-target BOs from the tail (MRU)
- * of the list, as it will likely be hot in the GPU
- * cache and in the aperture for us.
- */
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.prev, head);
- DRMLISTDEL(&bo_gem->head);
- alloc_from_cache = true;
- bo_gem->bo.align = alignment;
- } else {
- assert(alignment == 0);
- /* For non-render-target BOs (where we're probably
- * going to map it first thing in order to fill it
- * with data), check if the last BO in the cache is
- * unbusy, and only reuse in that case. Otherwise,
- * allocating a new buffer is probably faster than
- * waiting for the GPU to finish.
- */
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
- alloc_from_cache = true;
- DRMLISTDEL(&bo_gem->head);
- }
- }
+ bo_gem = drm_intel_gem_bo_cached_for_size(bufmgr_gem, size, tiling_mode,
+ stride, alignment, for_render);
- if (alloc_from_cache) {
- if (!drm_intel_gem_bo_madvise_internal
- (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
- drm_intel_gem_bo_free(&bo_gem->bo);
- drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
- bucket);
- goto retry;
- }
-
- if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
- tiling_mode,
- stride)) {
- drm_intel_gem_bo_free(&bo_gem->bo);
- goto retry;
- }
- }
- }
-
- if (!alloc_from_cache) {
+ if (bo_gem == NULL) {
struct drm_i915_gem_create create;
bo_gem = calloc(1, sizeof(*bo_gem));
@@ -800,10 +805,10 @@ retry:
list (vma_list), so better set the list head here */
DRMINITLISTHEAD(&bo_gem->vma_list);
- bo_gem->bo.size = bo_size;
+ bo_gem->bo.size = size;
memclear(create);
- create.size = bo_size;
+ create.size = size;
ret = drmIoctl(bufmgr_gem->fd,
DRM_IOCTL_I915_GEM_CREATE,
@@ -844,7 +849,7 @@ retry:
pthread_mutex_unlock(&bufmgr_gem->lock);
DBG("bo_create: buf %d (%s) %ldb\n",
- bo_gem->gem_handle, bo_gem->name, size);
+ bo_gem->gem_handle, bo_gem->name, bo_gem->bo.size);
return &bo_gem->bo;