@@ -262,7 +262,8 @@ int ast_mm_init(struct ast_private *ast)
&ast_bo_driver,
dev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
- true);
+ true,
+ 0);
if (ret) {
DRM_ERROR("Error initialising bo driver; %d\n", ret);
return ret;
@@ -228,7 +228,8 @@ int bochs_mm_init(struct bochs_device *bochs)
&bochs_bo_driver,
bochs->dev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
- true);
+ true,
+ 0);
if (ret) {
DRM_ERROR("Error initialising bo driver; %d\n", ret);
return ret;
@@ -262,7 +262,8 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
&cirrus_bo_driver,
dev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
- true);
+ true,
+ 0);
if (ret) {
DRM_ERROR("Error initialising bo driver; %d\n", ret);
return ret;
@@ -102,7 +102,8 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
struct drm_mm_node *node,
unsigned long size, unsigned alignment,
- unsigned long color)
+ unsigned long color,
+ enum drm_mm_allocator_flags flags)
{
struct drm_mm *mm = hole_node->mm;
unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -115,12 +116,22 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
if (mm->color_adjust)
mm->color_adjust(hole_node, color, &adj_start, &adj_end);
+ if (flags & DRM_MM_CREATE_TOP)
+ adj_start = adj_end - size;
+
if (alignment) {
unsigned tmp = adj_start % alignment;
- if (tmp)
- adj_start += alignment - tmp;
+ if (tmp) {
+ if (flags & DRM_MM_CREATE_TOP)
+ adj_start -= tmp;
+ else
+ adj_start += alignment - tmp;
+ }
}
+ BUG_ON(adj_start < hole_start);
+ BUG_ON(adj_end > hole_end);
+
if (adj_start == hole_start) {
hole_node->hole_follows = 0;
list_del(&hole_node->hole_stack);
@@ -215,16 +226,17 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
unsigned long size, unsigned alignment,
unsigned long color,
- enum drm_mm_search_flags flags)
+ enum drm_mm_search_flags sflags,
+ enum drm_mm_allocator_flags aflags)
{
struct drm_mm_node *hole_node;
hole_node = drm_mm_search_free_generic(mm, size, alignment,
- color, flags);
+ color, sflags);
if (!hole_node)
return -ENOSPC;
- drm_mm_insert_helper(hole_node, node, size, alignment, color);
+ drm_mm_insert_helper(hole_node, node, size, alignment, color, aflags);
return 0;
}
EXPORT_SYMBOL(drm_mm_insert_node_generic);
@@ -233,7 +245,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
struct drm_mm_node *node,
unsigned long size, unsigned alignment,
unsigned long color,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ enum drm_mm_allocator_flags flags)
{
struct drm_mm *mm = hole_node->mm;
unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -248,13 +261,20 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
if (adj_end > end)
adj_end = end;
+ if (flags & DRM_MM_CREATE_TOP)
+ adj_start = adj_end - size;
+
if (mm->color_adjust)
mm->color_adjust(hole_node, color, &adj_start, &adj_end);
if (alignment) {
unsigned tmp = adj_start % alignment;
- if (tmp)
- adj_start += alignment - tmp;
+ if (tmp) {
+ if (flags & DRM_MM_CREATE_TOP)
+ adj_start -= tmp;
+ else
+ adj_start += alignment - tmp;
+ }
}
if (adj_start == hole_start) {
@@ -271,6 +291,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
INIT_LIST_HEAD(&node->hole_stack);
list_add(&node->node_list, &hole_node->node_list);
+ BUG_ON(node->start < start);
+ BUG_ON(node->start < adj_start);
BUG_ON(node->start + node->size > adj_end);
BUG_ON(node->start + node->size > end);
@@ -298,21 +320,23 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
* 0 on success, -ENOSPC if there's no suitable hole.
*/
int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
- unsigned long size, unsigned alignment, unsigned long color,
+ unsigned long size, unsigned alignment,
+ unsigned long color,
unsigned long start, unsigned long end,
- enum drm_mm_search_flags flags)
+ enum drm_mm_search_flags sflags,
+ enum drm_mm_allocator_flags aflags)
{
struct drm_mm_node *hole_node;
hole_node = drm_mm_search_free_in_range_generic(mm,
size, alignment, color,
- start, end, flags);
+ start, end, sflags);
if (!hole_node)
return -ENOSPC;
drm_mm_insert_helper_range(hole_node, node,
size, alignment, color,
- start, end);
+ start, end, aflags);
return 0;
}
EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
@@ -391,7 +415,8 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
best = NULL;
best_size = ~0UL;
- drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+ __drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+ flags & DRM_MM_SEARCH_BELOW) {
if (mm->color_adjust) {
mm->color_adjust(entry, color, &adj_start, &adj_end);
if (adj_end <= adj_start)
@@ -432,7 +457,8 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
best = NULL;
best_size = ~0UL;
- drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+ __drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+ flags & DRM_MM_SEARCH_BELOW) {
if (adj_start < start)
adj_start = start;
if (adj_end > end)
@@ -3270,7 +3270,8 @@ search_free:
ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
size, alignment,
obj->cache_level, 0, gtt_max,
- DRM_MM_SEARCH_DEFAULT);
+ DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_CREATE_DEFAULT);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
obj->cache_level, flags);
@@ -1072,7 +1072,8 @@ alloc:
&ppgtt->node, GEN6_PD_SIZE,
GEN6_PD_ALIGN, 0,
0, dev_priv->gtt.base.total,
- DRM_MM_SEARCH_DEFAULT);
+ DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_CREATE_DEFAULT);
if (ret == -ENOSPC && !retried) {
ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
GEN6_PD_SIZE, GEN6_PD_ALIGN,
@@ -262,7 +262,8 @@ int mgag200_mm_init(struct mga_device *mdev)
&mgag200_bo_driver,
dev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
- true);
+ true,
+ 0);
if (ret) {
DRM_ERROR("Error initialising bo driver; %d\n", ret);
return ret;
@@ -384,7 +384,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
&nouveau_bo_driver,
dev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
- bits <= 32 ? true : false);
+ bits <= 32 ? true : false, 0);
if (ret) {
NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
return ret;
@@ -495,7 +495,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
qdev->mman.bo_global_ref.ref.object,
&qxl_bo_driver,
qdev->ddev->anon_inode->i_mapping,
- DRM_FILE_PAGE_OFFSET, 0);
+ DRM_FILE_PAGE_OFFSET, 0, 0);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
@@ -710,7 +710,8 @@ int radeon_ttm_init(struct radeon_device *rdev)
&radeon_bo_driver,
rdev->ddev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
- rdev->need_dma32);
+ rdev->need_dma32,
+ 512 * 1024);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
@@ -1453,7 +1453,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
struct ttm_bo_driver *driver,
struct address_space *mapping,
uint64_t file_page_offset,
- bool need_dma32)
+ bool need_dma32,
+ uint32_t alloc_threshold)
{
int ret = -EINVAL;
@@ -1476,6 +1477,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
bdev->dev_mapping = mapping;
bdev->glob = glob;
bdev->need_dma32 = need_dma32;
+ bdev->alloc_threshold = alloc_threshold;
bdev->val_seq = 0;
spin_lock_init(&bdev->fence_lock);
mutex_lock(&glob->device_list_mutex);
@@ -55,6 +55,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
struct drm_mm *mm = &rman->mm;
struct drm_mm_node *node = NULL;
+ enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
unsigned long lpfn;
int ret;
@@ -65,12 +66,21 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;
+ /*
+ * If the driver requested a threshold, buffers larger than it are
+ * allocated top-down. Pinned (no-evict) buffers always go bottom-up.
+ */
+ if (man->bdev->alloc_threshold &&
+ !(bo->mem.placement & TTM_PL_FLAG_NO_EVICT) &&
+ man->bdev->alloc_threshold < (mem->num_pages * PAGE_SIZE))
+ aflags = DRM_MM_CREATE_TOP;
spin_lock(&rman->lock);
- ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
- mem->page_alignment,
+ ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
+ mem->page_alignment, 0,
placement->fpfn, lpfn,
- DRM_MM_SEARCH_BEST);
+ DRM_MM_SEARCH_BEST,
+ aflags);
spin_unlock(&rman->lock);
if (unlikely(ret)) {
@@ -725,7 +725,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
&vmw_bo_driver,
dev->anon_inode->i_mapping,
VMWGFX_FILE_PAGE_OFFSET,
- false);
+ false, 0);
if (unlikely(ret != 0)) {
DRM_ERROR("Failed initializing TTM buffer object driver.\n");
goto out_err1;
@@ -47,8 +47,17 @@
enum drm_mm_search_flags {
DRM_MM_SEARCH_DEFAULT = 0,
DRM_MM_SEARCH_BEST = 1 << 0,
+ DRM_MM_SEARCH_BELOW = 1 << 1,
};
+enum drm_mm_allocator_flags {
+ DRM_MM_CREATE_DEFAULT = 0,
+ DRM_MM_CREATE_TOP = 1 << 0,
+};
+
+#define DRM_MM_BOTTOMUP DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT
+#define DRM_MM_TOPDOWN DRM_MM_SEARCH_BELOW, DRM_MM_CREATE_TOP
+
struct drm_mm_node {
struct list_head node_list;
struct list_head hole_stack;
@@ -195,6 +204,14 @@ static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
1 : 0; \
entry = list_entry(entry->hole_stack.next, struct drm_mm_node, hole_stack))
+#define __drm_mm_for_each_hole(entry, mm, hole_start, hole_end, backwards) \
+ for (entry = list_entry((backwards) ? (mm)->hole_stack.prev : (mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
+ &entry->hole_stack != &(mm)->hole_stack ? \
+ hole_start = drm_mm_hole_node_start(entry), \
+ hole_end = drm_mm_hole_node_end(entry), \
+ 1 : 0; \
+ entry = list_entry((backwards) ? entry->hole_stack.prev : entry->hole_stack.next, struct drm_mm_node, hole_stack))
+
/*
* Basic range manager support (drm_mm.c)
*/
@@ -205,7 +222,8 @@ int drm_mm_insert_node_generic(struct drm_mm *mm,
unsigned long size,
unsigned alignment,
unsigned long color,
- enum drm_mm_search_flags flags);
+ enum drm_mm_search_flags sflags,
+ enum drm_mm_allocator_flags aflags);
/**
* drm_mm_insert_node - search for space and insert @node
* @mm: drm_mm to allocate from
@@ -228,7 +246,8 @@ static inline int drm_mm_insert_node(struct drm_mm *mm,
unsigned alignment,
enum drm_mm_search_flags flags)
{
- return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags);
+ return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags,
+ DRM_MM_CREATE_DEFAULT);
}
int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
@@ -238,7 +257,8 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
unsigned long color,
unsigned long start,
unsigned long end,
- enum drm_mm_search_flags flags);
+ enum drm_mm_search_flags sflags,
+ enum drm_mm_allocator_flags aflags);
/**
* drm_mm_insert_node_in_range - ranged search for space and insert @node
* @mm: drm_mm to allocate from
@@ -266,7 +286,8 @@ static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
enum drm_mm_search_flags flags)
{
return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
- 0, start, end, flags);
+ 0, start, end, flags,
+ DRM_MM_CREATE_DEFAULT);
}
void drm_mm_remove_node(struct drm_mm_node *node);
@@ -565,6 +565,7 @@ struct ttm_bo_device {
struct delayed_work wq;
bool need_dma32;
+ uint32_t alloc_threshold;
};
/**
@@ -751,6 +752,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
* @file_page_offset: Offset into the device address space that is available
* for buffer data. This ensures compatibility with other users of the
* address space.
+ * @alloc_threshold: If non-zero, buffers larger than this size in bytes
+ * are allocated from the top of the address space (two-ended allocation).
*
* Initializes a struct ttm_bo_device:
* Returns:
@@ -760,7 +763,9 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
struct ttm_bo_global *glob,
struct ttm_bo_driver *driver,
struct address_space *mapping,
- uint64_t file_page_offset, bool need_dma32);
+ uint64_t file_page_offset,
+ bool need_dma32,
+ uint32_t alloc_threshold);
/**
* ttm_bo_unmap_virtual
Clients like i915 need to segregate cache domains within the GTT, which can
lead to small amounts of fragmentation. By allocating the uncached buffers
from the bottom and the cacheable buffers from the top, we can reduce the
amount of wasted space and also optimize allocation of the mappable portion
of the GTT to only those buffers that require CPU access through the GTT.

For other drivers, allocating small bos from one end and large ones from the
other helps improve the quality of fragmentation.

Based on drm_mm work by Chris Wilson.

--

Radeon uses a 512 KB threshold. This decreases eviction by up to 20% by
improving the fragmentation quality. There is no harm in the normal cases
that fit VRAM fully (PTS gaming suite), and in some cases even the
VRAM-fitting cases improved slightly (openarena, urban terror).

512 KB was measured as the optimal threshold for the 3D workloads common to
radeon. Other drivers may need different thresholds according to their
workloads.

Signed-off-by: Lauri Kasanen <cand@gmx.com>
---
 drivers/gpu/drm/ast/ast_ttm.c         |  3 +-
 drivers/gpu/drm/bochs/bochs_mm.c      |  3 +-
 drivers/gpu/drm/cirrus/cirrus_ttm.c   |  3 +-
 drivers/gpu/drm/drm_mm.c              | 56 +++++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem.c       |  3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  3 +-
 drivers/gpu/drm/mgag200/mgag200_ttm.c |  3 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c |  2 +-
 drivers/gpu/drm/qxl/qxl_ttm.c         |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c   |  3 +-
 drivers/gpu/drm/ttm/ttm_bo.c          |  4 ++-
 drivers/gpu/drm/ttm/ttm_bo_manager.c  | 16 ++++++++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |  2 +-
 include/drm/drm_mm.h                  | 29 +++++++++++++++---
 include/drm/ttm/ttm_bo_driver.h       |  7 ++++-
 15 files changed, 105 insertions(+), 34 deletions(-)
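For illustration only (not part of the patch), here is a minimal sketch of how
a driver could pick the allocation end per buffer with the extended interface.
The my_vm_insert() helper and its "topdown" parameter are hypothetical:

#include <drm/drm_mm.h>

/*
 * Hypothetical helper: place "top"-preferring buffers (e.g. cacheable ones
 * in the i915 case) at the top of the range and everything else at the
 * bottom, so the two groups fragment each other as little as possible.
 */
static int my_vm_insert(struct drm_mm *mm, struct drm_mm_node *node,
			unsigned long size, unsigned alignment,
			unsigned long start, unsigned long end,
			bool topdown)
{
	if (topdown)
		/* Walk holes from the top; the node ends up at the hole's end. */
		return drm_mm_insert_node_in_range_generic(mm, node, size,
							   alignment, 0,
							   start, end,
							   DRM_MM_SEARCH_BELOW,
							   DRM_MM_CREATE_TOP);

	/* Default behaviour: search bottom-up, place at the hole's start. */
	return drm_mm_insert_node_in_range_generic(mm, node, size,
						   alignment, 0,
						   start, end,
						   DRM_MM_SEARCH_DEFAULT,
						   DRM_MM_CREATE_DEFAULT);
}

The DRM_MM_BOTTOMUP and DRM_MM_TOPDOWN convenience macros expand to the
corresponding (sflags, aflags) pairs, so the calls above can also be written
as drm_mm_insert_node_in_range_generic(mm, node, size, alignment, 0, start,
end, DRM_MM_TOPDOWN). A TTM driver opts in to two-ended allocation by passing
a non-zero alloc_threshold to ttm_bo_device_init(), as radeon does with
512 * 1024; unpinned buffers larger than the threshold are then placed
top-down by ttm_bo_man_get_node().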