@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/swap.h>
#include <drm/ttm/ttm_pool.h>
+#include <drm/ttm/ttm_tt.h>
#define TTM_MEMORY_ALLOC_RETRIES 4
@@ -414,10 +415,11 @@ static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob,
int ttm_mem_global_init(struct ttm_mem_global *glob)
{
+ u64 num_pages, num_dma32_pages;
+ struct ttm_mem_zone *zone;
struct sysinfo si;
int ret;
int i;
- struct ttm_mem_zone *zone;
spin_lock_init(&glob->lock);
glob->swap_queue = create_singlethread_workqueue("ttm_swap");
@@ -452,6 +454,19 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
zone->name, (unsigned long long)zone->max_mem >> 10);
}
ttm_pool_mgr_init(glob->zone_kernel->max_mem/(2*PAGE_SIZE));
+
+ /* We generally allow TTM to allocate a maximum of 50% of
+ * the available system memory.
+ */
+ num_pages = (u64)si.totalram * si.mem_unit;
+ num_pages = (num_pages * 50 / 100) >> PAGE_SHIFT;
+
+ /* But for DMA32 we limit ourselves to 2GiB maximum. */
+ num_dma32_pages = (u64)(si.totalram - si.totalhigh) * si.mem_unit;
+ num_dma32_pages = min(num_dma32_pages, 2ULL << 30) >> PAGE_SHIFT;
+
+ ttm_tt_mgr_init(num_pages, num_dma32_pages);
+
return 0;
out_no_zone:
ttm_mem_global_release(glob);
@@ -38,6 +38,21 @@
#include <drm/drm_cache.h>
#include <drm/ttm/ttm_bo_driver.h>
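+/* Page limits, zero means use the defaults from ttm_tt_mgr_init() */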
+static unsigned long ttm_pages_limit;
+
+MODULE_PARM_DESC(pages_limit, "Limit for the allocated pages");
+module_param_named(pages_limit, ttm_pages_limit, ulong, 0644);
+
+static unsigned long ttm_dma32_pages_limit;
+
+MODULE_PARM_DESC(dma32_pages_limit, "Limit for the allocated DMA32 pages");
+module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644);
+
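+/* Pages currently backing populated ttm_tt objects */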
+static atomic_long_t ttm_pages_allocated;
+static atomic_long_t ttm_dma32_pages_allocated;
+
/**
* Allocates a ttm structure for the given BO.
*/
@@ -295,8 +310,8 @@ static void ttm_tt_add_mapping(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
ttm->pages[i]->mapping = bdev->dev_mapping;
}
-int ttm_tt_populate(struct ttm_bo_device *bdev,
- struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
+int ttm_tt_populate(struct ttm_bo_device *bdev, struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
{
int ret;
@@ -306,12 +321,29 @@ int ttm_tt_populate(struct ttm_bo_device *bdev,
if (ttm_tt_is_populated(ttm))
return 0;
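+ /* Charge the pages before checking the limits. If that pushes us
+ * over one of them, swap out buffer objects until we are below the
+ * limits again; the error path undoes the charge.
+ */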
+ atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
+ if (bdev->pool.use_dma32)
+ atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
+
+ while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
+ atomic_long_read(&ttm_dma32_pages_allocated) >
+ ttm_dma32_pages_limit) {
+
+ ret = ttm_bo_swapout(ctx);
+ if (ret)
+ goto error;
+ }
+
if (bdev->driver->ttm_tt_populate)
ret = bdev->driver->ttm_tt_populate(bdev, ttm, ctx);
else
ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
if (ret)
- return ret;
+ goto error;
ttm_tt_add_mapping(bdev, ttm);
ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
@@ -324,6 +356,13 @@ int ttm_tt_populate(struct ttm_bo_device *bdev,
}
return 0;
+
+error:
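+ /* Roll back the charge taken before the limit check */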
+ atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+ if (bdev->pool.use_dma32)
+ atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+ return ret;
}
EXPORT_SYMBOL(ttm_tt_populate);
@@ -341,8 +380,7 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
}
}
-void ttm_tt_unpopulate(struct ttm_bo_device *bdev,
- struct ttm_tt *ttm)
+void ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
{
if (!ttm_tt_is_populated(ttm))
return;
@@ -352,5 +390,28 @@ void ttm_tt_unpopulate(struct ttm_bo_device *bdev,
bdev->driver->ttm_tt_unpopulate(bdev, ttm);
else
ttm_pool_free(&bdev->pool, ttm);
+
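+ /* The backing pages are freed again, drop them from the accounting */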
+ atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+ if (bdev->pool.use_dma32)
+ atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+
ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
}
+
+/**
+ * ttm_tt_mgr_init - set defaults for the TTM page limits
+ *
+ * @num_pages: global limit of system pages to use
+ * @num_dma32_pages: global limit of DMA32 pages to use
+ *
+ * Set reasonable default page limits unless overridden via module parameters.
+ */
+void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages)
+{
+ if (!ttm_pages_limit)
+ ttm_pages_limit = num_pages;
+
+ if (!ttm_dma32_pages_limit)
+ ttm_dma32_pages_limit = num_dma32_pages;
+}
@@ -157,6 +157,8 @@ int ttm_tt_populate(struct ttm_bo_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx);
*/
void ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm);
+void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages);
+
#if IS_ENABLED(CONFIG_AGP)
#include <linux/agp_backend.h>
TTM implements a rather extensive accounting of allocated memory. There are two reasons for this: 1. It tries to block userspace allocating a huge number of very small BOs without accounting for the kmalloced memory. 2. Make sure we don't over allocate and run into an OOM situation during swapout while trying to handle the memory shortage. This is only partially a good idea. First of all it is perfectly valid for an application to use all of system memory, limiting it to 50% is not really acceptable. What we need to take care of is that the application is held accountable for the memory it allocated. This is what control mechanisms like memcg and the normal Linux page accounting already do. Making sure that we don't run into an OOM situation while trying to cope with a memory shortage is still a good idea, but this is also not very well implemented since it means another opportunity of recursion from the driver back into TTM. So start to rework all of this by accounting the memory before allocating the pages and making sure that we never go over a certain limit pages locked by TTM in the ttm_tt objects. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/ttm/ttm_memory.c | 17 ++++++++- drivers/gpu/drm/ttm/ttm_tt.c | 64 +++++++++++++++++++++++++++++--- include/drm/ttm/ttm_tt.h | 2 + 3 files changed, 77 insertions(+), 6 deletions(-)