diff mbox series

[RFC,08/16] drm/ttm: Add a shrinker and shrinker accounting

Message ID 20230215161405.187368-9-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Add a TTM shrinker | expand

Commit Message

Thomas Hellstrom Feb. 15, 2023, 4:13 p.m. UTC
Register a TTM system memory-backed object shrinker and add
accounting for shrinkable and purgeable pages. For the shrinker to work,
the driver needs to register the bo_shrink callback which is responsible
for unbinding from GPU and the dma layer if needed. Helpers for that
callback to actually perform shrinking will be introduced in upcoming
patches.

Note that we can't lock the ttm_global_mutex from within the shrinker
scan() function as that might cause a deadlock issue. To fix that, add and
use a mutex which is used for global device list manipulation only and
make sure it isn't held when registering the shrinker.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_device.c |  26 ++++---
 drivers/gpu/drm/ttm/ttm_tt.c     | 112 +++++++++++++++++++++++++++++--
 include/drm/ttm/ttm_tt.h         |   2 +
 3 files changed, 125 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index e0a2be3ed13d..ce98752d2d32 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -36,10 +36,10 @@ 
 
 #include "ttm_module.h"
 
-/*
- * ttm_global_mutex - protecting the global state
- */
+/* ttm_global_mutex - protects the global state init and fini. */
 static DEFINE_MUTEX(ttm_global_mutex);
+/* ttm_global_list_mutex - protects the device list. */
+static DEFINE_MUTEX(ttm_global_list_mutex);
 static unsigned ttm_glob_use_count;
 struct ttm_global ttm_glob;
 EXPORT_SYMBOL(ttm_glob);
@@ -54,6 +54,7 @@  static void ttm_global_release(void)
 	if (--ttm_glob_use_count > 0)
 		goto out;
 
+	ttm_tt_mgr_fini();
 	ttm_pool_mgr_fini();
 	debugfs_remove(ttm_debugfs_root);
 
@@ -102,7 +103,10 @@  static int ttm_global_init(void)
 		goto out;
 	}
 
+	mutex_lock(&ttm_global_list_mutex);
 	INIT_LIST_HEAD(&glob->device_list);
+	mutex_unlock(&ttm_global_list_mutex);
+
 	atomic_set(&glob->bo_count, 0);
 
 	debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
@@ -135,7 +139,7 @@  long ttm_global_swapout(struct ttm_operation_ctx *ctx,
 	struct ttm_device *bdev;
 	long ret = 0;
 
-	mutex_lock(&ttm_global_mutex);
+	mutex_lock(&ttm_global_list_mutex);
 	list_for_each_entry(bdev, &glob->device_list, device_list) {
 		ret = ttm_device_swapout(bdev, ctx, reason);
 		if (ret > 0) {
@@ -143,7 +147,7 @@  long ttm_global_swapout(struct ttm_operation_ctx *ctx,
 			break;
 		}
 	}
-	mutex_unlock(&ttm_global_mutex);
+	mutex_unlock(&ttm_global_list_mutex);
 	return ret;
 }
 
@@ -247,9 +251,9 @@  int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 	spin_lock_init(&bdev->lru_lock);
 	INIT_LIST_HEAD(&bdev->pinned);
 	bdev->dev_mapping = mapping;
-	mutex_lock(&ttm_global_mutex);
+	mutex_lock(&ttm_global_list_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
-	mutex_unlock(&ttm_global_mutex);
+	mutex_unlock(&ttm_global_list_mutex);
 
 	return 0;
 }
@@ -260,14 +264,14 @@  void ttm_device_fini(struct ttm_device *bdev)
 	struct ttm_resource_manager *man;
 	unsigned i;
 
+	mutex_lock(&ttm_global_list_mutex);
+	list_del(&bdev->device_list);
+	mutex_unlock(&ttm_global_list_mutex);
+
 	man = ttm_manager_type(bdev, TTM_PL_SYSTEM);
 	ttm_resource_manager_set_used(man, false);
 	ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL);
 
-	mutex_lock(&ttm_global_mutex);
-	list_del(&bdev->device_list);
-	mutex_unlock(&ttm_global_mutex);
-
 	drain_workqueue(bdev->wq);
 	destroy_workqueue(bdev->wq);
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 771e5f3c2fee..5a57117c21ec 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -37,6 +37,7 @@ 
 #include <linux/module.h>
 #include <drm/drm_cache.h>
 #include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_pool.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include "ttm_module.h"
@@ -54,6 +55,11 @@  module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644);
 static atomic_long_t ttm_pages_allocated;
 static atomic_long_t ttm_dma32_pages_allocated;
 
+static long shrinkable_pages;
+static long purgeable_pages;
+static DEFINE_RWLOCK(shrinkable_lock);
+static struct shrinker mm_shrinker;
+
 static bool ttm_tt_shrinkable(const struct ttm_device *bdev,
 			      const struct ttm_tt *tt)
 {
@@ -69,6 +75,14 @@  static void ttm_tt_mod_allocated(bool dma32, long value)
 		atomic_long_add(value, &ttm_dma32_pages_allocated);
 }
 
+static void ttm_tt_mod_shrinkable_pages(long shrinkable, long purgeable)
+{
+	write_lock(&shrinkable_lock);
+	shrinkable_pages += shrinkable;
+	purgeable_pages += purgeable;
+	write_unlock(&shrinkable_lock);
+}
+
 /*
  * Allocates a ttm structure for the given BO.
  */
@@ -352,6 +366,9 @@  int ttm_tt_populate(struct ttm_device *bdev,
 		}
 	}
 
+	if (ttm_tt_shrinkable(bdev, ttm))
+		ttm_tt_mod_shrinkable_pages(ttm->num_pages, 0);
+
 	return 0;
 
 error:
@@ -368,6 +385,13 @@  void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 	if (!ttm_tt_is_populated(ttm))
 		return;
 
+	if (ttm_tt_shrinkable(bdev, ttm)) {
+		if (ttm_tt_purgeable(ttm))
+			ttm_tt_mod_shrinkable_pages(0, -(long)ttm->num_pages);
+		else
+			ttm_tt_mod_shrinkable_pages(-(long)ttm->num_pages, 0);
+	}
+
 	if (bdev->funcs->ttm_tt_unpopulate)
 		bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
 	else
@@ -394,11 +418,86 @@  DEFINE_SHOW_ATTRIBUTE(ttm_tt_debugfs_shrink);
 
 #endif
 
+static unsigned long ttm_tt_shrinker_count(struct shrinker *shrink,
+					   struct shrink_control *sc)
+{
+	unsigned long num_pages;
 
-/*
- * ttm_tt_mgr_init - register with the MM shrinker
- *
- * Register with the MM shrinker for swapping out BOs.
+	num_pages = get_nr_swap_pages();
+	read_lock(&shrinkable_lock);
+	num_pages = min_t(unsigned long, num_pages, shrinkable_pages);
+	num_pages += purgeable_pages;
+	read_unlock(&shrinkable_lock);
+
+	return num_pages ? num_pages : SHRINK_EMPTY;
+}
+
+static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
+					  struct shrink_control *sc)
+{
+	bool is_kswapd = current_is_kswapd();
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = !is_kswapd,
+	};
+	unsigned long nr_to_scan, freed = 0;
+	long ret;
+
+	sc->nr_scanned = 0;
+	nr_to_scan = sc->nr_to_scan;
+
+	while (freed < nr_to_scan) {
+		ret = ttm_global_swapout(&ctx, TTM_SHRINK_PURGE);
+		if (ret <= 0)
+			break;
+
+		freed += ret;
+	}
+
+	sc->nr_scanned = freed;
+	if (freed < nr_to_scan)
+		nr_to_scan -= freed;
+	else
+		nr_to_scan = 0;
+	if (!nr_to_scan)
+		return freed ? freed : SHRINK_STOP;
+
+	while (freed < nr_to_scan) {
+		ret = ttm_global_swapout(&ctx, TTM_SHRINK_SWAP);
+		if (ret <= 0)
+			break;
+
+		freed += ret;
+	}
+
+	sc->nr_scanned = freed;
+
+	return freed ? freed : SHRINK_STOP;
+}
+
+/**
+ * ttm_tt_mgr_fini() - Check shrinkable accounting consistensy and remove
+ * the shrinker.
+ */
+void ttm_tt_mgr_fini(void)
+{
+	if (WARN_ON_ONCE(atomic_long_read(&ttm_pages_allocated) ||
+			 atomic_long_read(&ttm_dma32_pages_allocated) ||
+			 shrinkable_pages || purgeable_pages)) {
+		pr_warn("Inconsistent ttm_tt accounting:\n");
+		pr_warn("pages %ld dma32 %ld shrinkable %ld purgeable %ld\n",
+			atomic_long_read(&ttm_pages_allocated),
+			atomic_long_read(&ttm_dma32_pages_allocated),
+			shrinkable_pages, purgeable_pages);
+	}
+
+	unregister_shrinker(&mm_shrinker);
+}
+
+/**
+ * ttm_tt_mgr_init() - Provide watermark limits and register the shrinker.
+ * @num_pages - Number of pages TTM is allowed to pin.
+ * @num_dma32_pages - Number of dma32 pages TTM is allowed to pin.
  */
 void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages)
 {
@@ -412,6 +511,11 @@  void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages)
 
 	if (!ttm_dma32_pages_limit)
 		ttm_dma32_pages_limit = num_dma32_pages;
+
+	mm_shrinker.count_objects = ttm_tt_shrinker_count;
+	mm_shrinker.scan_objects = ttm_tt_shrinker_scan;
+	mm_shrinker.seeks = DEFAULT_SEEKS;
+	(void)register_shrinker(&mm_shrinker, "ttm-objects");
 }
 
 static void ttm_kmap_iter_tt_map_local(struct ttm_kmap_iter *iter,
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 627168eba8f6..3f99787e2b93 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -221,6 +221,8 @@  static inline void ttm_tt_mark_for_clear(struct ttm_tt *ttm)
 
 void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages);
 
+void ttm_tt_mgr_fini(void);
+
 struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
 					    struct ttm_tt *tt);