[v3,7/8] drm/panthor: Make heap chunk allocation non-blocking

Message ID: 20250404092634.2968115-8-boris.brezillon@collabora.com (mailing list archive)
State: New
Series: drm: Introduce sparse GEM shmem

Commit Message

Boris Brezillon April 4, 2025, 9:26 a.m. UTC
Make heap chunk allocation non-blocking when we are in the growing
path. This way, we can fail the job and signal its fence instead of
blocking on memory reclaim, which will become problematic once we throw
a memory shrinker into the mix.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_heap.c | 222 ++++++++++++-------------
 1 file changed, 110 insertions(+), 112 deletions(-)
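
For reference, below is the core of the GFP-flag selection this patch introduces in panthor_alloc_heap_chunk(), pulled out as a stand-alone sketch. heap_chunk_gfp_flags() is a hypothetical helper name used only for illustration here; it is not part of the patch, which open-codes this logic in the allocation path.

#include <linux/gfp.h>
#include <linux/pagemap.h>

/*
 * Sketch of the GFP selection applied by this patch: the initial chunk
 * is allocated at heap-creation time and may block on reclaim, while
 * chunks allocated from the growing path must not, so the job can be
 * failed (and its fence signaled) instead of stalling on reclaim.
 */
static void heap_chunk_gfp_flags(struct address_space *mapping,
				 bool initial_chunk,
				 gfp_t *page_gfp, gfp_t *other_gfp)
{
	if (initial_chunk) {
		/* Creation path: normal, potentially blocking allocations. */
		*page_gfp = mapping_gfp_mask(mapping);
		*other_gfp = GFP_KERNEL;
	} else {
		/* Growing path: strip reclaim, don't retry, stay quiet. */
		*page_gfp = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM) |
			    __GFP_NORETRY | __GFP_NOWARN;
		*other_gfp = __GFP_NORETRY | __GFP_NOWARN;
	}
}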

Patch

diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index 3bdf61c14264..2017a1950f63 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -3,6 +3,7 @@ 
 
 #include <linux/iosys-map.h>
 #include <linux/rwsem.h>
+#include <linux/vmalloc.h>
 
 #include <drm/panthor_drm.h>
 
@@ -35,25 +36,14 @@  struct panthor_heap_chunk_header {
 	u32 unknown[14];
 };
 
-/**
- * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
- */
-struct panthor_heap_chunk {
-	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
-	struct list_head node;
-
-	/** @bo: Buffer object backing the heap chunk. */
-	struct panthor_kernel_bo *bo;
-};
-
 /**
  * struct panthor_heap - Structure used to manage tiler heap contexts.
  */
 struct panthor_heap {
-	/** @chunks: List containing all heap chunks allocated so far. */
-	struct list_head chunks;
+	/** @bo: Buffer object backing a heap. */
+	struct panthor_kernel_bo *bo;
 
-	/** @lock: Lock protecting insertion in the chunks list. */
+	/** @lock: Lock protecting chunks addition. */
 	struct mutex lock;
 
 	/** @chunk_size: Size of each chunk. */
@@ -70,6 +60,9 @@  struct panthor_heap {
 
 	/** @chunk_count: Number of heap chunks currently allocated. */
 	u32 chunk_count;
+
+	/** @free_list: List of free chunks. */
+	u64 free_list;
 };
 
 #define MAX_HEAPS_PER_POOL    128
@@ -121,100 +114,120 @@  static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
 	       panthor_get_heap_ctx_offset(pool, id);
 }
 
-static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
-				    struct panthor_heap *heap,
-				    struct panthor_heap_chunk *chunk)
-{
-	mutex_lock(&heap->lock);
-	list_del(&chunk->node);
-	heap->chunk_count--;
-	mutex_unlock(&heap->lock);
-
-	atomic_sub(heap->chunk_size, &pool->size);
-
-	panthor_kernel_bo_destroy(chunk->bo);
-	kfree(chunk);
-}
-
 static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
 				    struct panthor_heap *heap,
-				    bool initial_chunk)
+				    bool initial_chunk,
+				    u64 *chunk_gpu_va)
 {
-	struct panthor_heap_chunk *chunk;
 	struct panthor_heap_chunk_header *hdr;
+	unsigned int npages = heap->chunk_size >> PAGE_SHIFT;
+	pgoff_t pgoffs = heap->chunk_count * npages;
+	struct panthor_kernel_bo *kbo = heap->bo;
+	struct panthor_gem_object *bo = to_panthor_bo(kbo->obj);
+	struct address_space *mapping = bo->base.base.filp->f_mapping;
+	pgprot_t prot = PAGE_KERNEL;
+	gfp_t page_gfp, other_gfp;
+	bool from_free_list = false;
+	struct page *page;
 	int ret;
 
-	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
-	if (!chunk)
-		return -ENOMEM;
-
-	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
-					     DRM_PANTHOR_BO_NO_MMAP,
-					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
-					     PANTHOR_VM_KERNEL_AUTO_VA);
-	if (IS_ERR(chunk->bo)) {
-		ret = PTR_ERR(chunk->bo);
-		goto err_free_chunk;
+	if (initial_chunk) {
+		page_gfp = mapping_gfp_mask(mapping);
+		other_gfp = GFP_KERNEL;
+	} else {
+		page_gfp = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM) |
+			   __GFP_NORETRY | __GFP_NOWARN;
+		other_gfp = __GFP_NORETRY | __GFP_NOWARN;
 	}
 
-	ret = panthor_kernel_bo_vmap(chunk->bo);
-	if (ret)
-		goto err_destroy_bo;
+	if (!heap->free_list) {
+		u64 new_chunk_va = panthor_kernel_bo_gpuva(kbo) +
+				   ((u64)heap->chunk_size * heap->chunk_count);
+
+		ret = panthor_vm_pre_fault_range(pool->vm, new_chunk_va,
+						 heap->chunk_size,
+						 page_gfp, other_gfp);
+		if (ret)
+			return ret;
+
+		page = xa_load(&bo->sparse.pages, pgoffs);
+		if (!page)
+			return -ENOMEM;
+
+		*chunk_gpu_va = new_chunk_va;
+	} else {
+		u64 offset = heap->free_list - panthor_kernel_bo_gpuva(kbo);
+
+		page = xa_load(&bo->sparse.pages, offset >> PAGE_SHIFT);
+		*chunk_gpu_va = heap->free_list;
+		from_free_list = true;
+	}
+
+	if (bo->base.map_wc)
+		prot = pgprot_writecombine(prot);
+
+	hdr = vmap(&page, 1, VM_MAP, prot);
+	if (!hdr)
+		return -ENOMEM;
+
+	if (from_free_list)
+		heap->free_list = hdr->next & GENMASK_ULL(63, 12);
 
-	hdr = chunk->bo->kmap;
 	memset(hdr, 0, sizeof(*hdr));
 
-	if (initial_chunk && !list_empty(&heap->chunks)) {
-		struct panthor_heap_chunk *prev_chunk;
-		u64 prev_gpuva;
+	if (initial_chunk && heap->chunk_count) {
+		u64 prev_gpuva = panthor_kernel_bo_gpuva(kbo) +
+				 ((u64)heap->chunk_size * (heap->chunk_count - 1));
 
-		prev_chunk = list_first_entry(&heap->chunks,
-					      struct panthor_heap_chunk,
-					      node);
-
-		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
 		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
 			    (heap->chunk_size >> 12);
 	}
 
-	panthor_kernel_bo_vunmap(chunk->bo);
+	vunmap(hdr);
 
-	mutex_lock(&heap->lock);
-	list_add(&chunk->node, &heap->chunks);
-	heap->chunk_count++;
-	mutex_unlock(&heap->lock);
+	if (!from_free_list)
+		heap->chunk_count++;
 
 	atomic_add(heap->chunk_size, &pool->size);
 
 	return 0;
-
-err_destroy_bo:
-	panthor_kernel_bo_destroy(chunk->bo);
-
-err_free_chunk:
-	kfree(chunk);
-
-	return ret;
 }
 
-static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
-				     struct panthor_heap *heap)
+static void panthor_free_heap_chunk(struct panthor_heap *heap,
+				    u64 chunk_gpu_va)
 {
-	struct panthor_heap_chunk *chunk, *tmp;
+	struct panthor_kernel_bo *kbo = heap->bo;
+	struct panthor_gem_object *bo = to_panthor_bo(kbo->obj);
+	u64 offset = chunk_gpu_va - panthor_kernel_bo_gpuva(kbo);
+	pgoff_t pgoffs = offset >> PAGE_SHIFT;
+	struct panthor_heap_chunk_header *hdr;
+	pgprot_t prot = bo->base.map_wc ? pgprot_writecombine(PAGE_KERNEL) :
+					  PAGE_KERNEL;
+	struct page *page;
 
-	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
-		panthor_free_heap_chunk(pool, heap, chunk);
+	page = xa_load(&bo->sparse.pages, pgoffs);
+	if (!page)
+		return;
+
+	hdr = vmap(&page, 1, VM_MAP, prot);
+	if (!hdr)
+		return;
+
+	hdr->next = heap->free_list;
+	heap->free_list = chunk_gpu_va;
+	vunmap(hdr);
 }
 
 static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
 				     struct panthor_heap *heap,
-				     u32 chunk_count)
+				     u32 chunk_count,
+				     u64 *first_chunk_gpu_va)
 {
 	int ret;
 	u32 i;
 
 	for (i = 0; i < chunk_count; i++) {
-		ret = panthor_alloc_heap_chunk(pool, heap, true);
+		ret = panthor_alloc_heap_chunk(pool, heap, true, first_chunk_gpu_va);
 		if (ret)
 			return ret;
 	}
@@ -231,7 +244,7 @@  panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
 	if (!heap)
 		return -EINVAL;
 
-	panthor_free_heap_chunks(pool, heap);
+	panthor_kernel_bo_destroy(heap->bo);
 	mutex_destroy(&heap->lock);
 	kfree(heap);
 	return 0;
@@ -278,7 +291,7 @@  int panthor_heap_create(struct panthor_heap_pool *pool,
 			u64 *first_chunk_gpu_va)
 {
 	struct panthor_heap *heap;
-	struct panthor_heap_chunk *first_chunk;
+	struct panthor_kernel_bo *bo;
 	struct panthor_vm *vm;
 	int ret = 0;
 	u32 id;
@@ -308,20 +321,27 @@  int panthor_heap_create(struct panthor_heap_pool *pool,
 	}
 
 	mutex_init(&heap->lock);
-	INIT_LIST_HEAD(&heap->chunks);
 	heap->chunk_size = chunk_size;
 	heap->max_chunks = max_chunks;
 	heap->target_in_flight = target_in_flight;
 
-	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
+	bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, max_chunks * chunk_size,
+				      DRM_PANTHOR_BO_NO_MMAP | DRM_PANTHOR_BO_ALLOC_ON_FAULT,
+				      chunk_size >> PAGE_SHIFT,
+				      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+				      PANTHOR_VM_KERNEL_AUTO_VA);
+	if (IS_ERR(bo)) {
+		ret = PTR_ERR(bo);
+		goto err_free_heap;
+	}
+
+	heap->bo = bo;
+
+	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count,
+					first_chunk_gpu_va);
 	if (ret)
 		goto err_free_heap;
 
-	first_chunk = list_first_entry(&heap->chunks,
-				       struct panthor_heap_chunk,
-				       node);
-	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);
-
 	down_write(&pool->lock);
 	/* The pool has been destroyed, we can't create a new heap. */
 	if (!pool->vm) {
@@ -346,7 +366,7 @@  int panthor_heap_create(struct panthor_heap_pool *pool,
 	return id;
 
 err_free_heap:
-	panthor_free_heap_chunks(pool, heap);
+	panthor_kernel_bo_destroy(heap->bo);
 	mutex_destroy(&heap->lock);
 	kfree(heap);
 
@@ -371,7 +391,6 @@  int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
 {
 	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
 	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
-	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
 	struct panthor_heap *heap;
 	int ret;
 
@@ -385,28 +404,10 @@  int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
 		goto out_unlock;
 	}
 
-	chunk_gpu_va &= GENMASK_ULL(63, 12);
-
 	mutex_lock(&heap->lock);
-	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
-		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
-			removed = chunk;
-			list_del(&chunk->node);
-			heap->chunk_count--;
-			atomic_sub(heap->chunk_size, &pool->size);
-			break;
-		}
-	}
+	panthor_free_heap_chunk(heap, chunk_gpu_va & GENMASK_ULL(63, 12));
 	mutex_unlock(&heap->lock);
 
-	if (removed) {
-		panthor_kernel_bo_destroy(chunk->bo);
-		kfree(chunk);
-		ret = 0;
-	} else {
-		ret = -EINVAL;
-	}
-
 out_unlock:
 	up_read(&pool->lock);
 	return ret;
@@ -435,7 +436,6 @@  int panthor_heap_grow(struct panthor_heap_pool *pool,
 {
 	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
 	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
-	struct panthor_heap_chunk *chunk;
 	struct panthor_heap *heap;
 	int ret;
 
@@ -471,15 +471,13 @@  int panthor_heap_grow(struct panthor_heap_pool *pool,
 	 * further jobs in this queue fail immediately instead of having to
 	 * wait for the job timeout.
 	 */
-	ret = panthor_alloc_heap_chunk(pool, heap, false);
+	mutex_lock(&heap->lock);
+	ret = panthor_alloc_heap_chunk(pool, heap, false, new_chunk_gpu_va);
+	mutex_unlock(&heap->lock);
 	if (ret)
 		goto out_unlock;
 
-	chunk = list_first_entry(&heap->chunks,
-				 struct panthor_heap_chunk,
-				 node);
-	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
-			    (heap->chunk_size >> 12);
+	*new_chunk_gpu_va |= (heap->chunk_size >> 12);
 	ret = 0;
 
 out_unlock:
@@ -553,7 +551,7 @@  panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
 	kref_init(&pool->refcount);
 
 	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
-						      DRM_PANTHOR_BO_NO_MMAP,
+						      DRM_PANTHOR_BO_NO_MMAP, 0,
 						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
 						      PANTHOR_VM_KERNEL_AUTO_VA);
 	if (IS_ERR(pool->gpu_contexts)) {