diff mbox series

[v2,14/18] io_uring: use region api for CQ

Message ID 42b3eda88aed4b3542534747cb0ce22744042d98.1732481694.git.asml.silence@gmail.com (mailing list archive)
State New
Headers show
Series kernel allocated regions and convert memmap to regions | expand

Commit Message

Pavel Begunkov Nov. 24, 2024, 9:12 p.m. UTC
Convert internal parts of the CQ/SQ array management to the region API.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/linux/io_uring_types.h |  8 +----
 io_uring/io_uring.c            | 36 +++++++---------------
 io_uring/memmap.c              | 55 +++++-----------------------------
 io_uring/memmap.h              |  4 ---
 io_uring/register.c            | 35 ++++++++++------------
 5 files changed, 36 insertions(+), 102 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3f353f269c6e..2db252841509 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -426,14 +426,8 @@  struct io_ring_ctx {
 	 */
 	struct mutex			mmap_lock;
 
-	/*
-	 * If IORING_SETUP_NO_MMAP is used, then the below holds
-	 * the gup'ed pages for the two rings, and the sqes.
-	 */
-	unsigned short			n_ring_pages;
-	struct page			**ring_pages;
-
 	struct io_mapped_region		sq_region;
+	struct io_mapped_region		ring_region;
 	/* used for optimised request parameter and wait argument passing  */
 	struct io_mapped_region		param_region;
 };
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a1dca7bce54a..b346a1f5f353 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2626,26 +2626,10 @@  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
-static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
-			  size_t size)
-{
-	return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
-				size);
-}
-
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
-	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
-		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
-				true);
-	} else {
-		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
-		ctx->n_ring_pages = 0;
-		vunmap(ctx->rings);
-	}
-
 	io_free_region(ctx, &ctx->sq_region);
-
+	io_free_region(ctx, &ctx->ring_region);
 	ctx->rings = NULL;
 	ctx->sq_sqes = NULL;
 }
@@ -3476,15 +3460,17 @@  static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	if (size == SIZE_MAX)
 		return -EOVERFLOW;
 
-	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
-	else
-		rings = io_rings_map(ctx, p->cq_off.user_addr, size);
-
-	if (IS_ERR(rings))
-		return PTR_ERR(rings);
+	memset(&rd, 0, sizeof(rd));
+	rd.size = PAGE_ALIGN(size);
+	if (ctx->flags & IORING_SETUP_NO_MMAP) {
+		rd.user_addr = p->cq_off.user_addr;
+		rd.flags |= IORING_MEM_REGION_TYPE_USER;
+	}
+	ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING);
+	if (ret)
+		return ret;
+	ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
 
-	ctx->rings = rings;
 	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
 		ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 0a2d03bd312b..52afe0576be6 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -118,18 +118,6 @@  void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
 	*npages = 0;
 }
 
-void io_pages_free(struct page ***pages, int npages)
-{
-	struct page **page_array = *pages;
-
-	if (!page_array)
-		return;
-
-	unpin_user_pages(page_array, npages);
-	kvfree(page_array);
-	*pages = NULL;
-}
-
 struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
 {
 	unsigned long start, end, nr_pages;
@@ -167,34 +155,6 @@  struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
 	return ERR_PTR(ret);
 }
 
-void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
-		     unsigned long uaddr, size_t size)
-{
-	struct page **page_array;
-	unsigned int nr_pages;
-	void *page_addr;
-
-	*npages = 0;
-
-	if (uaddr & (PAGE_SIZE - 1) || !size)
-		return ERR_PTR(-EINVAL);
-
-	nr_pages = 0;
-	page_array = io_pin_pages(uaddr, size, &nr_pages);
-	if (IS_ERR(page_array))
-		return page_array;
-
-	page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
-	if (page_addr) {
-		*pages = page_array;
-		*npages = nr_pages;
-		return page_addr;
-	}
-
-	io_pages_free(&page_array, nr_pages);
-	return ERR_PTR(-ENOMEM);
-}
-
 enum {
 	IO_REGION_F_VMAP			= 1,
 	IO_REGION_F_USER_PINNED			= 2,
@@ -383,9 +343,10 @@  static void *io_region_validate_mmap(struct io_ring_ctx *ctx,
 
 static int io_region_mmap(struct io_ring_ctx *ctx,
 			  struct io_mapped_region *mr,
-			  struct vm_area_struct *vma)
+			  struct vm_area_struct *vma,
+			  unsigned max_pages)
 {
-	unsigned long nr_pages = mr->nr_pages;
+	unsigned long nr_pages = min(mr->nr_pages, max_pages);
 
 	vm_flags_set(vma, VM_DONTEXPAND);
 	return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
@@ -449,7 +410,7 @@  __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
 	long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned int npages;
+	unsigned int page_limit;
 	void *ptr;
 
 	guard(mutex)(&ctx->mmap_lock);
@@ -461,14 +422,14 @@  __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	switch (offset & IORING_OFF_MMAP_MASK) {
 	case IORING_OFF_SQ_RING:
 	case IORING_OFF_CQ_RING:
-		npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
-		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
+		page_limit = (sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		return io_region_mmap(ctx, &ctx->ring_region, vma, page_limit);
 	case IORING_OFF_SQES:
-		return io_region_mmap(ctx, &ctx->sq_region, vma);
+		return io_region_mmap(ctx, &ctx->sq_region, vma, UINT_MAX);
 	case IORING_OFF_PBUF_RING:
 		return io_pbuf_mmap(file, vma);
 	case IORING_MAP_OFF_PARAM_REGION:
-		return io_region_mmap(ctx, &ctx->param_region, vma);
+		return io_region_mmap(ctx, &ctx->param_region, vma, UINT_MAX);
 	}
 
 	return -EINVAL;
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 2402bca3d700..7395996eb353 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -4,7 +4,6 @@ 
 #define IORING_MAP_OFF_PARAM_REGION		0x20000000ULL
 
 struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
-void io_pages_free(struct page ***pages, int npages);
 int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
 			struct page **pages, int npages);
 
@@ -13,9 +12,6 @@  void *io_pages_map(struct page ***out_pages, unsigned short *npages,
 void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
 		    bool put_pages);
 
-void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
-		     unsigned long uaddr, size_t size);
-
 #ifndef CONFIG_MMU
 unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
 #endif
diff --git a/io_uring/register.c b/io_uring/register.c
index 44cd64923d31..f1698c18c7cb 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -367,26 +367,19 @@  static int io_register_clock(struct io_ring_ctx *ctx,
  * either mapping or freeing.
  */
 struct io_ring_ctx_rings {
-	unsigned short n_ring_pages;
-	struct page **ring_pages;
 	struct io_rings *rings;
-
 	struct io_uring_sqe *sq_sqes;
+
 	struct io_mapped_region sq_region;
+	struct io_mapped_region ring_region;
 };
 
 static void io_register_free_rings(struct io_ring_ctx *ctx,
 				   struct io_uring_params *p,
 				   struct io_ring_ctx_rings *r)
 {
-	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
-		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
-				true);
-	} else {
-		io_pages_free(&r->ring_pages, r->n_ring_pages);
-		vunmap(r->rings);
-	}
 	io_free_region(ctx, &r->sq_region);
+	io_free_region(ctx, &r->ring_region);
 }
 
 #define swap_old(ctx, o, n, field)		\
@@ -436,13 +429,18 @@  static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	if (size == SIZE_MAX)
 		return -EOVERFLOW;
 
-	if (!(p.flags & IORING_SETUP_NO_MMAP))
-		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
-	else
-		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
-						p.cq_off.user_addr, size);
-	if (IS_ERR(n.rings))
-		return PTR_ERR(n.rings);
+	memset(&rd, 0, sizeof(rd));
+	rd.size = PAGE_ALIGN(size);
+	if (p.flags & IORING_SETUP_NO_MMAP) {
+		rd.user_addr = p.cq_off.user_addr;
+		rd.flags |= IORING_MEM_REGION_TYPE_USER;
+	}
+	ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
+	if (ret) {
+		io_register_free_rings(ctx, &p, &n);
+		return ret;
+	}
+	n.rings = io_region_get_ptr(&n.ring_region);
 
 	n.rings->sq_ring_mask = p.sq_entries - 1;
 	n.rings->cq_ring_mask = p.cq_entries - 1;
@@ -552,8 +550,7 @@  static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 
 	ctx->rings = n.rings;
 	ctx->sq_sqes = n.sq_sqes;
-	swap_old(ctx, o, n, n_ring_pages);
-	swap_old(ctx, o, n, ring_pages);
+	swap_old(ctx, o, n, ring_region);
 	swap_old(ctx, o, n, sq_region);
 	to_free = &o;
 	ret = 0;