[11/16] io_uring: batch io_kiocb allocation

Message ID 20190108165645.19311-12-axboe@kernel.dk (mailing list archive)
State New, archived
Series [01/16] fs: add an iopoll method to struct file_operations

Commit Message

Jens Axboe Jan. 8, 2019, 4:56 p.m. UTC
Similarly to how we use state->ios_left to know how many file references
to get, we can use it to allocate the io_kiocb's we need in bulk.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)
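
For readers skimming the thread: the technique here is a small per-submission
object cache, refilled with kmem_cache_alloc_bulk() and drained one object at
a time. Below is a minimal sketch of that pattern in isolation; the names
obj_cachep, batch_state, and batch_get are illustrative stand-ins, not the
patch's actual identifiers:

#include <linux/slab.h>
#include <linux/kernel.h>

#define OBJ_BATCH	16

static struct kmem_cache *obj_cachep;	/* assumed to be created elsewhere */

struct batch_state {
	void		*objs[OBJ_BATCH];
	unsigned int	free_objs;	/* cached objects not yet handed out */
	unsigned int	cur_obj;	/* index of the next object to hand out */
};

static void *batch_get(struct batch_state *state, unsigned int left)
{
	if (!state->free_objs) {
		/* don't bulk allocate more than the caller will use */
		size_t size = min_t(size_t, left, ARRAY_SIZE(state->objs));
		int ret;

		/* returns the number of objects allocated, <= 0 on failure */
		ret = kmem_cache_alloc_bulk(obj_cachep, GFP_KERNEL, size,
					    state->objs);
		if (ret <= 0)
			return NULL;
		state->free_objs = ret - 1;
		state->cur_obj = 1;
		return state->objs[0];
	}

	state->free_objs--;
	return state->objs[state->cur_obj++];
}

Whatever is still cached when the submission ends goes back via
kmem_cache_free_bulk(), which is what io_submit_state_end() does in the
patch below.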

Comments

Christoph Hellwig Jan. 9, 2019, 12:13 p.m. UTC | #1
> +	if (!state)
> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);

Just return an error here if kmem_cache_alloc fails.

> +	if (req)
> +		io_req_init(ctx, req);

Because all the other branches can't reach this with a NULL req.
Jens Axboe Jan. 9, 2019, 4:57 p.m. UTC | #2
On 1/9/19 5:13 AM, Christoph Hellwig wrote:
>> +	if (!state)
>> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
> 
> Just return an error here if kmem_cache_alloc fails.
> 
>> +	if (req)
>> +		io_req_init(ctx, req);
> 
> Because all the other branches can't reach this with a NULL req.

This is different in the current tree, since I properly fixed the
ctx ref issue.
Christoph Hellwig Jan. 9, 2019, 7:03 p.m. UTC | #3
On Wed, Jan 09, 2019 at 09:57:59AM -0700, Jens Axboe wrote:
> On 1/9/19 5:13 AM, Christoph Hellwig wrote:
> >> +	if (!state)
> >> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
> > 
> > Just return an error here if kmem_cache_alloc fails.
> > 
> >> +	if (req)
> >> +		io_req_init(ctx, req);
> > 
> > Because all the other branches can't reach this with a NULL req.
> 
> This is different in the current tree, since I properly fixed the
> ctx ref issue.

Your tree does a percpu_ref_tryget up front, and then leaks that reference
if kmem_cache_alloc_bulk fails; it is also inconsistent about NULL vs
ERR_PTR returns.  I think you want something like this on top:

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 35d055dcbc22..6c95749e9601 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -250,14 +250,6 @@ static struct io_uring_event *io_peek_cqring(struct io_ring_ctx *ctx)
 	return &ring->events[tail & ctx->cq_ring.ring_mask];
 }
 
-static bool io_req_init(struct io_ring_ctx *ctx, struct io_kiocb *req)
-{
-	req->ki_ctx = ctx;
-	INIT_LIST_HEAD(&req->ki_list);
-	req->ki_flags = 0;
-	return true;
-}
-
 static void io_ring_drop_ctx_ref(struct io_ring_ctx *ctx, unsigned refs)
 {
 	percpu_ref_put_many(&ctx->refs, refs);
@@ -274,9 +266,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	if (!percpu_ref_tryget(&ctx->refs))
 		return NULL;
 
-	if (!state)
+	if (!state) {
 		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
-	else if (!state->free_iocbs) {
+		if (!req)
+			goto out_drop_ref;
+	} else if (!state->free_iocbs) {
 		size_t size;
 		int ret;
 
@@ -284,7 +278,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		ret = kmem_cache_alloc_bulk(kiocb_cachep, GFP_KERNEL, size,
 						state->iocbs);
 		if (ret <= 0)
-			return ERR_PTR(-ENOMEM);
+			goto out_drop_ref;
 		state->free_iocbs = ret - 1;
 		state->cur_iocb = 1;
 		req = state->iocbs[0];
@@ -294,11 +288,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		state->cur_iocb++;
 	}
 
-	if (req) {
-		io_req_init(ctx, req);
-		return req;
-	}
+	req->ki_ctx = ctx;
+	req->ki_flags = 0;
+	return req;
 
+out_drop_ref:
 	io_ring_drop_ctx_ref(ctx, 1);
 	return NULL;
 }
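
The net effect of this fixup is a single failure contract for io_get_req():
every failure path unwinds through out_drop_ref, so a NULL return always
means no request was allocated and no ctx reference is held, and callers no
longer need to tell NULL apart from ERR_PTR(-ENOMEM).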
Jens Axboe Jan. 9, 2019, 8:08 p.m. UTC | #4
On 1/9/19 12:03 PM, Christoph Hellwig wrote:
> On Wed, Jan 09, 2019 at 09:57:59AM -0700, Jens Axboe wrote:
>> On 1/9/19 5:13 AM, Christoph Hellwig wrote:
>>>> +	if (!state)
>>>> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
>>>
>>> Just return an error here if kmem_cache_alloc fails.
>>>
>>>> +	if (req)
>>>> +		io_req_init(ctx, req);
>>>
>>> Because all the other branches can't reach this with a NULL req.
>>
>> This is different in the current tree, since I properly fixed the
>> ctx ref issue.
> 
> Your tree does a percpu_ref_tryget very first, and then leaks that if
> kmem_cache_alloc_bulk fails, and also is inconsistent for NULL vs
> ERR_PTR returns.  I think you want something like this on top:

I fixed it up while doing the rebase already. Just haven't pushed anything
out until I've had a chance to run it through testing.

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 11d045f0f799..62778d7ffb8d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -135,6 +135,13 @@ struct io_submit_state {
 	struct list_head req_list;
 	unsigned int req_count;
 
+	/*
+	 * io_kiocb alloc cache
+	 */
+	void *iocbs[IO_IOPOLL_BATCH];
+	unsigned int free_iocbs;
+	unsigned int cur_iocb;
+
 	/*
 	 * File reference cache
 	 */
@@ -210,15 +217,33 @@ static void io_req_init(struct io_ring_ctx *ctx, struct io_kiocb *req)
 	req->ki_flags = 0;
 }
 
-static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
+				   struct io_submit_state *state)
 {
 	struct io_kiocb *req;
 
-	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
-	if (!req)
-		return NULL;
+	if (!state)
+		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
+	else if (!state->free_iocbs) {
+		size_t size;
+		int ret;
+
+		size = min_t(size_t, state->ios_left, ARRAY_SIZE(state->iocbs));
+		ret = kmem_cache_alloc_bulk(kiocb_cachep, GFP_KERNEL, size,
+						state->iocbs);
+		if (ret <= 0)
+			return ERR_PTR(-ENOMEM);
+		state->free_iocbs = ret - 1;
+		state->cur_iocb = 1;
+		req = state->iocbs[0];
+	} else {
+		req = state->iocbs[state->cur_iocb];
+		state->free_iocbs--;
+		state->cur_iocb++;
+	}
 
-	io_req_init(ctx, req);
+	if (req)
+		io_req_init(ctx, req);
 	return req;
 }
 
@@ -773,7 +798,7 @@ static int __io_submit_one(struct io_ring_ctx *ctx,
 	if (unlikely(iocb->flags))
 		return -EINVAL;
 
-	req = io_get_req(ctx);
+	req = io_get_req(ctx, state);
 	if (unlikely(!req))
 		return -EAGAIN;
 
@@ -844,6 +869,9 @@ static void io_submit_state_end(struct io_submit_state *state)
 	if (!list_empty(&state->req_list))
 		io_flush_state_reqs(state->ctx, state);
 	io_file_put(state, NULL);
+	if (state->free_iocbs)
+		kmem_cache_free_bulk(kiocb_cachep, state->free_iocbs,
+					&state->iocbs[state->cur_iocb]);
 }
 
 /*
@@ -855,6 +883,7 @@ static void io_submit_state_start(struct io_submit_state *state,
 	state->ctx = ctx;
 	INIT_LIST_HEAD(&state->req_list);
 	state->req_count = 0;
+	state->free_iocbs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
 #ifdef CONFIG_BLOCK
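
Taken together, the submission path uses the state roughly like this. This is
a sketch only: the argument order is inferred from the hunks above, iocbs[]
is a hypothetical array of submitted iocbs, and error handling is elided:

	struct io_submit_state state;
	int i, ret;

	io_submit_state_start(&state, ctx, to_submit);
	for (i = 0; i < to_submit; i++) {
		/*
		 * __io_submit_one() calls io_get_req(ctx, state), which hands
		 * out requests from state->iocbs and refills the cache with
		 * kmem_cache_alloc_bulk() when it runs dry.
		 */
		ret = __io_submit_one(ctx, &iocbs[i], &state);
		if (ret)
			break;
	}
	/* frees any still-cached, unused iocbs via kmem_cache_free_bulk() */
	io_submit_state_end(&state);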