[11/16] io_uring: batch io_kiocb allocation

Message ID 20190108165645.19311-12-axboe@kernel.dk (mailing list archive)
State New, archived
Series [01/16] fs: add an iopoll method to struct file_operations

Commit Message

Jens Axboe Jan. 8, 2019, 4:56 p.m. UTC
Similarly to how we use state->ios_left to know how many file references
to get, we can use it to allocate the io_kiocb's we need in bulk.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)
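
For readers skimming the thread: the technique here is a small per-submission
object cache, refilled with kmem_cache_alloc_bulk() and drained one object at
a time. Below is a minimal sketch of that pattern in isolation; the names
obj_cachep, batch_state, and batch_get are illustrative stand-ins, not the
patch's actual identifiers:

#include <linux/slab.h>
#include <linux/kernel.h>

#define OBJ_BATCH	16

static struct kmem_cache *obj_cachep;	/* assumed to be created elsewhere */

struct batch_state {
	void		*objs[OBJ_BATCH];
	unsigned int	free_objs;	/* cached objects not yet handed out */
	unsigned int	cur_obj;	/* index of the next object to hand out */
};

static void *batch_get(struct batch_state *state, unsigned int left)
{
	if (!state->free_objs) {
		/* don't bulk allocate more than the caller will use */
		size_t size = min_t(size_t, left, ARRAY_SIZE(state->objs));
		int ret;

		/* returns the number of objects allocated, <= 0 on failure */
		ret = kmem_cache_alloc_bulk(obj_cachep, GFP_KERNEL, size,
					    state->objs);
		if (ret <= 0)
			return NULL;
		state->free_objs = ret - 1;
		state->cur_obj = 1;
		return state->objs[0];
	}

	state->free_objs--;
	return state->objs[state->cur_obj++];
}

Whatever is still cached when the submission ends goes back via
kmem_cache_free_bulk(), which is what io_submit_state_end() does in the
patch below.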

Comments

Christoph Hellwig Jan. 9, 2019, 12:13 p.m. UTC | #1
> +	if (!state)
> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);

Just return an error here if kmem_cache_alloc fails.

> +	if (req)
> +		io_req_init(ctx, req);

Because all the other branches can't reach this with a NULL req.
Jens Axboe Jan. 9, 2019, 4:57 p.m. UTC | #2
On 1/9/19 5:13 AM, Christoph Hellwig wrote:
>> +	if (!state)
>> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
> 
> Just return an error here if kmem_cache_alloc fails.
> 
>> +	if (req)
>> +		io_req_init(ctx, req);
> 
> Because all the other branches can't reach this with a NULL req.

This is different in the current tree, since I properly fixed the
ctx ref issue.
Christoph Hellwig Jan. 9, 2019, 7:03 p.m. UTC | #3
On Wed, Jan 09, 2019 at 09:57:59AM -0700, Jens Axboe wrote:
> On 1/9/19 5:13 AM, Christoph Hellwig wrote:
> >> +	if (!state)
> >> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
> > 
> > Just return an error here if kmem_cache_alloc fails.
> > 
> >> +	if (req)
> >> +		io_req_init(ctx, req);
> > 
> > Because all the other branches can't reach this with a NULL req.
> 
> This is different in the current tree, since I properly fixed the
> ctx ref issue.

Your tree does a percpu_ref_tryget up front, and then leaks that reference
if kmem_cache_alloc_bulk fails; it is also inconsistent about NULL vs
ERR_PTR returns.  I think you want something like this on top:

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 35d055dcbc22..6c95749e9601 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -250,14 +250,6 @@ static struct io_uring_event *io_peek_cqring(struct io_ring_ctx *ctx)
 	return &ring->events[tail & ctx->cq_ring.ring_mask];
 }
 
-static bool io_req_init(struct io_ring_ctx *ctx, struct io_kiocb *req)
-{
-	req->ki_ctx = ctx;
-	INIT_LIST_HEAD(&req->ki_list);
-	req->ki_flags = 0;
-	return true;
-}
-
 static void io_ring_drop_ctx_ref(struct io_ring_ctx *ctx, unsigned refs)
 {
 	percpu_ref_put_many(&ctx->refs, refs);
@@ -274,9 +266,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	if (!percpu_ref_tryget(&ctx->refs))
 		return NULL;
 
-	if (!state)
+	if (!state) {
 		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
-	else if (!state->free_iocbs) {
+		if (!req)
+			goto out_drop_ref;
+	} else if (!state->free_iocbs) {
 		size_t size;
 		int ret;
 
@@ -284,7 +278,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		ret = kmem_cache_alloc_bulk(kiocb_cachep, GFP_KERNEL, size,
 						state->iocbs);
 		if (ret <= 0)
-			return ERR_PTR(-ENOMEM);
+			goto out_drop_ref;
 		state->free_iocbs = ret - 1;
 		state->cur_iocb = 1;
 		req = state->iocbs[0];
@@ -294,11 +288,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		state->cur_iocb++;
 	}
 
-	if (req) {
-		io_req_init(ctx, req);
-		return req;
-	}
+	req->ki_ctx = ctx;
+	req->ki_flags = 0;
+	return req;
 
+out_drop_ref:
 	io_ring_drop_ctx_ref(ctx, 1);
 	return NULL;
 }
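
The net effect of this fixup is a single failure contract for io_get_req():
every failure path unwinds through out_drop_ref, so a NULL return always
means no request was allocated and no ctx reference is held, and callers no
longer need to tell NULL apart from ERR_PTR(-ENOMEM).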
Jens Axboe Jan. 9, 2019, 8:08 p.m. UTC | #4
On 1/9/19 12:03 PM, Christoph Hellwig wrote:
> On Wed, Jan 09, 2019 at 09:57:59AM -0700, Jens Axboe wrote:
>> On 1/9/19 5:13 AM, Christoph Hellwig wrote:
>>>> +	if (!state)
>>>> +		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
>>>
>>> Just return an error here if kmem_cache_alloc fails.
>>>
>>>> +	if (req)
>>>> +		io_req_init(ctx, req);
>>>
>>> Because all the other branches can't reach this with a NULL req.
>>
>> This is different in the current tree, since I properly fixed the
>> ctx ref issue.
> 
> Your tree does a percpu_ref_tryget very first, and then leaks that if
> kmem_cache_alloc_bulk fails, and also is inconsistent for NULL vs
> ERR_PTR returns.  I think you want something like this on top:

I fixed it up while doing the rebase already. Just haven't pushed anything
out until I've had a chance to run it through testing.

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 11d045f0f799..62778d7ffb8d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -135,6 +135,13 @@ struct io_submit_state {
 	struct list_head req_list;
 	unsigned int req_count;
 
+	/*
+	 * io_kiocb alloc cache
+	 */
+	void *iocbs[IO_IOPOLL_BATCH];
+	unsigned int free_iocbs;
+	unsigned int cur_iocb;
+
 	/*
 	 * File reference cache
 	 */
@@ -210,15 +217,33 @@ static void io_req_init(struct io_ring_ctx *ctx, struct io_kiocb *req)
 	req->ki_flags = 0;
 }
 
-static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
+				   struct io_submit_state *state)
 {
 	struct io_kiocb *req;
 
-	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
-	if (!req)
-		return NULL;
+	if (!state)
+		req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
+	else if (!state->free_iocbs) {
+		size_t size;
+		int ret;
+
+		size = min_t(size_t, state->ios_left, ARRAY_SIZE(state->iocbs));
+		ret = kmem_cache_alloc_bulk(kiocb_cachep, GFP_KERNEL, size,
+						state->iocbs);
+		if (ret <= 0)
+			return ERR_PTR(-ENOMEM);
+		state->free_iocbs = ret - 1;
+		state->cur_iocb = 1;
+		req = state->iocbs[0];
+	} else {
+		req = state->iocbs[state->cur_iocb];
+		state->free_iocbs--;
+		state->cur_iocb++;
+	}
 
-	io_req_init(ctx, req);
+	if (req)
+		io_req_init(ctx, req);
 	return req;
 }
 
@@ -773,7 +798,7 @@ static int __io_submit_one(struct io_ring_ctx *ctx,
 	if (unlikely(iocb->flags))
 		return -EINVAL;
 
-	req = io_get_req(ctx);
+	req = io_get_req(ctx, state);
 	if (unlikely(!req))
 		return -EAGAIN;
 
@@ -844,6 +869,9 @@ static void io_submit_state_end(struct io_submit_state *state)
 	if (!list_empty(&state->req_list))
 		io_flush_state_reqs(state->ctx, state);
 	io_file_put(state, NULL);
+	if (state->free_iocbs)
+		kmem_cache_free_bulk(kiocb_cachep, state->free_iocbs,
+					&state->iocbs[state->cur_iocb]);
 }
 
 /*
@@ -855,6 +883,7 @@ static void io_submit_state_start(struct io_submit_state *state,
 	state->ctx = ctx;
 	INIT_LIST_HEAD(&state->req_list);
 	state->req_count = 0;
+	state->free_iocbs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
 #ifdef CONFIG_BLOCK
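
Taken together, the submission path uses the state roughly like this. This is
a sketch only: the argument order is inferred from the hunks above, iocbs[]
is a hypothetical array of submitted iocbs, and error handling is elided:

	struct io_submit_state state;
	int i, ret;

	io_submit_state_start(&state, ctx, to_submit);
	for (i = 0; i < to_submit; i++) {
		/*
		 * __io_submit_one() calls io_get_req(ctx, state), which hands
		 * out requests from state->iocbs and refills the cache with
		 * kmem_cache_alloc_bulk() when it runs dry.
		 */
		ret = __io_submit_one(ctx, &iocbs[i], &state);
		if (ret)
			break;
	}
	/* frees any still-cached, unused iocbs via kmem_cache_free_bulk() */
	io_submit_state_end(&state);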