[3/3] io_uring: use kmem_cache to alloc sqe

Message ID 20190713045454.2929-1-liuzhengyuan@kylinos.cn (mailing list archive)
State New, archived

Commit Message

Zhengyuan Liu July 13, 2019, 4:54 a.m. UTC
As we introduced three lists (async, defer, link), there can be many
sqe allocations. A natural idea is to use kmem_cache to satisfy those
allocations, just like io_kiocb does.

Signed-off-by: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
---
 fs/io_uring.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)
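
For readers unfamiliar with the pattern, the change boils down to creating
one dedicated slab cache at init time and allocating/freeing sqe copies
from it, instead of going through the generic kmalloc()/kfree() size
classes. A minimal sketch of that pattern (mirroring the patch below; the
sqe_dup() helper is illustrative, not part of the patch):

    static struct kmem_cache *sqe_cachep;

    static int __init example_init(void)
    {
            /* one cache, sized and aligned for struct io_uring_sqe */
            sqe_cachep = KMEM_CACHE(io_uring_sqe,
                                    SLAB_HWCACHE_ALIGN | SLAB_PANIC);
            return 0;
    }

    /* duplicate an sqe for async/defer/link handling */
    static struct io_uring_sqe *sqe_dup(const struct io_uring_sqe *src)
    {
            struct io_uring_sqe *copy;

            copy = kmem_cache_alloc(sqe_cachep, GFP_KERNEL | __GFP_NOWARN);
            if (copy)
                    memcpy(copy, src, sizeof(*copy));
            /* release later with kmem_cache_free(sqe_cachep, copy) */
            return copy;
    }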

Comments

Jens Axboe July 13, 2019, 9:44 p.m. UTC | #1
On 7/12/19 10:54 PM, Zhengyuan Liu wrote:
> As we introduced three lists (async, defer, link), there can be many
> sqe allocations. A natural idea is to use kmem_cache to satisfy those
> allocations, just like io_kiocb does.

A change like this needs to come with some performance numbers
or utilization numbers showing the benefit. I have considered
doing this before, but just never got around to testing if it's
worthwhile or not.

Have you?
Jens Axboe July 15, 2019, 3:51 a.m. UTC | #2
On Jul 14, 2019, at 9:38 PM, Zhengyuan Liu <liuzhengyuan@kylinos.cn> wrote:
> 
> 
>> On 7/14/19 5:44 AM, Jens Axboe wrote:
>>> On 7/12/19 10:54 PM, Zhengyuan Liu wrote:
>>> As we introduced three lists (async, defer, link), there can be many
>>> sqe allocations. A natural idea is to use kmem_cache to satisfy those
>>> allocations, just like io_kiocb does.
>> A change like this needs to come with some performance numbers
>> or utilization numbers showing the benefit. I have considered
>> doing this before, but just never got around to testing if it's
>> worthwhile or not.
>> Have you?
> I only did some simple testing with fio. The benefit depends heavily on
> the IO scenario. For random and direct IO there appears to be nearly no
> sqe copying, but for buffered sequential reads/writes more than 60% of
> submissions end up copying the sqe.

Right, which is great as it’s then working as designed! But my question was, for that sequential case, what kind of speed up (or reduction in overhead) do you see from allocating the unit out of slab vs kmalloc? There has to be a win there for the change to be worthwhile.

-- 
Jens Axboe
Jens Axboe July 15, 2019, 1:52 p.m. UTC | #3
On 7/14/19 11:45 PM, Zhengyuan Liu wrote:
> 
> On 7/15/19 11:51 AM, Jens Axboe wrote:
>> On Jul 14, 2019, at 9:38 PM, Zhengyuan Liu <liuzhengyuan@kylinos.cn> wrote:
>>>
>>>> On 7/14/19 5:44 AM, Jens Axboe wrote:
>>>>> On 7/12/19 10:54 PM, Zhengyuan Liu wrote:
>>>>> As we introduced three lists (async, defer, link), there can be many
>>>>> sqe allocations. A natural idea is to use kmem_cache to satisfy those
>>>>> allocations, just like io_kiocb does.
>>>> A change like this needs to come with some performance numbers
>>>> or utilization numbers showing the benefit. I have considered
>>>> doing this before, but just never got around to testing if it's
>>>> worthwhile or not.
>>>> Have you?
>>> I only did some simple testing with fio. The benefit depends heavily on
>>> the IO scenario. For random and direct IO there appears to be nearly no
>>> sqe copying, but for buffered sequential reads/writes more than 60% of
>>> submissions end up copying the sqe.
>> Right, which is great as it’s then working as designed! But my
>> question was, for that sequential case, what kind of speed up (or
>> reduction in overhead) do you see from allocating the unit out of
>> slab vs kmalloc? There has to be a win there for the change to be
>> worthwhile.
>
> Thanks for your comments, Jens. Indeed, I saw no speedup or reduction in
> overhead in my testing.

Then I suggest we just drop this change; it only makes sense if there's
a win.
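
One plausible explanation for the null result, not stated in the thread:
struct io_uring_sqe is 64 bytes, so kmalloc() already serves these
allocations from the generic kmalloc-64 slab cache, and a dedicated cache
largely duplicates that fast path. A hypothetical probe (not part of the
patch) to confirm which size class backs a kmalloc'ed sqe:

    /* illustrative only: print the block size kmalloc actually hands back */
    struct io_uring_sqe *p = kmalloc(sizeof(*p), GFP_KERNEL);

    if (p) {
            pr_info("sqe size %zu, backing block %zu\n", sizeof(*p), ksize(p));
            kfree(p);
    }

With both paths handing out a 64-byte slab object, the remaining difference
is mostly bookkeeping, which is consistent with the lack of measurable
speedup.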

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 392cbf777f25..c325193a20bd 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -368,6 +368,7 @@  struct io_submit_state {
 static void io_sq_wq_submit_work(struct work_struct *work);
 
 static struct kmem_cache *req_cachep;
+static struct kmem_cache *sqe_cachep;
 
 static const struct file_operations io_uring_fops;
 
@@ -1673,14 +1674,14 @@  static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list))
 		return 0;
 
-	sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+	sqe_copy = kmem_cache_alloc(sqe_cachep, GFP_KERNEL | __GFP_NOWARN);
 	if (!sqe_copy)
 		return -EAGAIN;
 
 	spin_lock_irq(&ctx->completion_lock);
 	if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list)) {
 		spin_unlock_irq(&ctx->completion_lock);
-		kfree(sqe_copy);
+		kmem_cache_free(sqe_cachep, sqe_copy);
 		return 0;
 	}
 
@@ -1845,7 +1846,7 @@  static void io_sq_wq_submit_work(struct work_struct *work)
 		}
 
 		/* async context always use a copy of the sqe */
-		kfree(sqe);
+		kmem_cache_free(sqe_cachep, (void *)sqe);
 
 		/* req from defer and link list needn't dec async_list->cnt */
 		if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINKED))
@@ -1991,7 +1992,7 @@  static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
 		struct io_uring_sqe *sqe_copy;
 
-		sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+		sqe_copy = kmem_cache_alloc(sqe_cachep, GFP_KERNEL | __GFP_NOWARN);
 		if (sqe_copy) {
 			struct async_list *list;
 
@@ -2076,12 +2077,13 @@  static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	if (*link) {
 		struct io_kiocb *prev = *link;
 
-		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
+		sqe_copy = kmem_cache_alloc(sqe_cachep, GFP_KERNEL | __GFP_NOWARN);
 		if (!sqe_copy) {
 			ret = -EAGAIN;
 			goto err_req;
 		}
 
+		memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
 		s->sqe = sqe_copy;
 		memcpy(&req->submit, s, sizeof(*s));
 		list_add_tail(&req->list, &prev->link_list);
@@ -3470,6 +3472,7 @@  SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 static int __init io_uring_init(void)
 {
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	sqe_cachep = KMEM_CACHE(io_uring_sqe, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
 	return 0;
 };
 __initcall(io_uring_init);
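
A note on the allocation helpers used above: KMEM_CACHE() is a convenience
wrapper from <linux/slab.h> that names and sizes the cache from the struct
itself. In kernels of this vintage it expands roughly to the following
(reproduced for illustration; see the header for the authoritative
definition):

    #define KMEM_CACHE(__struct, __flags)                            \
            kmem_cache_create(#__struct, sizeof(struct __struct),    \
                    __alignof__(struct __struct), (__flags), NULL)

So the cache ends up named "io_uring_sqe", sized and aligned for the
struct; SLAB_HWCACHE_ALIGN aligns objects to hardware cache lines, and
SLAB_PANIC panics at boot if the cache cannot be created, which is why no
error check follows the call.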