Message ID | 20220707232345.54424-5-axboe@kernel.dk (mailing list archive) |
---|---|
State | New |
Series | [1/4] io_uring: move apoll cache to poll.c |
On Thu, 2022-07-07 at 17:23 -0600, Jens Axboe wrote:
> For recvmsg/sendmsg, if they don't complete inline, we currently need
> to allocate a struct io_async_msghdr for each request. This is a
> somewhat large struct.
>
> Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
> alloc + free overhead considerably, yielding 4-5% of extra performance
> running netbench.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  include/linux/io_uring_types.h |  6 ++-
>  io_uring/io_uring.c            |  3 ++
>  io_uring/net.c                 | 73 +++++++++++++++++++++++++++++-----
>  io_uring/net.h                 | 11 ++++-
>  4 files changed, 81 insertions(+), 12 deletions(-)
>
[...]
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 6679069eeef1..ba7e94ff287c 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
[...]
> +static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +        struct io_async_msghdr *hdr = req->async_data;
> +
> +        if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
> +                return;
> +
> +        if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
> +                hlist_add_head(&hdr->cache_list, &req->ctx->netmsg_cache.list);

can io_alloc_cache_store just do the store?
would be nicer to have cache::list be generally unused outside of the
cache code.

> +                req->async_data = NULL;
> +                req->flags &= ~REQ_F_ASYNC_DATA;
> +        }
> +}
> +
> +static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
> +                                                      unsigned int issue_flags)
> +{
> +        struct io_ring_ctx *ctx = req->ctx;
> +
> +        if (!(issue_flags & IO_URING_F_UNLOCKED) &&
> +            !hlist_empty(&ctx->netmsg_cache.list)) {
> +                struct io_async_msghdr *hdr;
> +
> +                hdr = hlist_entry(ctx->netmsg_cache.list.first,
> +                                  struct io_async_msghdr, cache_list);
> +                ctx->netmsg_cache.nr_cached--;
> +                hlist_del(&hdr->cache_list);

ditto here. I think all the hlist stuff and the nr_cached manipulation
can be wrapped up

[...]
> +void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
> +{
> +        while (!hlist_empty(&ctx->netmsg_cache.list)) {
> +                struct io_async_msghdr *hdr;
> +
> +                hdr = hlist_entry(ctx->netmsg_cache.list.first,
> +                                  struct io_async_msghdr, cache_list);
> +                hlist_del(&hdr->cache_list);
> +                kfree(hdr);
> +        }
> +        ctx->netmsg_cache.nr_cached = 0;
> +}

again - could be put somewhere common. I assume there will not be much
more cleanup than simple kfree

[...]
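For illustration, a minimal sketch of the kind of common helper the comments above are asking for, with the per-type cleanup reduced to a callback. The names here (io_alloc_cache_free, io_netmsg_cache_free) are hypothetical and not the API in this series:

/*
 * Hypothetical alloc_cache.h helper: the hlist walk and the nr_cached
 * reset live with the cache, and the caller only supplies the
 * type-specific free step.
 */
static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
                                       void (*free)(struct hlist_node *node))
{
        while (!hlist_empty(&cache->list)) {
                struct hlist_node *node = cache->list.first;

                hlist_del(node);
                free(node);
        }
        cache->nr_cached = 0;
}

/* net.c would then shrink to just the kfree step: */
static void io_netmsg_cache_free(struct hlist_node *node)
{
        kfree(hlist_entry(node, struct io_async_msghdr, cache_list));
}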
On 7/8/22 1:33 AM, Dylan Yudaken wrote:
> On Thu, 2022-07-07 at 17:23 -0600, Jens Axboe wrote:
>> For recvmsg/sendmsg, if they don't complete inline, we currently need
>> to allocate a struct io_async_msghdr for each request. This is a
>> somewhat large struct.
>>
>> Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
>> alloc + free overhead considerably, yielding 4-5% of extra performance
>> running netbench.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
[...]
>> +static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
>> +{
>> +        struct io_async_msghdr *hdr = req->async_data;
>> +
>> +        if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
>> +                return;
>> +
>> +        if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
>> +                hlist_add_head(&hdr->cache_list, &req->ctx->netmsg_cache.list);
>
> can io_alloc_cache_store just do the store?
> would be nicer to have cache::list be generally unused outside of the
> cache code.
We could do that if we just make the hlist_node be inside a struct. Would probably allow cleaning up the get-entry etc too, let me give that a whirl.
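As a rough sketch of that direction (illustrative only -- struct io_cache_entry and the put/get helpers below are assumptions about where this could go, not code from this series, and IO_ALLOC_CACHE_MAX stands in for whatever cap the cache uses):

/*
 * Hypothetical: the cache hands out an embedded entry type instead of
 * exposing its hlist to users.
 */
struct io_cache_entry {
        struct hlist_node node;
};

static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
                                      struct io_cache_entry *entry)
{
        if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {    /* assumed cap */
                cache->nr_cached++;
                hlist_add_head(&entry->node, &cache->list);
                return true;
        }
        return false;
}

static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
{
        if (!hlist_empty(&cache->list)) {
                struct hlist_node *node = cache->list.first;

                cache->nr_cached--;
                hlist_del(node);
                return container_of(node, struct io_cache_entry, node);
        }
        return NULL;
}

io_async_msghdr would then carry a struct io_cache_entry in its union instead of a bare hlist_node, and net.c would recover it with container_of(), keeping cache->list and nr_cached private to alloc_cache.h.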
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index bf8f95332eda..d54b8b7e0746 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -222,8 +222,7 @@ struct io_ring_ctx {
                 struct io_hash_table cancel_table_locked;
                 struct list_head cq_overflow_list;
                 struct io_alloc_cache apoll_cache;
-                struct xarray personalities;
-                u32 pers_next;
+                struct io_alloc_cache netmsg_cache;
         } ____cacheline_aligned_in_smp;

         /* IRQ completion list, under ->completion_lock */
@@ -241,6 +240,9 @@ struct io_ring_ctx {
         unsigned int file_alloc_start;
         unsigned int file_alloc_end;

+        struct xarray personalities;
+        u32 pers_next;
+
         struct {
                 /*
                  * We cache a range of free CQEs we can use, once exhausted it
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index b5098773d924..32110c5b4059 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -89,6 +89,7 @@
 #include "kbuf.h"
 #include "rsrc.h"
 #include "cancel.h"
+#include "net.h"

 #include "timeout.h"
 #include "poll.h"
@@ -297,6 +298,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
         INIT_LIST_HEAD(&ctx->cq_overflow_list);
         INIT_LIST_HEAD(&ctx->io_buffers_cache);
         io_alloc_cache_init(&ctx->apoll_cache);
+        io_alloc_cache_init(&ctx->netmsg_cache);
         init_completion(&ctx->ref_comp);
         xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
         mutex_init(&ctx->uring_lock);
@@ -2473,6 +2475,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
         __io_cqring_overflow_flush(ctx, true);
         io_eventfd_unregister(ctx);
         io_flush_apoll_cache(ctx);
+        io_flush_netmsg_cache(ctx);
         mutex_unlock(&ctx->uring_lock);
         io_destroy_buffers(ctx);
         if (ctx->sq_creds)
diff --git a/io_uring/net.c b/io_uring/net.c
index 6679069eeef1..ba7e94ff287c 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -12,6 +12,7 @@

 #include "io_uring.h"
 #include "kbuf.h"
+#include "alloc_cache.h"
 #include "net.h"

 #if defined(CONFIG_NET)
@@ -97,18 +98,57 @@ static bool io_net_retry(struct socket *sock, int flags)
         return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
 }

+static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
+{
+        struct io_async_msghdr *hdr = req->async_data;
+
+        if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
+                return;
+
+        if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
+                hlist_add_head(&hdr->cache_list, &req->ctx->netmsg_cache.list);
+                req->async_data = NULL;
+                req->flags &= ~REQ_F_ASYNC_DATA;
+        }
+}
+
+static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
+                                                      unsigned int issue_flags)
+{
+        struct io_ring_ctx *ctx = req->ctx;
+
+        if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+            !hlist_empty(&ctx->netmsg_cache.list)) {
+                struct io_async_msghdr *hdr;
+
+                hdr = hlist_entry(ctx->netmsg_cache.list.first,
+                                  struct io_async_msghdr, cache_list);
+                ctx->netmsg_cache.nr_cached--;
+                hlist_del(&hdr->cache_list);
+                req->flags |= REQ_F_ASYNC_DATA;
+                req->async_data = hdr;
+                return hdr;
+        }
+
+        if (!io_alloc_async_data(req))
+                return req->async_data;
+
+        return NULL;
+}
+
 static int io_setup_async_msg(struct io_kiocb *req,
-                              struct io_async_msghdr *kmsg)
+                              struct io_async_msghdr *kmsg,
+                              unsigned int issue_flags)
 {
         struct io_async_msghdr *async_msg = req->async_data;

         if (async_msg)
                 return -EAGAIN;
-        if (io_alloc_async_data(req)) {
+        async_msg = io_recvmsg_alloc_async(req, issue_flags);
+        if (!async_msg) {
                 kfree(kmsg->free_iov);
                 return -ENOMEM;
         }
-        async_msg = req->async_data;
         req->flags |= REQ_F_NEED_CLEANUP;
         memcpy(async_msg, kmsg, sizeof(*kmsg));
         async_msg->msg.msg_name = &async_msg->addr;
@@ -195,7 +235,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)

         if (!(req->flags & REQ_F_POLLED) &&
             (sr->flags & IORING_RECVSEND_POLL_FIRST))
-                return io_setup_async_msg(req, kmsg);
+                return io_setup_async_msg(req, kmsg, issue_flags);

         flags = sr->msg_flags;
         if (issue_flags & IO_URING_F_NONBLOCK)
@@ -207,13 +247,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)

         if (ret < min_ret) {
                 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
-                        return io_setup_async_msg(req, kmsg);
+                        return io_setup_async_msg(req, kmsg, issue_flags);
                 if (ret == -ERESTARTSYS)
                         ret = -EINTR;
                 if (ret > 0 && io_net_retry(sock, flags)) {
                         sr->done_io += ret;
                         req->flags |= REQ_F_PARTIAL_IO;
-                        return io_setup_async_msg(req, kmsg);
+                        return io_setup_async_msg(req, kmsg, issue_flags);
                 }
                 req_set_fail(req);
         }
@@ -221,6 +261,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
         if (kmsg->free_iov)
                 kfree(kmsg->free_iov);
         req->flags &= ~REQ_F_NEED_CLEANUP;
+        io_netmsg_recycle(req, issue_flags);
         if (ret >= 0)
                 ret += sr->done_io;
         else if (sr->done_io)
@@ -495,7 +536,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)

         if (!(req->flags & REQ_F_POLLED) &&
             (sr->flags & IORING_RECVSEND_POLL_FIRST))
-                return io_setup_async_msg(req, kmsg);
+                return io_setup_async_msg(req, kmsg, issue_flags);

         if (io_do_buffer_select(req)) {
                 void __user *buf;
@@ -519,13 +560,13 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
         ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
         if (ret < min_ret) {
                 if (ret == -EAGAIN && force_nonblock)
-                        return io_setup_async_msg(req, kmsg);
+                        return io_setup_async_msg(req, kmsg, issue_flags);
                 if (ret == -ERESTARTSYS)
                         ret = -EINTR;
                 if (ret > 0 && io_net_retry(sock, flags)) {
                         sr->done_io += ret;
                         req->flags |= REQ_F_PARTIAL_IO;
-                        return io_setup_async_msg(req, kmsg);
+                        return io_setup_async_msg(req, kmsg, issue_flags);
                 }
                 req_set_fail(req);
         } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
@@ -535,6 +576,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
         /* fast path, check for non-NULL to avoid function call */
         if (kmsg->free_iov)
                 kfree(kmsg->free_iov);
+        io_netmsg_recycle(req, issue_flags);
         req->flags &= ~REQ_F_NEED_CLEANUP;
         if (ret > 0)
                 ret += sr->done_io;
@@ -848,4 +890,17 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
         io_req_set_res(req, ret, 0);
         return IOU_OK;
 }
+
+void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
+{
+        while (!hlist_empty(&ctx->netmsg_cache.list)) {
+                struct io_async_msghdr *hdr;
+
+                hdr = hlist_entry(ctx->netmsg_cache.list.first,
+                                  struct io_async_msghdr, cache_list);
+                hlist_del(&hdr->cache_list);
+                kfree(hdr);
+        }
+        ctx->netmsg_cache.nr_cached = 0;
+}
 #endif
diff --git a/io_uring/net.h b/io_uring/net.h
index 81d71d164770..576efb602c7f 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -5,7 +5,10 @@

 #if defined(CONFIG_NET)
 struct io_async_msghdr {
-        struct iovec fast_iov[UIO_FASTIOV];
+        union {
+                struct iovec fast_iov[UIO_FASTIOV];
+                struct hlist_node cache_list;
+        };
         /* points to an allocated iov, if NULL we use fast_iov instead */
         struct iovec *free_iov;
         struct sockaddr __user *uaddr;
@@ -40,4 +43,10 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags);
 int io_connect_prep_async(struct io_kiocb *req);
 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_connect(struct io_kiocb *req, unsigned int issue_flags);
+
+void io_flush_netmsg_cache(struct io_ring_ctx *ctx);
+#else
+static inline void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
+{
+}
 #endif
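One detail worth noting in the net.h hunk above: cache_list shares storage with fast_iov, so parking a free io_async_msghdr in the cache costs no extra space -- the list node simply reuses the iovec array, which is far larger than an hlist_node. A throwaway compile-time check (not part of the patch, purely illustrative) makes that assumption explicit:

#include <linux/build_bug.h>
#include <linux/types.h>
#include <linux/uio.h>

/* Illustrative only: the cached list node must fit in the fast_iov
 * space it aliases inside struct io_async_msghdr. */
static_assert(sizeof(struct hlist_node) <= UIO_FASTIOV * sizeof(struct iovec));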
For recvmsg/sendmsg, if they don't complete inline, we currently need
to allocate a struct io_async_msghdr for each request. This is a
somewhat large struct.

Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
alloc + free overhead considerably, yielding 4-5% of extra performance
running netbench.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  6 ++-
 io_uring/io_uring.c            |  3 ++
 io_uring/net.c                 | 73 +++++++++++++++++++++++++++++-----
 io_uring/net.h                 | 11 ++++-
 4 files changed, 81 insertions(+), 12 deletions(-)
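For context on the workload this helps: a recvmsg that cannot complete inline is exactly the case that needs a struct io_async_msghdr, and with this patch that allocation is served from (and recycled back into) the per-ring cache instead of hitting kmalloc/kfree on every retry. A minimal liburing-style sketch of that pattern (illustrative; recv_one is a made-up helper and error handling is trimmed):

#include <liburing.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int recv_one(struct io_uring *ring, int sockfd)
{
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        struct iovec iov;
        struct msghdr msg = { 0 };
        char buf[4096];
        int ret;

        iov.iov_base = buf;
        iov.iov_len = sizeof(buf);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;

        sqe = io_uring_get_sqe(ring);
        io_uring_prep_recvmsg(sqe, sockfd, &msg, 0);

        /*
         * If no data is ready, the request goes async: the kernel copies
         * the msghdr state into an io_async_msghdr and retries when the
         * socket becomes readable. That is the allocation this patch
         * caches per ring.
         */
        io_uring_submit(ring);

        ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0)
                return ret;
        ret = cqe->res;
        io_uring_cqe_seen(ring, cqe);
        return ret;
}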