[RFC,net-next,v3,16/29] io_uring: cache struct io_notif

Message ID	91a78581e59863bd45125195055a1712e1e202e3.1653992701.git.asml.silence@gmail.com (mailing list archive)
State	New
Headers	show Return-Path: <io-uring-owner@kernel.org> From: Pavel Begunkov <asml.silence@gmail.com> To: io-uring@vger.kernel.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org Cc: "David S . Miller" <davem@davemloft.net>, Jakub Kicinski <kuba@kernel.org>, Jonathan Lemon <jonathan.lemon@gmail.com>, Willem de Bruijn <willemb@google.com>, Jens Axboe <axboe@kernel.dk>, kernel-team@fb.com, Pavel Begunkov <asml.silence@gmail.com> Subject: [RFC net-next v3 16/29] io_uring: cache struct io_notif Date: Tue, 28 Jun 2022 19:56:38 +0100 Message-Id: <91a78581e59863bd45125195055a1712e1e202e3.1653992701.git.asml.silence@gmail.com> In-Reply-To: <cover.1653992701.git.asml.silence@gmail.com> References: <cover.1653992701.git.asml.silence@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	io_uring zerocopy send \| expand [RFC,net-next,v3,00/29] io_uring zerocopy send [RFC,net-next,v3,01/29] ipv4: avoid partial copy for zc [RFC,net-next,v3,02/29] ipv6: avoid partial copy for zc [RFC,net-next,v3,03/29] skbuff: add SKBFL_DONT_ORPHAN flag [RFC,net-next,v3,04/29] skbuff: carry external ubuf_info in msghdr [RFC,net-next,v3,05/29] net: bvec specific path in zerocopy_sg_from_iter [RFC,net-next,v3,06/29] net: optimise bvec-based zc page referencing [RFC,net-next,v3,07/29] net: don't track pfmemalloc for managed frags [RFC,net-next,v3,08/29] skbuff: don't mix ubuf_info of different types [RFC,net-next,v3,09/29] ipv4/udp: support zc with managed data [RFC,net-next,v3,10/29] ipv6/udp: support zc with managed data [RFC,net-next,v3,11/29] tcp: support zc with managed data [RFC,net-next,v3,12/29] tcp: kill extra io_uring's uarg refcounting [RFC,net-next,v3,13/29] net: let callers provide extra ubuf_info refs [RFC,net-next,v3,14/29] io_uring: opcode independent fixed buf import [RFC,net-next,v3,15/29] io_uring: add zc notification infrastructure [RFC,net-next,v3,16/29] io_uring: cache struct io_notif [RFC,net-next,v3,17/29] io_uring: complete notifiers in tw [RFC,net-next,v3,18/29] io_uring: add notification slot registration [RFC,net-next,v3,19/29] io_uring: rename IORING_OP_FILES_UPDATE [RFC,net-next,v3,20/29] io_uring: add zc notification flush requests [RFC,net-next,v3,21/29] io_uring: wire send zc request type [RFC,net-next,v3,22/29] io_uring: account locked pages for non-fixed zc [RFC,net-next,v3,23/29] io_uring: allow to pass addr into sendzc [RFC,net-next,v3,24/29] io_uring: add rsrc referencing for notifiers [RFC,net-next,v3,25/29] io_uring: sendzc with fixed buffers [RFC,net-next,v3,26/29] io_uring: flush notifiers after sendzc [RFC,net-next,v3,27/29] io_uring: allow to override zc tag on flush [RFC,net-next,v3,28/29] io_uring: batch submission notif referencing [RFC,net-next,v3,29/29] selftests/io_uring: test zerocopy send

Message ID

91a78581e59863bd45125195055a1712e1e202e3.1653992701.git.asml.silence@gmail.com (mailing list archive)

State

New

Headers

From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org,
        linux-kernel@vger.kernel.org
Cc: "David S . Miller" <davem@davemloft.net>,
        Jakub Kicinski <kuba@kernel.org>,
        Jonathan Lemon <jonathan.lemon@gmail.com>,
        Willem de Bruijn <willemb@google.com>,
        Jens Axboe <axboe@kernel.dk>, kernel-team@fb.com,
        Pavel Begunkov <asml.silence@gmail.com>
Subject: [RFC net-next v3 16/29] io_uring: cache struct io_notif
Date: Tue, 28 Jun 2022 19:56:38 +0100
Message-Id: 
 <91a78581e59863bd45125195055a1712e1e202e3.1653992701.git.asml.silence@gmail.com>
In-Reply-To: <cover.1653992701.git.asml.silence@gmail.com>
References: <cover.1653992701.git.asml.silence@gmail.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

io_uring zerocopy send | expand

Commit Message

Pavel Begunkov June 28, 2022, 6:56 p.m. UTC

kmalloc'ing struct io_notif is too expensive when done frequently, so cache
them like many other resources in io_uring. Keep two lists: the first one
is where we get notifiers from, and it's protected by ->uring_lock.
The second, to which we queue released notifiers, is protected by
->completion_lock. We splice the second list into the first when needed.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 68 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 61 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7d058deb5f73..422ff835bf36 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -381,6 +381,8 @@  struct io_notif {
 	u64			tag;
 	/* see struct io_notif_slot::seq */
 	u32			seq;
+	/* hook into ctx->notif_list and ctx->notif_list_locked */
+	struct list_head	cache_node;
 
 	union {
 		struct callback_head	task_work;
@@ -469,6 +471,8 @@  struct io_ring_ctx {
 		struct xarray		io_bl_xa;
 		struct list_head	io_buffers_cache;
 
+		/* struct io_notif cache protected by uring_lock */
+		struct list_head	notif_list;
 		struct list_head	timeout_list;
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
@@ -481,6 +485,9 @@  struct io_ring_ctx {
 	/* IRQ completion list, under ->completion_lock */
 	struct io_wq_work_list	locked_free_list;
 	unsigned int		locked_free_nr;
+	/* struct io_notif cache protected by completion_lock */
+	struct list_head	notif_list_locked;
+	unsigned int		notif_locked_nr;
 
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
@@ -1932,6 +1939,8 @@  static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_WQ_LIST(&ctx->locked_free_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+	INIT_LIST_HEAD(&ctx->notif_list);
+	INIT_LIST_HEAD(&ctx->notif_list_locked);
 	return ctx;
 err:
 	kfree(ctx->dummy_ubuf);
@@ -2795,12 +2804,15 @@  static void __io_notif_complete_tw(struct callback_head *cb)
 
 	spin_lock(&ctx->completion_lock);
 	io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq);
+
+	list_add(&notif->cache_node, &ctx->notif_list_locked);
+	ctx->notif_locked_nr++;
+
 	io_commit_cqring(ctx);
 	spin_unlock(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
 
 	percpu_ref_put(&ctx->refs);
-	kfree(notif);
 }
 
 static inline void io_notif_complete(struct io_notif *notif)
@@ -2827,21 +2839,62 @@  static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
 	queue_work(system_unbound_wq, &notif->commit_work);
 }
 
+static void io_notif_splice_cached(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	spin_lock(&ctx->completion_lock);
+	list_splice_init(&ctx->notif_list_locked, &ctx->notif_list);
+	ctx->notif_locked_nr = 0;
+	spin_unlock(&ctx->completion_lock);
+}
+
+static void io_notif_cache_purge(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	io_notif_splice_cached(ctx);
+
+	while (!list_empty(&ctx->notif_list)) {
+		struct io_notif *notif = list_first_entry(&ctx->notif_list,
+						struct io_notif, cache_node);
+
+		list_del(&notif->cache_node);
+		kfree(notif);
+	}
+}
+
+static inline bool io_notif_has_cached(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	if (likely(!list_empty(&ctx->notif_list)))
+		return true;
+	if (data_race(READ_ONCE(ctx->notif_locked_nr) <= IO_COMPL_BATCH))
+		return false;
+	io_notif_splice_cached(ctx);
+	return !list_empty(&ctx->notif_list);
+}
+
 static struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
 				       struct io_notif_slot *slot)
 	__must_hold(&ctx->uring_lock)
 {
 	struct io_notif *notif;
 
-	notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
-	if (!notif)
-		return NULL;
+	if (likely(io_notif_has_cached(ctx))) {
+		notif = list_first_entry(&ctx->notif_list,
+					 struct io_notif, cache_node);
+		list_del(&notif->cache_node);
+	} else {
+		notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
+		if (!notif)
+			return NULL;
+		/* pre-initialise some fields */
+		notif->ctx = ctx;
+		notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+		notif->uarg.callback = io_uring_tx_zerocopy_callback;
+	}
 
 	notif->seq = slot->seq++;
 	notif->tag = slot->tag;
-	notif->ctx = ctx;
-	notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
-	notif->uarg.callback = io_uring_tx_zerocopy_callback;
 	/* master ref owned by io_notif_slot, will be dropped on flush */
 	refcount_set(&notif->uarg.refcnt, 1);
 	percpu_ref_get(&ctx->refs);
@@ -11330,6 +11383,7 @@  static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 	WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
 
+	io_notif_cache_purge(ctx);
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);

[RFC,net-next,v3,16/29] io_uring: cache struct io_notif

Commit Message

Patch