diff mbox series

[RFC,v2,17/19] io_uring: unclog ctx refs waiting with zc notifiers

Message ID 2c07d8e5cb5dfbd678d5a0bc6fb398aee82b67e4.1640029579.git.asml.silence@gmail.com (mailing list archive)
State RFC
Headers show
Series io_uring zerocopy tx | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply, async

Commit Message

Pavel Begunkov Dec. 21, 2021, 3:35 p.m. UTC
Currently every instance of struct io_tx_notifier holds a ctx reference,
including ones sitting in caches. So, when we try to quiesce the ring
(e.g. for register) we'd be waiting for refs that nobody can release.
That's currently worked around for cancellation.

Don't take ctx references; instead, wait for all notifiers to return to
the caches when needed. An even better solution would be to wait for all
rsrc refs. It's also nice to remove an extra pair of percpu_ref_get/put().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f79178a3f38..8cfa8ea161e4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -453,6 +453,7 @@  struct io_ring_ctx {
 		struct io_mapped_ubuf		*dummy_ubuf;
 		struct io_rsrc_data		*file_data;
 		struct io_rsrc_data		*buf_data;
+		int				nr_tx_ctx;
 
 		struct delayed_work		rsrc_put_work;
 		struct llist_head		rsrc_put_llist;
@@ -1982,7 +1983,6 @@  static void io_zc_tx_work_callback(struct work_struct *work)
 	io_cqring_ev_posted(ctx);
 
 	percpu_ref_put(rsrc_refs);
-	percpu_ref_put(&ctx->refs);
 }
 
 static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
@@ -2028,6 +2028,7 @@  static void io_notifier_free_cached(struct io_ring_ctx *ctx)
 					    struct io_tx_notifier, cache_node);
 		list_del(&notifier->cache_node);
 		kfree(notifier);
+		ctx->nr_tx_ctx--;
 	}
 }
 
@@ -2060,6 +2061,7 @@  static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
 		notifier = kmalloc(sizeof(*notifier), gfp_flags);
 		if (!notifier)
 			return NULL;
+		ctx->nr_tx_ctx++;
 		uarg = &notifier->uarg;
 		uarg->ctx = ctx;
 		uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
@@ -2072,7 +2074,6 @@  static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
 	io_set_rsrc_node(&notifier->fixed_rsrc_refs, ctx);
 
 	refcount_set(&notifier->uarg.refcnt, 1);
-	percpu_ref_get(&ctx->refs);
 	return notifier;
 }
 
@@ -9785,7 +9786,6 @@  static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 #endif
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 
-	io_notifier_free_cached(ctx);
 	io_sqe_tx_ctx_unregister(ctx);
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);
@@ -9946,6 +9946,19 @@  static __cold void io_ring_exit_work(struct work_struct *work)
 	spin_lock(&ctx->completion_lock);
 	spin_unlock(&ctx->completion_lock);
 
+	while (1) {
+		int nr;
+
+		mutex_lock(&ctx->uring_lock);
+		io_notifier_free_cached(ctx);
+		nr = ctx->nr_tx_ctx;
+		mutex_unlock(&ctx->uring_lock);
+
+		if (!nr)
+			break;
+		schedule_timeout(interval);
+	}
+
 	io_ring_ctx_free(ctx);
 }