[1/2] io_uring/napi: Introduce io_napi_tracking_ops

Message ID bfbb03a7ad6256b68d08429c0888a05032a1b182.1723567469.git.olivier@trillion01.com (mailing list archive)
State New
Series abstract napi tracking strategy

Commit Message

Olivier Langlois Aug. 13, 2024, 5:10 p.m. UTC
The long-term goal is to lay out a framework to be able to offer
different napi tracking strategies to the user. The obvious first
alternative strategy is static tracking, where the user updates the
napi_list manually, removing the overhead of io_uring managing the
list dynamically.
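
For illustration, a rough sketch (not part of this patch; the
static_tracking_* helpers are hypothetical) of how a static strategy
could later slot into the ops table introduced below:

/* napi ids are pinned by the user at registration time, so there is
 * nothing to track per request on the hot path */
static struct io_napi_tracking_ops static_tracking_ops = {
	.add_id       = NULL,
	.do_busy_loop = static_tracking_do_busy_loop,
	.show_fdinfo  = static_tracking_show_fdinfo,
};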

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
---
 include/linux/io_uring_types.h | 12 +++++-
 io_uring/fdinfo.c              |  4 ++
 io_uring/napi.c                | 76 ++++++++++++++++++++++++++++++----
 io_uring/napi.h                | 11 +----
 4 files changed, 86 insertions(+), 17 deletions(-)

Comments

Olivier Langlois Aug. 14, 2024, 11:44 a.m. UTC | #1
On Tue, 2024-08-13 at 13:10 -0400, Olivier Langlois wrote:
> 
> ---
>  include/linux/io_uring_types.h | 12 +++++-
>  io_uring/fdinfo.c              |  4 ++
>  io_uring/napi.c                | 76 ++++++++++++++++++++++++++++++----
>  io_uring/napi.h                | 11 +----
>  4 files changed, 86 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/io_uring_types.h
> b/include/linux/io_uring_types.h
> index 3315005df117..c1d1b28f8cca 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -217,6 +217,16 @@ struct io_alloc_cache {
>  	size_t			elem_size;
>  };
>  
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +struct io_napi_tracking_ops {
> +	void (*add_id)(struct io_kiocb *req);
> +	bool (*do_busy_loop)(struct io_ring_ctx *ctx,
> +			     void *loop_end_arg);
> +	void (*show_fdinfo)(struct io_ring_ctx *ctx,
> +			    struct seq_file *m);
> +};
> +#endif
> +
I have kept thinking about the critique...

add_id is either NULL or equal to dynamic_tracking_add_id, and the
pointer is even tested before being called. This pointer is easily
removed.

show_fdinfo, well, this one is so unimportant that if you don't like
it, it is very easily removable too. Nobody will notice.

The only thing that would remain is do_busy_loop. Its value can be
one of:

- no_tracking_do_busy_loop
- dynamic_tracking_do_busy_loop
- static_tracking_do_busy_loop

so the whole io_napi_tracking_ops could be replaced by a single
function pointer.
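
A minimal sketch of that reduced form (the napi_do_busy_loop field
name is hypothetical):

/* in struct io_ring_ctx, replacing the whole ops struct */
bool (*napi_do_busy_loop)(struct io_ring_ctx *ctx, void *loop_end_arg);

/* swapped at (un)register time, exactly like napi_ops is today */
WRITE_ONCE(ctx->napi_do_busy_loop, dynamic_tracking_do_busy_loop);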

At this point, the only remaining question is which is more
efficient: calling a function pointer, or going through 2 conditional
branches. I am of the opinion that the function pointer is better due
to my C++ background, but this is debatable...
Jens Axboe Aug. 14, 2024, 1:17 p.m. UTC | #2
On 8/14/24 5:44 AM, Olivier Langlois wrote:
> At this point, the only remaining question is which is more
> efficient: calling a function pointer, or going through 2 conditional
> branches. I am of the opinion that the function pointer is better due
> to my C++ background, but this is debatable...

As mentioned earlier, your C++ background for systems programming isn't
super relevant here. Even without mitigations, a perfectly predictable
branch is surely faster than an indirect function call. And with
mitigations, it's not even close.

It's a single handler, just do the branches. I don't think this is worth
fretting over.
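
For reference, a sketch of the branch-based dispatch suggested here
(the enum, the napi_track_mode field, and the static_tracking_* helper
are hypothetical, not part of this patch):

enum io_napi_tracking {
	IO_NAPI_TRACKING_INACTIVE,
	IO_NAPI_TRACKING_DYNAMIC,
	IO_NAPI_TRACKING_STATIC,
};

static bool io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				 void *loop_end_arg)
{
	/* predictable branches instead of an indirect call that
	 * mitigations turn into a retpoline */
	switch (READ_ONCE(ctx->napi_track_mode)) {
	case IO_NAPI_TRACKING_DYNAMIC:
		return dynamic_tracking_do_busy_loop(ctx, loop_end_arg);
	case IO_NAPI_TRACKING_STATIC:
		return static_tracking_do_busy_loop(ctx, loop_end_arg);
	default:
		return no_tracking_do_busy_loop(ctx, loop_end_arg);
	}
}
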
Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..c1d1b28f8cca 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -217,6 +217,16 @@  struct io_alloc_cache {
 	size_t			elem_size;
 };
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct io_napi_tracking_ops {
+	void (*add_id)(struct io_kiocb *req);
+	bool (*do_busy_loop)(struct io_ring_ctx *ctx,
+			     void *loop_end_arg);
+	void (*show_fdinfo)(struct io_ring_ctx *ctx,
+			    struct seq_file *m);
+};
+#endif
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -402,11 +412,11 @@  struct io_ring_ctx {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	struct list_head	napi_list;	/* track busy poll napi_id */
 	spinlock_t		napi_lock;	/* napi_list lock */
+	struct io_napi_tracking_ops *napi_ops;
 
 	/* napi busy poll default timeout */
 	ktime_t			napi_busy_poll_dt;
 	bool			napi_prefer_busy_poll;
-	bool			napi_enabled;
 
 	DECLARE_HASHTABLE(napi_ht, 4);
 #endif
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index b1e0e0d85349..fa773687a684 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -223,5 +223,9 @@  __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	}
 
 	spin_unlock(&ctx->completion_lock);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	ctx->napi_ops->show_fdinfo(ctx, m);
+#endif
+
 }
 #endif
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 1de1d4d62925..75ac850af0c0 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,7 +38,7 @@  static inline ktime_t net_to_ktime(unsigned long t)
 	return ns_to_ktime(t << 10);
 }
 
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
 {
 	struct hlist_head *hash_list;
 	unsigned int napi_id;
@@ -136,8 +136,52 @@  static bool io_napi_busy_loop_should_end(void *data,
 	return false;
 }
 
-static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
-				   void *loop_end_arg)
+/*
+ * Does not perform any busy polling, but still checks whether list
+ * entries are stale when the list is not empty. This can happen if
+ * napi is unregistered after having been enabled for some time.
+ */
+static bool no_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+				     void *loop_end_arg)
+{
+	struct io_napi_entry *e;
+	bool is_stale = false;
+
+	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+		if (time_after(jiffies, e->timeout))
+			is_stale = true;
+	}
+
+	return is_stale;
+}
+
+static void no_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+				    struct seq_file *m)
+{
+	seq_puts(m, "NAPI:\tdisabled\n");
+}
+
+/*
+ * default ops for a newly created ring for which NAPI busy poll is not enabled
+ */
+static struct io_napi_tracking_ops no_tracking_ops = {
+	.add_id = NULL,
+	.do_busy_loop = no_tracking_do_busy_loop,
+	.show_fdinfo = no_tracking_show_fdinfo,
+};
+
+static void dynamic_tracking_add_id(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct socket *sock;
+
+	sock = sock_from_file(req->file);
+	if (sock)
+		__io_napi_add(ctx, sock);
+}
+
+static bool dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+					  void *loop_end_arg)
 {
 	struct io_napi_entry *e;
 	bool (*loop_end)(void *, unsigned long) = NULL;
@@ -157,6 +201,23 @@  static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
 	return is_stale;
 }
 
+static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+					 struct seq_file *m)
+{
+	seq_puts(m, "NAPI:\tenabled\n");
+	seq_printf(m, "napi_busy_poll_dt:\t%lld\n", ktime_to_ns(ctx->napi_busy_poll_dt));
+	if (ctx->napi_prefer_busy_poll)
+		seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
+	else
+		seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
+}
+
+static struct io_napi_tracking_ops dynamic_tracking_ops = {
+	.add_id = dynamic_tracking_add_id,
+	.do_busy_loop = dynamic_tracking_do_busy_loop,
+	.show_fdinfo = dynamic_tracking_show_fdinfo,
+};
+
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
 				       struct io_wait_queue *iowq)
 {
@@ -172,7 +233,7 @@  static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
 
 	rcu_read_lock();
 	do {
-		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
+		is_stale = ctx->napi_ops->do_busy_loop(ctx, loop_end_arg);
 	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
 	rcu_read_unlock();
 
@@ -193,6 +254,7 @@  void io_napi_init(struct io_ring_ctx *ctx)
 	spin_lock_init(&ctx->napi_lock);
 	ctx->napi_prefer_busy_poll = false;
 	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
+	ctx->napi_ops = &no_tracking_ops;
 }
 
 /*
@@ -241,7 +303,7 @@  int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 
 	WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
-	WRITE_ONCE(ctx->napi_enabled, true);
+	WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
 	return 0;
 }
 
@@ -265,7 +327,7 @@  int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 
 	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
-	WRITE_ONCE(ctx->napi_enabled, false);
+	WRITE_ONCE(ctx->napi_ops, &no_tracking_ops);
 	return 0;
 }
 
@@ -321,7 +383,7 @@  int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
 		return 0;
 
 	rcu_read_lock();
-	is_stale = __io_napi_do_busy_loop(ctx, NULL);
+	is_stale = ctx->napi_ops->do_busy_loop(ctx, NULL);
 	rcu_read_unlock();
 
 	io_napi_remove_stale(ctx, is_stale);
diff --git a/io_uring/napi.h b/io_uring/napi.h
index 27b88c3eb428..3d68d8e7b108 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -15,8 +15,6 @@  void io_napi_free(struct io_ring_ctx *ctx);
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
 
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
-
 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
 		struct io_wait_queue *iowq, ktime_t to_wait);
 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
@@ -53,14 +51,9 @@  static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
 static inline void io_napi_add(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct socket *sock;
-
-	if (!READ_ONCE(ctx->napi_enabled))
-		return;
 
-	sock = sock_from_file(req->file);
-	if (sock)
-		__io_napi_add(ctx, sock);
+	if (ctx->napi_ops->add_id)
+		ctx->napi_ops->add_id(req);
 }
 
 #else
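
For context, a minimal userspace sketch of moving a ring from
no_tracking_ops to dynamic_tracking_ops through the existing
IORING_REGISTER_NAPI opcode (assumes a liburing recent enough to
provide io_uring_register_napi(); error handling trimmed):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_napi napi = {
		.busy_poll_to = 100,	/* busy poll timeout, usecs */
		.prefer_busy_poll = 1,
	};
	int ret;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* the kernel side lands in io_register_napi(), which installs
	 * the dynamic tracking strategy */
	ret = io_uring_register_napi(&ring, &napi);
	if (ret)
		fprintf(stderr, "register_napi: %d\n", ret);

	io_uring_queue_exit(&ring);
	return 0;
}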