@@ -217,6 +217,16 @@ struct io_alloc_cache {
size_t elem_size;
};
+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct io_napi_tracking_ops {
+ void (*add_id)(struct io_kiocb *req);
+ bool (*do_busy_loop)(struct io_ring_ctx *ctx,
+ void *loop_end_arg);
+ void (*show_fdinfo)(struct io_ring_ctx *ctx,
+ struct seq_file *m);
+};
+#endif
+
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
@@ -402,11 +412,11 @@ struct io_ring_ctx {
#ifdef CONFIG_NET_RX_BUSY_POLL
struct list_head napi_list; /* track busy poll napi_id */
spinlock_t napi_lock; /* napi_list lock */
+ struct io_napi_tracking_ops *napi_ops;
/* napi busy poll default timeout */
ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll;
- bool napi_enabled;
DECLARE_HASHTABLE(napi_ht, 4);
#endif
@@ -223,5 +223,9 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
}
spin_unlock(&ctx->completion_lock);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ ctx->napi_ops->show_fdinfo(ctx, m);
+#endif
+
}
#endif
@@ -38,7 +38,7 @@ static inline ktime_t net_to_ktime(unsigned long t)
return ns_to_ktime(t << 10);
}
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
struct hlist_head *hash_list;
unsigned int napi_id;
@@ -136,8 +136,52 @@ static bool io_napi_busy_loop_should_end(void *data,
return false;
}
-static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
- void *loop_end_arg)
+/*
+ * Does not perform any busy polling but still checks whether list entries
+ * have gone stale when the list is not empty. This can happen when NAPI
+ * is unregistered after having been enabled for some time.
+ */
+static bool no_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+ void *loop_end_arg)
+{
+ struct io_napi_entry *e;
+ bool is_stale = false;
+
+ list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+ if (time_after(jiffies, e->timeout))
+ is_stale = true;
+ }
+
+ return is_stale;
+}
+
+static void no_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m)
+{
+ seq_puts(m, "NAPI:\tdisabled\n");
+}
+
+/*
+ * default ops for a newly created ring for which NAPI busy poll is not enabled
+ */
+static struct io_napi_tracking_ops no_tracking_ops = {
+ .add_id = NULL,
+ .do_busy_loop = no_tracking_do_busy_loop,
+ .show_fdinfo = no_tracking_show_fdinfo,
+};
+
+static void dynamic_tracking_add_id(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct socket *sock;
+
+ sock = sock_from_file(req->file);
+ if (sock)
+ __io_napi_add(ctx, sock);
+}
+
+static bool dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+ void *loop_end_arg)
{
struct io_napi_entry *e;
bool (*loop_end)(void *, unsigned long) = NULL;
@@ -157,6 +201,23 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
return is_stale;
}
+static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m)
+{
+ seq_puts(m, "NAPI:\tenabled\n");
+ seq_printf(m, "napi_busy_poll_to:\t%u\n", ctx->napi_busy_poll_to);
+ if (ctx->napi_prefer_busy_poll)
+ seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
+ else
+ seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
+}
+
+static struct io_napi_tracking_ops dynamic_tracking_ops = {
+ .add_id = dynamic_tracking_add_id,
+ .do_busy_loop = dynamic_tracking_do_busy_loop,
+ .show_fdinfo = dynamic_tracking_show_fdinfo,
+};
+
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
@@ -172,7 +233,7 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
rcu_read_lock();
do {
- is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
+ is_stale = ctx->napi_ops->do_busy_loop(ctx, loop_end_arg);
} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
rcu_read_unlock();
@@ -193,6 +254,7 @@ void io_napi_init(struct io_ring_ctx *ctx)
spin_lock_init(&ctx->napi_lock);
ctx->napi_prefer_busy_poll = false;
ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
+ ctx->napi_ops = &no_tracking_ops;
}
/*
@@ -241,7 +303,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
- WRITE_ONCE(ctx->napi_enabled, true);
+ WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
return 0;
}
@@ -265,7 +327,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
- WRITE_ONCE(ctx->napi_enabled, false);
+ WRITE_ONCE(ctx->napi_ops, &no_tracking_ops);
return 0;
}
@@ -321,7 +383,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
return 0;
rcu_read_lock();
- is_stale = __io_napi_do_busy_loop(ctx, NULL);
+ is_stale = ctx->napi_ops->do_busy_loop(ctx, NULL);
rcu_read_unlock();
io_napi_remove_stale(ctx, is_stale);
@@ -15,8 +15,6 @@ void io_napi_free(struct io_ring_ctx *ctx);
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
-
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq, ktime_t to_wait);
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
@@ -53,14 +51,9 @@ static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
static inline void io_napi_add(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- struct socket *sock;
-
- if (!READ_ONCE(ctx->napi_enabled))
- return;
- sock = sock_from_file(req->file);
- if (sock)
- __io_napi_add(ctx, sock);
+ if (ctx->napi_ops->add_id)
+ ctx->napi_ops->add_id(req);
}
#else
The long-term goal is to lay out a framework that can offer different NAPI
tracking strategies to the user. The obvious first alternative strategy is
static tracking, where the user updates the napi_list manually, removing the
overhead of io_uring managing the list dynamically.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
---
 include/linux/io_uring_types.h | 12 +++++-
 io_uring/fdinfo.c              |  4 ++
 io_uring/napi.c                | 76 ++++++++++++++++++++++++++++++----
 io_uring/napi.h                | 11 +----
 4 files changed, 86 insertions(+), 17 deletions(-)
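
For illustration only, here is a rough sketch of how a follow-up static
tracking strategy could plug into the same ops table. This is not part of
the patch: the names (static_tracking_ops and friends) and the assumption
that the busy-poll walk can simply be shared with dynamic tracking are
guesses at what such a follow-up might look like.

static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
					 void *loop_end_arg)
{
	/*
	 * Walking napi_list is the same whether it was filled per request
	 * or populated explicitly by the user, so the dynamic helper can
	 * be reused as-is.
	 */
	return dynamic_tracking_do_busy_loop(ctx, loop_end_arg);
}

static void static_tracking_show_fdinfo(struct io_ring_ctx *ctx,
					struct seq_file *m)
{
	seq_puts(m, "NAPI:\tenabled\n");
	seq_puts(m, "napi tracking:\tstatic\n");
}

static struct io_napi_tracking_ops static_tracking_ops = {
	/* no per-request work: the user maintains napi_list explicitly */
	.add_id		= NULL,
	.do_busy_loop	= static_tracking_do_busy_loop,
	.show_fdinfo	= static_tracking_show_fdinfo,
};

Installing such a strategy would then mirror what io_register_napi() does
in this patch: a single WRITE_ONCE(ctx->napi_ops, &static_tracking_ops)
once the user has populated the list, with no other code paths needing to
know which strategy is active.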