From patchwork Sun Oct 13 18:28:24 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Langlois X-Patchwork-Id: 13833434 Received: from cloud48395.mywhc.ca (cloud48395.mywhc.ca [173.209.37.211]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DDE3213D52C for ; Sun, 13 Oct 2024 18:28:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=173.209.37.211 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844111; cv=none; b=gBR3SUJA0lTdzn5tvAIB9MNys335LlYUJ9XmPj8ROrHKk9DSajnrftiR+qnxVmfPVVwiYoccZb/vMrRX6qoAWh75/bLgQMUnai6weEma5zz4pejCwMinQ314ZoDDvmMmPAJ8nW7OLsYf5irjfpxM1/fnPLB7Ov6PVPiwsuIxdoM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844111; c=relaxed/simple; bh=XgzkacEd4557jf4FkMMAuQ38R4t1u+ouZTB6TqAUfhE=; h=From:Date:Message-ID:In-Reply-To:References:To:Subject; b=AKIsHeMHnydpmvcI9cf6tx8dQtKBHOcD0ZbA1F0K+UIcDe/fzAUGV2ViXfCV7SBtUNss+VEiywd0eFtbfayNgLHBo7w8rWhOmmLPDhS7Jfs+HIjOpoJya+Oo+ZHGAjpmeCFIufJA+BGKLjIuIAMiR2WUiihM1+xEsAQlyxiIEkc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com; spf=pass smtp.mailfrom=trillion01.com; arc=none smtp.client-ip=173.209.37.211 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=trillion01.com Received: from [45.44.224.220] (port=48014 helo=localhost) by cloud48395.mywhc.ca with esmtpsa (TLS1.2) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96.2) (envelope-from ) id 1t03KL-0002hy-1i; Sun, 13 Oct 2024 14:28:25 -0400 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:28:24 -0400 Message-ID: 
<3de3087563cf98f75266fd9f85fdba063a8720db.1728828877.git.olivier@trillion01.com> In-Reply-To: References: To: Jens Axboe ,Pavel Begunkov ,io-uring@vger.kernel.org Subject: [PATCH v4 1/6] io_uring/napi: protect concurrent io_napi_entry timeout accesses X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - cloud48395.mywhc.ca X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - trillion01.com X-Get-Message-Sender-Via: cloud48395.mywhc.ca: authenticated_id: olivier@trillion01.com X-Authenticated-Sender: cloud48395.mywhc.ca: olivier@trillion01.com X-Source: X-Source-Args: X-Source-Dir: Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: io_napi_entry timeout value can be updated while accessed from the poll functions. Its concurrent accesses are wrapped with READ_ONCE()/WRITE_ONCE() macros to avoid incorrect compiler optimizations. 
Signed-off-by: Olivier Langlois --- io_uring/napi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index d0cf694d0172..dda2e083fb5d 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -60,7 +60,7 @@ void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) rcu_read_lock(); e = io_napi_hash_find(hash_list, napi_id); if (e) { - e->timeout = jiffies + NAPI_TIMEOUT; + WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT); rcu_read_unlock(); return; } @@ -92,7 +92,7 @@ static void __io_napi_remove_stale(struct io_ring_ctx *ctx) spin_lock(&ctx->napi_lock); hash_for_each(ctx->napi_ht, i, e, node) { - if (time_after(jiffies, e->timeout)) { + if (time_after(jiffies, READ_ONCE(e->timeout))) { list_del(&e->list); hash_del_rcu(&e->node); kfree_rcu(e, rcu); @@ -150,7 +150,7 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); - if (time_after(jiffies, e->timeout)) + if (time_after(jiffies, READ_ONCE(e->timeout))) is_stale = true; } From patchwork Sun Oct 13 18:28:38 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Langlois X-Patchwork-Id: 13833436 Received: from cloud48395.mywhc.ca (cloud48395.mywhc.ca [173.209.37.211]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BBBF713AD22 for ; Sun, 13 Oct 2024 18:28:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=173.209.37.211 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844123; cv=none; b=V4HaMcZ0gt0qliT9Weys1r0zTVmUruvugjdlC7aZ+KnCV6JaaTOKnkCIvNREOt8m2E1Px5JZJBozsJjYcWlspDlSrk6Rz6H43lMvf7ZFV5wQ/sryeZQKPJH1pUJ2c6EQ1ECFfLg2HMID7kFQg1WQuXaBHEkesK77HgOSg4FDWCw= ARC-Message-Signature: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1728844123; c=relaxed/simple; bh=+INTVSSsgBMSUVTx5i/51LG8M8l0DQWPagLuI8sfkAM=; h=From:Date:Message-ID:In-Reply-To:References:To:Subject; b=KUY9ZNHrHzUbMoQKXngN2B2z617KnWggF58wzb//VriI9PH3FYWrEwQGT8u6xr12AcLae6xGQlKHDUFcEBDQtEKLQXPnBRqw9/BU8PrN1tt84Hz02ueS6WbMgoR8bmelg4Tdpm+yeMhmkYn7ULuXiydD2unJaC8V4wPUpMU5drg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com; spf=pass smtp.mailfrom=trillion01.com; arc=none smtp.client-ip=173.209.37.211 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=trillion01.com Received: from [45.44.224.220] (port=60118 helo=localhost) by cloud48395.mywhc.ca with esmtpsa (TLS1.2) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96.2) (envelope-from ) id 1t03KZ-0002kN-1I; Sun, 13 Oct 2024 14:28:39 -0400 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:28:38 -0400 Message-ID: <9f53b5169afa8c7bf3665a0b19dc2f7061173530.1728828877.git.olivier@trillion01.com> In-Reply-To: References: To: Jens Axboe ,Pavel Begunkov ,io-uring@vger.kernel.org Subject: [PATCH v4 2/6] io_uring/napi: fix io_napi_entry RCU accesses X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - cloud48395.mywhc.ca X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - trillion01.com X-Get-Message-Sender-Via: cloud48395.mywhc.ca: authenticated_id: olivier@trillion01.com X-Authenticated-Sender: cloud48395.mywhc.ca: olivier@trillion01.com X-Source: X-Source-Args: X-Source-Dir: Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: correct 3 RCU structures modifications that were not using the RCU functions to make their update. 
Signed-off-by: Olivier Langlois --- io_uring/napi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index dda2e083fb5d..921de9de8d75 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -81,19 +81,24 @@ void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) } hlist_add_tail_rcu(&e->node, hash_list); - list_add_tail(&e->list, &ctx->napi_list); + list_add_tail_rcu(&e->list, &ctx->napi_list); spin_unlock(&ctx->napi_lock); } static void __io_napi_remove_stale(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - unsigned int i; spin_lock(&ctx->napi_lock); - hash_for_each(ctx->napi_ht, i, e, node) { + /* + * list_for_each_entry_safe() is not required as long as: + * 1. list_del_rcu() does not reset the deleted node next pointer + * 2. kfree_rcu() delays the memory freeing until the next quiescent + * state + */ + list_for_each_entry(e, &ctx->napi_list, list) { if (time_after(jiffies, READ_ONCE(e->timeout))) { - list_del(&e->list); + list_del_rcu(&e->list); hash_del_rcu(&e->node); kfree_rcu(e, rcu); } @@ -204,13 +209,13 @@ void io_napi_init(struct io_ring_ctx *ctx) void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - unsigned int i; spin_lock(&ctx->napi_lock); - hash_for_each(ctx->napi_ht, i, e, node) { + list_for_each_entry(e, &ctx->napi_list, list) { hash_del_rcu(&e->node); kfree_rcu(e, rcu); } + INIT_LIST_HEAD_RCU(&ctx->napi_list); spin_unlock(&ctx->napi_lock); } From patchwork Sun Oct 13 18:28:50 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Langlois X-Patchwork-Id: 13833437 Received: from cloud48395.mywhc.ca (cloud48395.mywhc.ca [173.209.37.211]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8674B13AD22 for ; Sun, 13 Oct 2024 18:28:52 +0000 (UTC) Authentication-Results: 
smtp.subspace.kernel.org; arc=none smtp.client-ip=173.209.37.211 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844133; cv=none; b=mf/WN9NBDz6o3piWo0OQxf03zqIAlG8f5xI7wr6dtiPvF96qMonhIuCjDo+ypPgBwRQLp4QJzUB/6qaLTmHBJ7MsTV3dGr/27IcExad50YBG7l66BnSxpTEohu5/prsxc1noMq6xQfJZYAF7lDfMPFLtPuZndg5LJXjgWK5blCU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844133; c=relaxed/simple; bh=pygfYihDgA3LjNtu/7GYo38yPcgPaTexokneQUDDEEA=; h=From:Date:Message-ID:In-Reply-To:References:To:Subject; b=B1FM1YGzRHo+oSHApUtd9608pUFMBKpKKVHeE87/MSahJyk+OzKKEg6uoO58KZsJHLfIW49nwQLVfyJI4HmLC0k/n4CJtKUHqKF80onDZtC5oipDsw2JfIvFZCJK6DahiknyMtUX7s7USccdgN3EBe0hadgnr1vaUDMysm3Gxxc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com; spf=pass smtp.mailfrom=trillion01.com; arc=none smtp.client-ip=173.209.37.211 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=trillion01.com Received: from [45.44.224.220] (port=44464 helo=localhost) by cloud48395.mywhc.ca with esmtpsa (TLS1.2) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96.2) (envelope-from ) id 1t03Kl-0002l7-0W; Sun, 13 Oct 2024 14:28:51 -0400 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:28:50 -0400 Message-ID: In-Reply-To: References: To: Jens Axboe ,Pavel Begunkov ,io-uring@vger.kernel.org Subject: [PATCH v4 3/6] io_uring/napi: improve __io_napi_add X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - cloud48395.mywhc.ca X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - trillion01.com X-Get-Message-Sender-Via: cloud48395.mywhc.ca: authenticated_id: olivier@trillion01.com X-Authenticated-Sender: cloud48395.mywhc.ca: 
olivier@trillion01.com X-Source: X-Source-Args: X-Source-Dir: Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: 1. move the sock->sk pointer validity test outside the function to avoid the function call overhead and to make the function more reusable 2. change its name to __io_napi_add_id to be more precise about what it is doing 3. return an error code to report errors Signed-off-by: Olivier Langlois --- io_uring/napi.c | 19 ++++++------------- io_uring/napi.h | 6 +++--- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index 921de9de8d75..5e2299e7ff8e 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -38,22 +38,14 @@ static inline ktime_t net_to_ktime(unsigned long t) return ns_to_ktime(t << 10); } -void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) +int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) { struct hlist_head *hash_list; - unsigned int napi_id; - struct sock *sk; struct io_napi_entry *e; - sk = sock->sk; - if (!sk) - return; - - napi_id = READ_ONCE(sk->sk_napi_id); - /* Non-NAPI IDs can be rejected. 
*/ if (napi_id < MIN_NAPI_ID) - return; + return -EINVAL; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; @@ -62,13 +54,13 @@ void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) if (e) { WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT); rcu_read_unlock(); - return; + return -EEXIST; } rcu_read_unlock(); e = kmalloc(sizeof(*e), GFP_NOWAIT); if (!e) - return; + return -ENOMEM; e->napi_id = napi_id; e->timeout = jiffies + NAPI_TIMEOUT; @@ -77,12 +69,13 @@ void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) if (unlikely(io_napi_hash_find(hash_list, napi_id))) { spin_unlock(&ctx->napi_lock); kfree(e); - return; + return -EEXIST; } hlist_add_tail_rcu(&e->node, hash_list); list_add_tail_rcu(&e->list, &ctx->napi_list); spin_unlock(&ctx->napi_lock); + return 0; } static void __io_napi_remove_stale(struct io_ring_ctx *ctx) diff --git a/io_uring/napi.h b/io_uring/napi.h index fd275ef0456d..4ae622f37b30 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -15,7 +15,7 @@ void io_napi_free(struct io_ring_ctx *ctx); int io_register_napi(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); -void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock); +int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); @@ -48,8 +48,8 @@ static inline void io_napi_add(struct io_kiocb *req) return; sock = sock_from_file(req->file); - if (sock) - __io_napi_add(ctx, sock); + if (sock && sock->sk) + __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id)); } #else From patchwork Sun Oct 13 18:29:02 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Langlois X-Patchwork-Id: 13833438 Received: from cloud48395.mywhc.ca (cloud48395.mywhc.ca [173.209.37.211]) (using TLSv1.2 
with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B062813AD22 for ; Sun, 13 Oct 2024 18:29:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=173.209.37.211 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844145; cv=none; b=dGdCemjC7/2Q4pbqHz57GZwMeReys0ZUqCW28JHZOxiTdxDVS9TgzzzBQ9CY0nfBHB09W1h7oRRakX7/hihKGVzsfj7+7NlvxFzubbKJiVLgbZv+N28J/1H60Dndd4LVf1MgKwOheba1yXuNJxlm/WU+y+pntDqJ6PUoxlgFG3I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844145; c=relaxed/simple; bh=jPcXzvTUPk1tjsQYyE6/m3gYkcrwXQrcu3P2eTCKMUM=; h=From:Date:Message-ID:In-Reply-To:References:To:Subject; b=WMvLUP8yklUiSYqP6yMcWFsMVSYhVjftlzMsk5nc6e9bQvjHwq5ZZqrWsKtrtd/OU+QZJuSwMfCkbo0Or6fl8LAxZuY3WplK4guNiFy2WIrUTRpcMz/WakIXS721IsXXF63ji57XfA18Pqoos2cKJ6mHaFPF+3LguKeb+/SvXiA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com; spf=pass smtp.mailfrom=trillion01.com; arc=none smtp.client-ip=173.209.37.211 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=trillion01.com Received: from [45.44.224.220] (port=52780 helo=localhost) by cloud48395.mywhc.ca with esmtpsa (TLS1.2) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96.2) (envelope-from ) id 1t03Kw-0002lq-2J; Sun, 13 Oct 2024 14:29:02 -0400 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:29:02 -0400 Message-ID: <2680ca47ee183cfdb89d1a40c84d349edeb620ab.1728828877.git.olivier@trillion01.com> In-Reply-To: References: To: Jens Axboe ,Pavel Begunkov ,io-uring@vger.kernel.org Subject: [PATCH v4 4/6] io_uring/napi: Use lock guards X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - 
cloud48395.mywhc.ca X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - trillion01.com X-Get-Message-Sender-Via: cloud48395.mywhc.ca: authenticated_id: olivier@trillion01.com X-Authenticated-Sender: cloud48395.mywhc.ca: olivier@trillion01.com X-Source: X-Source-Args: X-Source-Dir: Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Convert napi locks to use the shiny new Scope-Based Resource Management machinery. Signed-off-by: Olivier Langlois --- io_uring/napi.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index 5e2299e7ff8e..6d5fdd397f2f 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -49,14 +49,13 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; - rcu_read_lock(); - e = io_napi_hash_find(hash_list, napi_id); - if (e) { - WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT); - rcu_read_unlock(); - return -EEXIST; + scoped_guard(rcu) { + e = io_napi_hash_find(hash_list, napi_id); + if (e) { + WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT); + return -EEXIST; + } } - rcu_read_unlock(); e = kmalloc(sizeof(*e), GFP_NOWAIT); if (!e) @@ -65,6 +64,10 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) e->napi_id = napi_id; e->timeout = jiffies + NAPI_TIMEOUT; + /* + * guard(spinlock) is not used to manually unlock it before calling + * kfree() + */ spin_lock(&ctx->napi_lock); if (unlikely(io_napi_hash_find(hash_list, napi_id))) { spin_unlock(&ctx->napi_lock); @@ -82,7 +85,7 @@ static void __io_napi_remove_stale(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - spin_lock(&ctx->napi_lock); + guard(spinlock)(&ctx->napi_lock); /* * list_for_each_entry_safe() is not required as long as: * 1. 
list_del_rcu() does not reset the deleted node next pointer @@ -96,7 +99,6 @@ static void __io_napi_remove_stale(struct io_ring_ctx *ctx) kfree_rcu(e, rcu); } } - spin_unlock(&ctx->napi_lock); } static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) @@ -168,11 +170,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, if (list_is_singular(&ctx->napi_list)) loop_end_arg = iowq; - rcu_read_lock(); - do { - is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); - } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg); - rcu_read_unlock(); + scoped_guard(rcu) { + do { + is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); + } while (!io_napi_busy_loop_should_end(iowq, start_time) && + !loop_end_arg); + } io_napi_remove_stale(ctx, is_stale); } @@ -203,13 +206,12 @@ void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - spin_lock(&ctx->napi_lock); + guard(spinlock)(&ctx->napi_lock); list_for_each_entry(e, &ctx->napi_list, list) { hash_del_rcu(&e->node); kfree_rcu(e, rcu); } INIT_LIST_HEAD_RCU(&ctx->napi_list); - spin_unlock(&ctx->napi_lock); } /* @@ -305,9 +307,9 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) if (list_empty_careful(&ctx->napi_list)) return 0; - rcu_read_lock(); - is_stale = __io_napi_do_busy_loop(ctx, NULL); - rcu_read_unlock(); + scoped_guard(rcu) { + is_stale = __io_napi_do_busy_loop(ctx, NULL); + } io_napi_remove_stale(ctx, is_stale); return 1; From patchwork Sun Oct 13 18:29:12 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Langlois X-Patchwork-Id: 13833439 Received: from cloud48395.mywhc.ca (cloud48395.mywhc.ca [173.209.37.211]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 25EE813AD22 for ; Sun, 13 Oct 2024 18:29:14 +0000 (UTC) Authentication-Results: 
smtp.subspace.kernel.org; arc=none smtp.client-ip=173.209.37.211 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844155; cv=none; b=mZLpIAlF5tpaZPejEc/7kkhYICvJCC3+izKOvPF/qvc5WNZj1zmhKlac/NVUywlgw12d9Tvz0vNBGLLKp4dbFeqbv/lOXJv0M1aSUTKt1n/DePeQs2ARzFAJTI2bT1TsU1PrcIu/ej6KNqLP4ROWkRdMliBpx0dgWfd/lZyHx/c= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844155; c=relaxed/simple; bh=9K1K3OinQUqDyqLpGtddE97QiTGQ15zmQSnMbYZfJm4=; h=From:Date:Message-ID:In-Reply-To:References:To:Subject; b=mVes/mlf05peVJORg7NsbVpmNrvUvcjn5Q11+LG55UFN4hK2D6zREWTlTFcOv14tvb+84U+xMcdW2fZPVJE69+gWFSdSCvOKjemqJCPI77660+s9JWQYIVkynkh25zX/EGWaX1Dzt3ZOvdNPQWoSjc3F0kZZGDcy3eC9IxebaKY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com; spf=pass smtp.mailfrom=trillion01.com; arc=none smtp.client-ip=173.209.37.211 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=trillion01.com Received: from [45.44.224.220] (port=36810 helo=localhost) by cloud48395.mywhc.ca with esmtpsa (TLS1.2) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96.2) (envelope-from ) id 1t03L7-0002mW-0D; Sun, 13 Oct 2024 14:29:13 -0400 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:29:12 -0400 Message-ID: In-Reply-To: References: To: Jens Axboe ,Pavel Begunkov ,io-uring@vger.kernel.org Subject: [PATCH v4 5/6] io_uring/napi: clean up __io_napi_do_busy_loop X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - cloud48395.mywhc.ca X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - trillion01.com X-Get-Message-Sender-Via: cloud48395.mywhc.ca: authenticated_id: olivier@trillion01.com X-Authenticated-Sender: 
cloud48395.mywhc.ca: olivier@trillion01.com X-Source: X-Source-Args: X-Source-Dir: Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: __io_napi_do_busy_loop now requires loop_end to be passed as a parameter. This makes the code cleaner and also has the benefit of removing a branch since the only caller not passing NULL for loop_end_arg is also setting the value conditionally. Signed-off-by: Olivier Langlois --- io_uring/napi.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index 6d5fdd397f2f..1de1543d8034 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -137,15 +137,12 @@ static bool io_napi_busy_loop_should_end(void *data, } static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), void *loop_end_arg) { struct io_napi_entry *e; - bool (*loop_end)(void *, unsigned long) = NULL; bool is_stale = false; - if (loop_end_arg) - loop_end = io_napi_busy_loop_should_end; - list_for_each_entry_rcu(e, &ctx->napi_list, list) { napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); @@ -161,18 +158,22 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { unsigned long start_time = busy_loop_current_time(); + bool (*loop_end)(void *, unsigned long) = NULL; void *loop_end_arg = NULL; bool is_stale = false; /* Singular lists use a different napi loop end check function and are * only executed once. 
*/ - if (list_is_singular(&ctx->napi_list)) + if (list_is_singular(&ctx->napi_list)) { + loop_end = io_napi_busy_loop_should_end; loop_end_arg = iowq; + } scoped_guard(rcu) { do { - is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); + is_stale = __io_napi_do_busy_loop(ctx, loop_end, + loop_end_arg); } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg); } @@ -308,7 +309,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) return 0; scoped_guard(rcu) { - is_stale = __io_napi_do_busy_loop(ctx, NULL); + is_stale = __io_napi_do_busy_loop(ctx, NULL, NULL); } io_napi_remove_stale(ctx, is_stale); From patchwork Sun Oct 13 18:29:24 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Langlois X-Patchwork-Id: 13833440 Received: from cloud48395.mywhc.ca (cloud48395.mywhc.ca [173.209.37.211]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CF59113AD22 for ; Sun, 13 Oct 2024 18:29:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=173.209.37.211 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844167; cv=none; b=nUCprChlm7OBhabMGJ71SCUfSHB3L8I+YtzA6JRAeE3VCkO0heAWwiW792J/9KsSupEqpeChx8P/1HrbRaF6rrVoWOI1gFUnt+nu+3z3mAKSYa7AlvcmTlAsWu3G8MSqUEolDW7jeYNlHLflV3z3P1F+yKO1KhfHuIWUweWmdZM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728844167; c=relaxed/simple; bh=TQS093TV2nzQd85CQhvjpRgO0vlV4oXvC1KcL1kCcsg=; h=From:Date:Message-ID:In-Reply-To:References:To:Subject; b=s+iV1zYNOu9iyhEKneg67F1uZz/jT230ByF2eMa/HMRBMadu+ZkojBoRoqSD3H/UXSkT+Ut6dhedBCKmJgBgHOib5L/lzX59GNfDjiYYxQcF4PGrRdCuzZTxHxqqHkTvPnQhEpMnlE1j1/pIUCVuG45MfmaefiIgnJqC9iDKO9U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com; spf=pass 
smtp.mailfrom=trillion01.com; arc=none smtp.client-ip=173.209.37.211 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=trillion01.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=trillion01.com Received: from [45.44.224.220] (port=43342 helo=localhost) by cloud48395.mywhc.ca with esmtpsa (TLS1.2) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96.2) (envelope-from ) id 1t03LI-0002na-2Q; Sun, 13 Oct 2024 14:29:24 -0400 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:29:24 -0400 Message-ID: <96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com> In-Reply-To: References: To: Jens Axboe ,Pavel Begunkov ,io-uring@vger.kernel.org Subject: [PATCH v4 6/6] io_uring/napi: add static napi tracking strategy X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - cloud48395.mywhc.ca X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - trillion01.com X-Get-Message-Sender-Via: cloud48395.mywhc.ca: authenticated_id: olivier@trillion01.com X-Authenticated-Sender: cloud48395.mywhc.ca: olivier@trillion01.com X-Source: X-Source-Args: X-Source-Dir: Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Add the static napi tracking strategy. That allows the user to manually manage the napi ids list for busy polling, and eliminate the overhead of dynamically updating the list from the fast path. 
Signed-off-by: Olivier Langlois --- include/linux/io_uring_types.h | 2 +- include/uapi/linux/io_uring.h | 32 ++++++++++- io_uring/fdinfo.c | 54 ++++++++++++++----- io_uring/napi.c | 97 ++++++++++++++++++++++++++++++---- io_uring/napi.h | 2 +- 5 files changed, 160 insertions(+), 27 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 4b9ba523978d..f435433f29a3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -409,7 +409,7 @@ struct io_ring_ctx { /* napi busy poll default timeout */ ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; - bool napi_enabled; + u8 napi_track_mode; DECLARE_HASHTABLE(napi_ht, 4); #endif diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 86cb385fe0b5..99a7a082421e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -771,12 +771,40 @@ struct io_uring_buf_status { __u32 resv[8]; }; +enum io_uring_napi_op { + /* register/unregister backward compatible opcode */ + IO_URING_NAPI_REGISTER_OP = 0, + + /* opcodes to update napi_list; rejected unless static tracking is active */ + IO_URING_NAPI_STATIC_ADD_ID = 1, + IO_URING_NAPI_STATIC_DEL_ID = 2 +}; + +enum io_uring_napi_tracking_strategy { + /* value must be 0 for backward compatibility */ + IO_URING_NAPI_TRACKING_DYNAMIC = 0, + IO_URING_NAPI_TRACKING_STATIC = 1, + IO_URING_NAPI_TRACKING_INACTIVE = 255 +}; + /* argument for IORING_(UN)REGISTER_NAPI */ struct io_uring_napi { __u32 busy_poll_to; __u8 prefer_busy_poll; - __u8 pad[3]; - __u64 resv; + + /* an io_uring_napi_op value */ + __u8 opcode; + __u8 pad[2]; + + /* + * for IO_URING_NAPI_REGISTER_OP, it is an + * io_uring_napi_tracking_strategy value. + * + * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID + * it is the napi id to add/del from napi_list. 
+ */ + __u32 op_param; + __u32 resv; }; /* diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 6b1247664b35..f58d568060cb 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -46,6 +46,46 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, return 0; } +#ifdef CONFIG_NET_RX_BUSY_POLL +static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m, + const char *tracking_strategy) +{ + seq_puts(m, "NAPI:\tenabled\n"); + seq_printf(m, "napi tracking:\t%s\n", tracking_strategy); + seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); + if (ctx->napi_prefer_busy_poll) + seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); + else + seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); +} + +static __cold void napi_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m) +{ + unsigned int mode = READ_ONCE(ctx->napi_track_mode); + + switch (mode) { + case IO_URING_NAPI_TRACKING_INACTIVE: + seq_puts(m, "NAPI:\tdisabled\n"); + break; + case IO_URING_NAPI_TRACKING_DYNAMIC: + common_tracking_show_fdinfo(ctx, m, "dynamic"); + break; + case IO_URING_NAPI_TRACKING_STATIC: + common_tracking_show_fdinfo(ctx, m, "static"); + break; + default: + seq_printf(m, "NAPI:\tunknown mode (%u)\n", mode); + } +} +#else +static inline void napi_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m) +{ +} +#endif + /* * Caller holds a reference to the file already, we don't need to do * anything else to get an extra reference. 
@@ -221,18 +261,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) } spin_unlock(&ctx->completion_lock); - -#ifdef CONFIG_NET_RX_BUSY_POLL - if (ctx->napi_enabled) { - seq_puts(m, "NAPI:\tenabled\n"); - seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); - if (ctx->napi_prefer_busy_poll) - seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); - else - seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); - } else { - seq_puts(m, "NAPI:\tdisabled\n"); - } -#endif + napi_show_fdinfo(ctx, m); } #endif diff --git a/io_uring/napi.c b/io_uring/napi.c index 1de1543d8034..b1ade3fda30f 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -81,6 +81,27 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) return 0; } +static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id) +{ + struct hlist_head *hash_list; + struct io_napi_entry *e; + + /* Non-NAPI IDs can be rejected. */ + if (napi_id < MIN_NAPI_ID) + return -EINVAL; + + hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; + guard(spinlock)(&ctx->napi_lock); + e = io_napi_hash_find(hash_list, napi_id); + if (!e) + return -ENOENT; + + list_del_rcu(&e->list); + hash_del_rcu(&e->node); + kfree_rcu(e, rcu); + return 0; +} + static void __io_napi_remove_stale(struct io_ring_ctx *ctx) { struct io_napi_entry *e; @@ -136,9 +157,25 @@ static bool io_napi_busy_loop_should_end(void *data, return false; } -static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, - bool (*loop_end)(void *, unsigned long), - void *loop_end_arg) +/* + * static tracking: busy poll every napi_list entry, never report stale entries + */ +static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) +{ + struct io_napi_entry *e; + + list_for_each_entry_rcu(e, &ctx->napi_list, list) + napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, + ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); + return false; +} + +static bool 
+dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { struct io_napi_entry *e; bool is_stale = false; @@ -154,6 +191,16 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, return is_stale; } +static inline bool +__io_napi_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) +{ + if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC) + return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); + return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); +} + static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { @@ -195,6 +242,7 @@ void io_napi_init(struct io_ring_ctx *ctx) spin_lock_init(&ctx->napi_lock); ctx->napi_prefer_busy_poll = false; ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); + ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE; } /* @@ -215,6 +263,24 @@ void io_napi_free(struct io_ring_ctx *ctx) INIT_LIST_HEAD_RCU(&ctx->napi_list); } +static int io_napi_register_napi(struct io_ring_ctx *ctx, + struct io_uring_napi *napi) +{ + switch (napi->op_param) { + case IO_URING_NAPI_TRACKING_DYNAMIC: + case IO_URING_NAPI_TRACKING_STATIC: + break; + default: + return -EINVAL; + } + /* clean the napi list for new settings */ + io_napi_free(ctx); + WRITE_ONCE(ctx->napi_track_mode, napi->op_param); + WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); + WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); + return 0; +} + /* * io_napi_register() - Register napi with io-uring * @ctx: pointer to io-uring context structure @@ -226,7 +292,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), - .prefer_busy_poll = ctx->napi_prefer_busy_poll + .prefer_busy_poll = ctx->napi_prefer_busy_poll, + .op_param = ctx->napi_track_mode }; struct 
io_uring_napi napi; @@ -234,16 +301,26 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) return -EINVAL; if (copy_from_user(&napi, arg, sizeof(napi))) return -EFAULT; - if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) + if (napi.pad[0] || napi.pad[1] || napi.resv) return -EINVAL; if (copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; - WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); - WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); - WRITE_ONCE(ctx->napi_enabled, true); - return 0; + switch (napi.opcode) { + case IO_URING_NAPI_REGISTER_OP: + return io_napi_register_napi(ctx, &napi); + case IO_URING_NAPI_STATIC_ADD_ID: + if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) + return -EINVAL; + return __io_napi_add_id(ctx, napi.op_param); + case IO_URING_NAPI_STATIC_DEL_ID: + if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) + return -EINVAL; + return __io_napi_del_id(ctx, napi.op_param); + default: + return -EINVAL; + } } /* @@ -266,7 +343,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); - WRITE_ONCE(ctx->napi_enabled, false); + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); return 0; } diff --git a/io_uring/napi.h b/io_uring/napi.h index 4ae622f37b30..fa742f42e09b 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -44,7 +44,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_enabled)) + if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) return; sock = sock_from_file(req->file);