From patchwork Thu Jun 16 23:47:09 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 12884989 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 43B80C433EF for ; Thu, 16 Jun 2022 23:48:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1379110AbiFPXsO (ORCPT ); Thu, 16 Jun 2022 19:48:14 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41974 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1348137AbiFPXsN (ORCPT ); Thu, 16 Jun 2022 19:48:13 -0400 Received: from smtp-fw-80006.amazon.com (smtp-fw-80006.amazon.com [99.78.197.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 03F352B1B4 for ; Thu, 16 Jun 2022 16:48:10 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1655423291; x=1686959291; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=bbsoaTkErfZYqhCXfb/FH4xhJEo1JYSAL1s+JkiyIgw=; b=Hh7rWyRVLoHOyiSrU0b9wunmd19g6yK548GeJ/ex3DBunZnaGXDW2vQU 7Q31M1Gb6sardDOsWS8K5VPKq2R5mgJSVehiqUVXLsZXKN/MC2+QMm/Zw BtbNRIOZIuL8Gmu1rhyA56ZwAe2W0D73TR7uV5SW2Qw2YrB0w/WdfXkuB o=; X-IronPort-AV: E=Sophos;i="5.92,306,1650931200"; d="scan'208";a="98685432" Received: from pdx4-co-svc-p1-lb2-vlan3.amazon.com (HELO email-inbound-relay-iad-1e-f771ae83.us-east-1.amazon.com) ([10.25.36.214]) by smtp-border-fw-80006.pdx80.corp.amazon.com with ESMTP; 16 Jun 2022 23:48:11 +0000 Received: from EX13MTAUWB001.ant.amazon.com (iad12-ws-svc-p26-lb9-vlan2.iad.amazon.com [10.40.163.34]) by email-inbound-relay-iad-1e-f771ae83.us-east-1.amazon.com (Postfix) with 
ESMTPS id A2B8D122AF8; Thu, 16 Jun 2022 23:48:08 +0000 (UTC) Received: from EX13D04ANC001.ant.amazon.com (10.43.157.89) by EX13MTAUWB001.ant.amazon.com (10.43.161.207) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:07 +0000 Received: from 88665a182662.ant.amazon.com (10.43.160.26) by EX13D04ANC001.ant.amazon.com (10.43.157.89) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:05 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni CC: Amit Shah , Kuniyuki Iwashima , Kuniyuki Iwashima , Subject: [PATCH v1 net-next 1/6] af_unix: Clean up some sock_net() uses. Date: Thu, 16 Jun 2022 16:47:09 -0700 Message-ID: <20220616234714.4291-2-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220616234714.4291-1-kuniyu@amazon.com> References: <20220616234714.4291-1-kuniyu@amazon.com> MIME-Version: 1.0 X-Originating-IP: [10.43.160.26] X-ClientProxiedBy: EX13d09UWC004.ant.amazon.com (10.43.162.114) To EX13D04ANC001.ant.amazon.com (10.43.157.89) Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org Some functions define a net pointer only for one-shot use. Others call sock_net() redundantly even when a net pointer is available. Let's fix these and make the code simpler. 
Signed-off-by: Kuniyuki Iwashima --- net/unix/af_unix.c | 33 ++++++++++++++------------------- net/unix/diag.c | 3 +-- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3453e0053f76..990257f02e7c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -932,7 +932,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, memset(&u->scm_stat, 0, sizeof(struct scm_stat)); unix_insert_unbound_socket(sk); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + sock_prot_inuse_add(net, sk->sk_prot, 1); return sk; @@ -1293,9 +1293,8 @@ static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + struct sock *sk = sock->sk; struct sock *other; int err; @@ -1316,7 +1315,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, } restart: - other = unix_find_other(net, sunaddr, alen, sock->type); + other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); if (IS_ERR(other)) { err = PTR_ERR(other); goto out; @@ -1404,15 +1403,13 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; struct unix_sock *u = unix_sk(sk), *newu, *otheru; - struct sock *newsk = NULL; - struct sock *other = NULL; + struct net *net = sock_net(sk); struct sk_buff *skb = NULL; - int st; - int err; long timeo; + int err; + int st; err = unix_validate_addr(sunaddr, addr_len); if (err) @@ -1432,7 +1429,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, */ /* create new sock for complete connection */ 
- newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); + newsk = unix_create1(net, NULL, 0, sock->type); if (IS_ERR(newsk)) { err = PTR_ERR(newsk); newsk = NULL; @@ -1840,17 +1837,15 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); - struct sock *other = NULL; - int err; - struct sk_buff *skb; - long timeo; + struct sock *sk = sock->sk, *other = NULL; + struct unix_sock *u = unix_sk(sk); struct scm_cookie scm; + struct sk_buff *skb; int data_len = 0; int sk_locked; + long timeo; + int err; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1917,7 +1912,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (sunaddr == NULL) goto out_free; - other = unix_find_other(net, sunaddr, msg->msg_namelen, + other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); diff --git a/net/unix/diag.c b/net/unix/diag.c index bb0b5ea1655f..4e3dc8179fa4 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -308,7 +308,6 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); - struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -317,7 +316,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = unix_diag_dump, }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); + return netlink_dump_start(sock_net(skb->sk)->diag_nlsk, skb, h, &c); } else return unix_diag_get_exact(skb, h, nlmsg_data(h)); } From patchwork Thu Jun 16 23:47:10 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 12884993 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 18D2EC433EF for ; Thu, 16 Jun 2022 23:48:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1378747AbiFPXsg (ORCPT ); Thu, 16 Jun 2022 19:48:36 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42672 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1348137AbiFPXsg (ORCPT ); Thu, 16 Jun 2022 19:48:36 -0400 Received: from smtp-fw-6001.amazon.com (smtp-fw-6001.amazon.com [52.95.48.154]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7097260DA6 for ; Thu, 16 Jun 2022 16:48:35 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1655423316; x=1686959316; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=tmVf/UlSWzphyug5cwKhsJdjodVKLjdnhhEbC9h0V2M=; b=v1udXTgx4XFCo84zDF+9AeGG5sajiW+3yIsqoif6IiyFxuto5UaYpryM MhTGBH7DqxthBZqD7LeIR7MNnFsMVMQJfEmEKcvUC08H+QiKinxuV8bBL qRmr5e0FHh3J8jedYDQE6oenWMmWmko6vmzA/a9agd51XTAE5Fzt1A1NU w=; Received: from iad12-co-svc-p1-lb1-vlan3.amazon.com (HELO email-inbound-relay-iad-1a-a31e1d63.us-east-1.amazon.com) ([10.43.8.6]) by smtp-border-fw-6001.iad6.amazon.com with ESMTP; 16 Jun 2022 23:48:24 +0000 Received: from EX13MTAUWB001.ant.amazon.com (iad12-ws-svc-p26-lb9-vlan3.iad.amazon.com [10.40.163.38]) by email-inbound-relay-iad-1a-a31e1d63.us-east-1.amazon.com (Postfix) with ESMTPS id 0B79CA2AA7; Thu, 16 Jun 2022 23:48:22 +0000 (UTC) Received: from EX13D04ANC001.ant.amazon.com (10.43.157.89) by EX13MTAUWB001.ant.amazon.com (10.43.161.207) with Microsoft SMTP 
Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:21 +0000 Received: from 88665a182662.ant.amazon.com (10.43.160.26) by EX13D04ANC001.ant.amazon.com (10.43.157.89) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:19 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni CC: Amit Shah , Kuniyuki Iwashima , Kuniyuki Iwashima , Subject: [PATCH v1 net-next 2/6] af_unix: Include the whole hash table size in UNIX_HASH_SIZE. Date: Thu, 16 Jun 2022 16:47:10 -0700 Message-ID: <20220616234714.4291-3-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220616234714.4291-1-kuniyu@amazon.com> References: <20220616234714.4291-1-kuniyu@amazon.com> MIME-Version: 1.0 X-Originating-IP: [10.43.160.26] X-ClientProxiedBy: EX13d09UWC004.ant.amazon.com (10.43.162.114) To EX13D04ANC001.ant.amazon.com (10.43.157.89) Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org Currently, the size of AF_UNIX hash table is UNIX_HASH_SIZE * 2, the first half for bind()ed sockets and the second half for unbound ones. UNIX_HASH_SIZE * 2 is used to define the table and iterate over it. In some places, we use ARRAY_SIZE(unix_socket_table) instead of UNIX_HASH_SIZE * 2. However, we cannot use it anymore because we will allocate the hash table dynamically. Then, we would have to add UNIX_HASH_SIZE * 2 in many places, which would be troublesome. This patch adapts the UNIX_HASH_SIZE definition to include bound and unbound sockets and defines a new UNIX_HASH_MOD macro to ease calculations. 
Signed-off-by: Kuniyuki Iwashima --- include/net/af_unix.h | 7 ++++--- net/unix/af_unix.c | 18 +++++++++--------- net/unix/diag.c | 6 ++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a7ef624ed726..acb56e463db1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -16,12 +16,13 @@ void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); struct sock *unix_peer_get(struct sock *sk); -#define UNIX_HASH_SIZE 256 +#define UNIX_HASH_MOD (256 - 1) +#define UNIX_HASH_SIZE (256 * 2) #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; -extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; +extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; +extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 990257f02e7c..c0804ae9c96a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,9 +118,9 @@ #include "scm.h" -spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; +spinlock_t unix_table_locks[UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_table_locks); -struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; +struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); static atomic_long_t unix_nr_socks; @@ -137,12 +137,12 @@ static unsigned int unix_unbound_hash(struct sock *sk) hash ^= hash >> 8; hash ^= sk->sk_type; - return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1)); + return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); } static unsigned int unix_bsd_hash(struct inode *i) { - return i->i_ino & (UNIX_HASH_SIZE - 1); + return i->i_ino & UNIX_HASH_MOD; } static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, @@ -155,14 +155,14 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, hash ^= hash >> 8; hash ^= type; - return hash & 
(UNIX_HASH_SIZE - 1); + return hash & UNIX_HASH_MOD; } static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) { /* hash1 and hash2 is never the same because - * one is between 0 and UNIX_HASH_SIZE - 1, and - * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2. + * one is between 0 and UNIX_HASH_MOD, and + * another is between UNIX_HASH_MOD + 1 and UNIX_HASH_SIZE - 1. */ if (hash1 > hash2) swap(hash1, hash2); @@ -3239,7 +3239,7 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) unsigned long bucket = get_bucket(*pos); struct sock *sk; - while (bucket < ARRAY_SIZE(unix_socket_table)) { + while (bucket < UNIX_HASH_SIZE) { spin_lock(&unix_table_locks[bucket]); sk = unix_from_bucket(seq, pos); @@ -3666,7 +3666,7 @@ static int __init af_unix_init(void) BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); - for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) + for (i = 0; i < UNIX_HASH_SIZE; i++) spin_lock_init(&unix_table_locks[i]); rc = proto_register(&unix_dgram_proto, 1); diff --git a/net/unix/diag.c b/net/unix/diag.c index 4e3dc8179fa4..c5d1cca72aa5 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -204,9 +204,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_slot = cb->args[0]; num = s_num = cb->args[1]; - for (slot = s_slot; - slot < ARRAY_SIZE(unix_socket_table); - s_num = 0, slot++) { + for (slot = s_slot; slot < UNIX_HASH_SIZE; s_num = 0, slot++) { struct sock *sk; num = 0; @@ -242,7 +240,7 @@ static struct sock *unix_lookup_by_ino(unsigned int ino) struct sock *sk; int i; - for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { + for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock(&unix_table_locks[i]); sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { From patchwork Thu Jun 16 23:47:11 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 12884994 
X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 40C47C43334 for ; Thu, 16 Jun 2022 23:48:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1378825AbiFPXs5 (ORCPT ); Thu, 16 Jun 2022 19:48:57 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42908 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S236151AbiFPXs4 (ORCPT ); Thu, 16 Jun 2022 19:48:56 -0400 Received: from smtp-fw-33001.amazon.com (smtp-fw-33001.amazon.com [207.171.190.10]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2665B62A34 for ; Thu, 16 Jun 2022 16:48:55 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1655423335; x=1686959335; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=49b8UVFVbTWQk/bn+BX/q7YE4LBhVh1yKHkX5mXzre8=; b=hiLZ0NkIyKADv0DY5D+SfmpPGH7FJA54NwfBizussoVL9hgGYPjETePl 6vJUtNqUdSZUyuU8LOxAt5xj2Z2A4xsDyuHSnG99gwWjvhmzIyo9CDA52 p8UMhka5C94f4IEJDu9pQmlozwipLKnD57ubS8hCE0bhIO22hcWVZge+r I=; X-IronPort-AV: E=Sophos;i="5.92,306,1650931200"; d="scan'208";a="202787023" Received: from iad12-co-svc-p1-lb1-vlan2.amazon.com (HELO email-inbound-relay-iad-1d-10222bbc.us-east-1.amazon.com) ([10.43.8.2]) by smtp-border-fw-33001.sea14.amazon.com with ESMTP; 16 Jun 2022 23:48:40 +0000 Received: from EX13MTAUWB001.ant.amazon.com (iad12-ws-svc-p26-lb9-vlan3.iad.amazon.com [10.40.163.38]) by email-inbound-relay-iad-1d-10222bbc.us-east-1.amazon.com (Postfix) with ESMTPS id BF8061A0CEA; Thu, 16 Jun 2022 23:48:38 +0000 (UTC) Received: from EX13D04ANC001.ant.amazon.com (10.43.157.89) by EX13MTAUWB001.ant.amazon.com (10.43.161.207) with Microsoft SMTP Server (TLS) id 
15.0.1497.36; Thu, 16 Jun 2022 23:48:37 +0000 Received: from 88665a182662.ant.amazon.com (10.43.160.26) by EX13D04ANC001.ant.amazon.com (10.43.157.89) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:35 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni CC: Amit Shah , Kuniyuki Iwashima , Kuniyuki Iwashima , Subject: [PATCH v1 net-next 3/6] af_unix: Define a per-netns hash table. Date: Thu, 16 Jun 2022 16:47:11 -0700 Message-ID: <20220616234714.4291-4-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220616234714.4291-1-kuniyu@amazon.com> References: <20220616234714.4291-1-kuniyu@amazon.com> MIME-Version: 1.0 X-Originating-IP: [10.43.160.26] X-ClientProxiedBy: EX13d09UWC004.ant.amazon.com (10.43.162.114) To EX13D04ANC001.ant.amazon.com (10.43.157.89) Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org This commit adds a per-netns hash table for AF_UNIX. Note that its size is fixed as UNIX_HASH_SIZE for now. 
Signed-off-by: Kuniyuki Iwashima Reported-by: kernel test robot --- include/net/af_unix.h | 5 +++++ include/net/netns/unix.h | 2 ++ net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index acb56e463db1..0a17e49af0c9 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; +struct unix_hashbucket { + spinlock_t lock; + struct hlist_head head; +}; + struct unix_address { refcount_t refcnt; int len; diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 91a3d7e39198..975c4e3f8a5b 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,8 +5,10 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +struct unix_hashbucket; struct ctl_table_header; struct netns_unix { + struct unix_hashbucket *hash; int sysctl_max_dgram_qlen; struct ctl_table_header *ctl; }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c0804ae9c96a..3c07702e2349 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { static int __net_init unix_net_init(struct net *net) { - int error = -ENOMEM; + int i; net->unx.sysctl_max_dgram_qlen = 10; if (unix_sysctl_register(net)) @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) #ifdef CONFIG_PROC_FS if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, - sizeof(struct seq_net_private))) { - unix_sysctl_unregister(net); - goto out; + sizeof(struct seq_net_private))) + goto err_sysctl; +#endif + + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, + GFP_KERNEL); + if (!net->unx.hash) + goto err_proc; + + for (i = 0; i < UNIX_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&net->unx.hash[i].head); + 
spin_lock_init(&net->unx.hash[i].lock); } + + return 0; + +err_proc: +#ifdef CONFIG_PROC_FS + remove_proc_entry("unix", net->proc_net); #endif - error = 0; +err_sysctl: + unix_sysctl_unregister(net); out: - return error; + return -ENOMEM; } static void __net_exit unix_net_exit(struct net *net) { + kfree(net->unx.hash); unix_sysctl_unregister(net); remove_proc_entry("unix", net->proc_net); } @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, + GFP_KERNEL); + if (!init_net.unx.hash) + goto out; + + for (i = 0; i < UNIX_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); + spin_lock_init(&init_net.unx.hash[i].lock); + } + for (i = 0; i < UNIX_HASH_SIZE; i++) spin_lock_init(&unix_table_locks[i]); @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) proto_unregister(&unix_dgram_proto); proto_unregister(&unix_stream_proto); unregister_pernet_subsys(&unix_net_ops); + kfree(init_net.unx.hash); } /* Earlier than device_initcall() so that other drivers invoking From patchwork Thu Jun 16 23:47:12 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 12884995 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 099F1C433EF for ; Thu, 16 Jun 2022 23:49:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1379153AbiFPXtI (ORCPT ); Thu, 16 Jun 2022 19:49:08 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:43002 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1378998AbiFPXtH (ORCPT ); Thu, 16 
Jun 2022 19:49:07 -0400 Received: from smtp-fw-2101.amazon.com (smtp-fw-2101.amazon.com [72.21.196.25]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E423662BE5 for ; Thu, 16 Jun 2022 16:49:05 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1655423346; x=1686959346; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=yqVrDXMV2EiSGzq40U7sIGdG5vNgxwONxa05pSAApCQ=; b=uX171zBIEFCJwpsYliVSSHhoV//Z1w4G3Q6ZQdeFpWMSiLyuVwr+/FRA vIa/l3OntiHOLWaeU2M26ButAWkHqpmkkJdcqu2X5MTOJcoPLCKUCG4Bq 7djschMGN4OURf9zTdi1BGrM0iWZAR0SbBn/Ttce8cZPaJKypjMtCMG1A 0=; X-IronPort-AV: E=Sophos;i="5.92,306,1650931200"; d="scan'208";a="208621984" Received: from iad12-co-svc-p1-lb1-vlan2.amazon.com (HELO email-inbound-relay-iad-1d-54a073b7.us-east-1.amazon.com) ([10.43.8.2]) by smtp-border-fw-2101.iad2.amazon.com with ESMTP; 16 Jun 2022 23:48:55 +0000 Received: from EX13MTAUWB001.ant.amazon.com (iad12-ws-svc-p26-lb9-vlan3.iad.amazon.com [10.40.163.38]) by email-inbound-relay-iad-1d-54a073b7.us-east-1.amazon.com (Postfix) with ESMTPS id 6870893C1F; Thu, 16 Jun 2022 23:48:53 +0000 (UTC) Received: from EX13D04ANC001.ant.amazon.com (10.43.157.89) by EX13MTAUWB001.ant.amazon.com (10.43.161.207) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:52 +0000 Received: from 88665a182662.ant.amazon.com (10.43.160.26) by EX13D04ANC001.ant.amazon.com (10.43.157.89) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:48:50 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni CC: Amit Shah , Kuniyuki Iwashima , Kuniyuki Iwashima , Subject: [PATCH v1 net-next 4/6] af_unix: Acquire/Release per-netns hash table's locks. 
Date: Thu, 16 Jun 2022 16:47:12 -0700 Message-ID: <20220616234714.4291-5-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220616234714.4291-1-kuniyu@amazon.com> References: <20220616234714.4291-1-kuniyu@amazon.com> MIME-Version: 1.0 X-Originating-IP: [10.43.160.26] X-ClientProxiedBy: EX13d09UWC004.ant.amazon.com (10.43.162.114) To EX13D04ANC001.ant.amazon.com (10.43.157.89) Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org This commit adds extra spin_lock/spin_unlock() for a per-netns hash table inside the existing ones for unix_table_locks. As of this commit, sockets are still linked in the global hash table. After putting sockets in a per-netns hash table in the next patch, we remove the global hash table in the last patch of this series. Signed-off-by: Kuniyuki Iwashima Reported-by: kernel test robot --- net/unix/af_unix.c | 75 +++++++++++++++++++++++++++++++--------------- net/unix/diag.c | 23 +++++++++----- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3c07702e2349..ae21e3fb86da 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -158,7 +158,8 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, return hash & UNIX_HASH_MOD; } -static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) +static void unix_table_double_lock(struct net *net, + unsigned int hash1, unsigned int hash2) { /* hash1 and hash2 is never the same because * one is between 0 and UNIX_HASH_MOD, and @@ -169,10 +170,17 @@ static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) spin_lock(&unix_table_locks[hash1]); spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); + + spin_lock(&net->unx.hash[hash1].lock); + spin_lock(&net->unx.hash[hash2].lock); } -static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) +static void unix_table_double_unlock(struct net *net, + unsigned 
int hash1, unsigned int hash2) { + spin_unlock(&net->unx.hash[hash1].lock); + spin_unlock(&net->unx.hash[hash2].lock); + spin_unlock(&unix_table_locks[hash1]); spin_unlock(&unix_table_locks[hash2]); } @@ -316,17 +324,21 @@ static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, __unix_insert_socket(sk); } -static void unix_remove_socket(struct sock *sk) +static void unix_remove_socket(struct net *net, struct sock *sk) { spin_lock(&unix_table_locks[sk->sk_hash]); + spin_lock(&net->unx.hash[sk->sk_hash].lock); __unix_remove_socket(sk); + spin_unlock(&net->unx.hash[sk->sk_hash].lock); spin_unlock(&unix_table_locks[sk->sk_hash]); } -static void unix_insert_unbound_socket(struct sock *sk) +static void unix_insert_unbound_socket(struct net *net, struct sock *sk) { spin_lock(&unix_table_locks[sk->sk_hash]); + spin_lock(&net->unx.hash[sk->sk_hash].lock); __unix_insert_socket(sk); + spin_unlock(&net->unx.hash[sk->sk_hash].lock); spin_unlock(&unix_table_locks[sk->sk_hash]); } @@ -356,28 +368,33 @@ static inline struct sock *unix_find_socket_byname(struct net *net, struct sock *s; spin_lock(&unix_table_locks[hash]); + spin_lock(&net->unx.hash[hash].lock); s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); + spin_unlock(&net->unx.hash[hash].lock); spin_unlock(&unix_table_locks[hash]); return s; } -static struct sock *unix_find_socket_byinode(struct inode *i) +static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) { unsigned int hash = unix_bsd_hash(i); struct sock *s; spin_lock(&unix_table_locks[hash]); + spin_lock(&net->unx.hash[hash].lock); sk_for_each(s, &unix_socket_table[hash]) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); + spin_unlock(&net->unx.hash[hash].lock); spin_unlock(&unix_table_locks[hash]); return s; } } + spin_unlock(&net->unx.hash[hash].lock); spin_unlock(&unix_table_locks[hash]); return NULL; } @@ -576,12 +593,12 @@ static 
void unix_sock_destructor(struct sock *sk) static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); - struct path path; struct sock *skpair; struct sk_buff *skb; + struct path path; int state; - unix_remove_socket(sk); + unix_remove_socket(sock_net(sk), sk); /* Clear state */ unix_state_lock(sk); @@ -930,7 +947,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); - unix_insert_unbound_socket(sk); + unix_insert_unbound_socket(net, sk); sock_prot_inuse_add(net, sk->sk_prot, 1); @@ -1015,7 +1032,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, if (!S_ISSOCK(inode->i_mode)) goto path_put; - sk = unix_find_socket_byinode(inode); + sk = unix_find_socket_byinode(net, inode); if (!sk) goto path_put; @@ -1074,6 +1091,7 @@ static int unix_autobind(struct sock *sk) { unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + struct net *net = sock_net(sk); struct unix_address *addr; u32 lastnum, ordernum; int err; @@ -1102,11 +1120,10 @@ static int unix_autobind(struct sock *sk) sprintf(addr->name->sun_path + 1, "%05x", ordernum); new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - unix_table_double_lock(old_hash, new_hash); + unix_table_double_lock(net, old_hash, new_hash); - if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - new_hash)) { - unix_table_double_unlock(old_hash, new_hash); + if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { + unix_table_double_unlock(net, old_hash, new_hash); /* __unix_find_socket_byname() may take long time if many names * are already in use. 
@@ -1124,7 +1141,7 @@ static int unix_autobind(struct sock *sk) } __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); err = 0; out: mutex_unlock(&u->bindlock); @@ -1138,6 +1155,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + struct net *net = sock_net(sk); struct user_namespace *ns; // barf... struct unix_address *addr; struct dentry *dentry; @@ -1178,11 +1196,11 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, goto out_unlock; new_hash = unix_bsd_hash(d_backing_inode(dentry)); - unix_table_double_lock(old_hash, new_hash); + unix_table_double_lock(net, old_hash, new_hash); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); return 0; @@ -1205,6 +1223,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, { unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + struct net *net = sock_net(sk); struct unix_address *addr; int err; @@ -1222,19 +1241,18 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, } new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - unix_table_double_lock(old_hash, new_hash); + unix_table_double_lock(net, old_hash, new_hash); - if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - new_hash)) + if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) goto out_spin; __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); 
mutex_unlock(&u->bindlock); return 0; out_spin: - unix_table_double_unlock(old_hash, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); err = -EADDRINUSE; out_mutex: mutex_unlock(&u->bindlock); @@ -3237,15 +3255,18 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) { unsigned long bucket = get_bucket(*pos); + struct net *net = seq_file_net(seq); struct sock *sk; while (bucket < UNIX_HASH_SIZE) { spin_lock(&unix_table_locks[bucket]); + spin_lock(&net->unx.hash[bucket].lock); sk = unix_from_bucket(seq, pos); if (sk) return sk; + spin_unlock(&net->unx.hash[bucket].lock); spin_unlock(&unix_table_locks[bucket]); *pos = set_bucket_offset(++bucket, 1); @@ -3258,11 +3279,13 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, loff_t *pos) { unsigned long bucket = get_bucket(*pos); + struct net *net = seq_file_net(seq); for (sk = sk_next(sk); sk; sk = sk_next(sk)) - if (sock_net(sk) == seq_file_net(seq)) + if (sock_net(sk) == net) return sk; + spin_unlock(&net->unx.hash[bucket].lock); spin_unlock(&unix_table_locks[bucket]); *pos = set_bucket_offset(++bucket, 1); @@ -3292,8 +3315,10 @@ static void unix_seq_stop(struct seq_file *seq, void *v) { struct sock *sk = v; - if (sk) + if (sk) { + spin_unlock(&seq_file_net(seq)->unx.hash[sk->sk_hash].lock); spin_unlock(&unix_table_locks[sk->sk_hash]); + } } static int unix_seq_show(struct seq_file *seq, void *v) @@ -3381,6 +3406,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) { struct bpf_unix_iter_state *iter = seq->private; + struct net *net = seq_file_net(seq); unsigned int expected = 1; struct sock *sk; @@ -3388,7 +3414,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) iter->batch[iter->end_sk++] = start_sk; for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { - if (sock_net(sk) != seq_file_net(seq)) + if (sock_net(sk) != net) 
continue; if (iter->end_sk < iter->max_sk) { @@ -3399,6 +3425,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) expected++; } + spin_unlock(&net->unx.hash[start_sk->sk_hash].lock); spin_unlock(&unix_table_locks[start_sk->sk_hash]); return expected; diff --git a/net/unix/diag.c b/net/unix/diag.c index c5d1cca72aa5..41b67b82f51f 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -195,9 +195,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct unix_diag_req *req; - int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; + struct unix_diag_req *req; req = nlmsg_data(cb->nlh); @@ -209,6 +209,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; spin_lock(&unix_table_locks[slot]); + spin_lock(&net->unx.hash[slot].lock); sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; @@ -220,12 +221,14 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) { + spin_unlock(&net->unx.hash[slot].lock); spin_unlock(&unix_table_locks[slot]); goto done; } next: num++; } + spin_unlock(&net->unx.hash[slot].lock); spin_unlock(&unix_table_locks[slot]); } done: @@ -235,19 +238,22 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static struct sock *unix_lookup_by_ino(unsigned int ino) +static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino) { struct sock *sk; int i; for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock(&unix_table_locks[i]); + spin_lock(&net->unx.hash[i].lock); sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); + spin_unlock(&net->unx.hash[i].lock); spin_unlock(&unix_table_locks[i]); return sk; } + 
spin_unlock(&net->unx.hash[i].lock); spin_unlock(&unix_table_locks[i]); } return NULL; @@ -257,16 +263,17 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, struct unix_diag_req *req) { - int err = -EINVAL; - struct sock *sk; - struct sk_buff *rep; - unsigned int extra_len; struct net *net = sock_net(in_skb->sk); + unsigned int extra_len; + struct sk_buff *rep; + struct sock *sk; + int err; + err = -EINVAL; if (req->udiag_ino == 0) goto out_nosk; - sk = unix_lookup_by_ino(req->udiag_ino); + sk = unix_lookup_by_ino(net, req->udiag_ino); err = -ENOENT; if (sk == NULL) goto out_nosk; From patchwork Thu Jun 16 23:47:13 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 12884996 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E46A6C43334 for ; Thu, 16 Jun 2022 23:49:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1379205AbiFPXt3 (ORCPT ); Thu, 16 Jun 2022 19:49:29 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:43244 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1379449AbiFPXt1 (ORCPT ); Thu, 16 Jun 2022 19:49:27 -0400 Received: from smtp-fw-9102.amazon.com (smtp-fw-9102.amazon.com [207.171.184.29]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1DD68606E1 for ; Thu, 16 Jun 2022 16:49:26 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1655423367; x=1686959367; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=4FQ2xWDdjpZBZPEnXWZArpa3nA0zh9PWqmSOCy+BPK0=; 
b=KJQNc2lfKiCNUNqaRvWvVe1OZ2H8SeX/CGxoSD7J+T/e5XuhFxyBJ2Pk I/6veUTRcJNhTS3jp8rfl5eZxiVJHAZQdxQI/REVWKSMMNfXVDYW9PhHp n09aPWzG9CIoZNjzbZYaryz075DM2yblLQdZdasTZo6BtdX1Z5ZydcxL1 Q=; X-IronPort-AV: E=Sophos;i="5.92,306,1650931200"; d="scan'208";a="229031435" Received: from pdx4-co-svc-p1-lb2-vlan3.amazon.com (HELO email-inbound-relay-iad-1a-87b71607.us-east-1.amazon.com) ([10.25.36.214]) by smtp-border-fw-9102.sea19.amazon.com with ESMTP; 16 Jun 2022 23:49:10 +0000 Received: from EX13MTAUWB001.ant.amazon.com (iad12-ws-svc-p26-lb9-vlan3.iad.amazon.com [10.40.163.38]) by email-inbound-relay-iad-1a-87b71607.us-east-1.amazon.com (Postfix) with ESMTPS id 6647B147B31; Thu, 16 Jun 2022 23:49:08 +0000 (UTC) Received: from EX13D04ANC001.ant.amazon.com (10.43.157.89) by EX13MTAUWB001.ant.amazon.com (10.43.161.207) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:49:07 +0000 Received: from 88665a182662.ant.amazon.com (10.43.160.26) by EX13D04ANC001.ant.amazon.com (10.43.157.89) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:49:05 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni CC: Amit Shah , Kuniyuki Iwashima , Kuniyuki Iwashima , Subject: [PATCH v1 net-next 5/6] af_unix: Put a socket into a per-netns hash table. Date: Thu, 16 Jun 2022 16:47:13 -0700 Message-ID: <20220616234714.4291-6-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220616234714.4291-1-kuniyu@amazon.com> References: <20220616234714.4291-1-kuniyu@amazon.com> MIME-Version: 1.0 X-Originating-IP: [10.43.160.26] X-ClientProxiedBy: EX13d09UWC004.ant.amazon.com (10.43.162.114) To EX13D04ANC001.ant.amazon.com (10.43.157.89) Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org This commit replaces the global hash table with a per-netns one and removes the global one. 
We now link a socket in each netns's hash table so we can save some netns comparisons when iterating through a hash bucket. Signed-off-by: Kuniyuki Iwashima --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 49 +++++++++++++++++-------------------------- net/unix/diag.c | 9 +++----- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0a17e49af0c9..cee4f2fca444 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -22,7 +22,6 @@ struct sock *unix_peer_get(struct sock *sk); extern unsigned int unix_tot_inflight; extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; -extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; struct unix_hashbucket { spinlock_t lock; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ae21e3fb86da..a93915066cb6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -120,8 +120,6 @@ spinlock_t unix_table_locks[UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_table_locks); -struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_socket_table); static atomic_long_t unix_nr_socks; /* SMP locking strategy: @@ -308,20 +306,20 @@ static void __unix_remove_socket(struct sock *sk) sk_del_node_init(sk); } -static void __unix_insert_socket(struct sock *sk) +static void __unix_insert_socket(struct net *net, struct sock *sk) { DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); - sk_add_node(sk, &unix_socket_table[sk->sk_hash]); + sk_add_node(sk, &net->unx.hash[sk->sk_hash].head); } -static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, - unsigned int hash) +static void __unix_set_addr_hash(struct net *net, struct sock *sk, + struct unix_address *addr, unsigned int hash) { __unix_remove_socket(sk); smp_store_release(&unix_sk(sk)->addr, addr); sk->sk_hash = hash; - __unix_insert_socket(sk); + __unix_insert_socket(net, sk); } static void unix_remove_socket(struct net *net, struct sock *sk) @@ -337,7 +335,7 @@ static void 
unix_insert_unbound_socket(struct net *net, struct sock *sk) { spin_lock(&unix_table_locks[sk->sk_hash]); spin_lock(&net->unx.hash[sk->sk_hash].lock); - __unix_insert_socket(sk); + __unix_insert_socket(net, sk); spin_unlock(&net->unx.hash[sk->sk_hash].lock); spin_unlock(&unix_table_locks[sk->sk_hash]); } @@ -348,12 +346,9 @@ static struct sock *__unix_find_socket_byname(struct net *net, { struct sock *s; - sk_for_each(s, &unix_socket_table[hash]) { + sk_for_each(s, &net->unx.hash[hash].head) { struct unix_sock *u = unix_sk(s); - if (!net_eq(sock_net(s), net)) - continue; - if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) return s; @@ -384,7 +379,7 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) spin_lock(&unix_table_locks[hash]); spin_lock(&net->unx.hash[hash].lock); - sk_for_each(s, &unix_socket_table[hash]) { + sk_for_each(s, &net->unx.hash[hash].head) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { @@ -1140,7 +1135,7 @@ static int unix_autobind(struct sock *sk) goto retry; } - __unix_set_addr_hash(sk, addr, new_hash); + __unix_set_addr_hash(net, sk, addr, new_hash); unix_table_double_unlock(net, old_hash, new_hash); err = 0; @@ -1199,7 +1194,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, unix_table_double_lock(net, old_hash, new_hash); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); - __unix_set_addr_hash(sk, addr, new_hash); + __unix_set_addr_hash(net, sk, addr, new_hash); unix_table_double_unlock(net, old_hash, new_hash); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); @@ -1246,7 +1241,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) goto out_spin; - __unix_set_addr_hash(sk, addr, new_hash); + __unix_set_addr_hash(net, sk, addr, new_hash); unix_table_double_unlock(net, old_hash, new_hash); 
mutex_unlock(&u->bindlock); return 0; @@ -3239,12 +3234,11 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { unsigned long offset = get_offset(*pos); unsigned long bucket = get_bucket(*pos); - struct sock *sk; unsigned long count = 0; + struct sock *sk; - for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { - if (sock_net(sk) != seq_file_net(seq)) - continue; + for (sk = sk_head(&seq_file_net(seq)->unx.hash[bucket].head); + sk; sk = sk_next(sk)) { if (++count == offset) break; } @@ -3279,13 +3273,12 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, loff_t *pos) { unsigned long bucket = get_bucket(*pos); - struct net *net = seq_file_net(seq); - for (sk = sk_next(sk); sk; sk = sk_next(sk)) - if (sock_net(sk) == net) - return sk; + sk = sk_next(sk); + if (sk) + return sk; - spin_unlock(&net->unx.hash[bucket].lock); + spin_unlock(&seq_file_net(seq)->unx.hash[bucket].lock); spin_unlock(&unix_table_locks[bucket]); *pos = set_bucket_offset(++bucket, 1); @@ -3406,7 +3399,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) { struct bpf_unix_iter_state *iter = seq->private; - struct net *net = seq_file_net(seq); unsigned int expected = 1; struct sock *sk; @@ -3414,9 +3406,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) iter->batch[iter->end_sk++] = start_sk; for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { - if (sock_net(sk) != net) - continue; - if (iter->end_sk < iter->max_sk) { sock_hold(sk); iter->batch[iter->end_sk++] = sk; @@ -3425,7 +3414,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) expected++; } - spin_unlock(&net->unx.hash[start_sk->sk_hash].lock); + spin_unlock(&seq_file_net(seq)->unx.hash[start_sk->sk_hash].lock); spin_unlock(&unix_table_locks[start_sk->sk_hash]); return expected; diff --git a/net/unix/diag.c b/net/unix/diag.c index 41b67b82f51f..5774172a3ea5 100644 --- 
a/net/unix/diag.c +++ b/net/unix/diag.c @@ -210,9 +210,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; spin_lock(&unix_table_locks[slot]); spin_lock(&net->unx.hash[slot].lock); - sk_for_each(sk, &unix_socket_table[slot]) { - if (!net_eq(sock_net(sk), net)) - continue; + sk_for_each(sk, &net->unx.hash[slot].head) { if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -246,13 +244,14 @@ static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino) for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock(&unix_table_locks[i]); spin_lock(&net->unx.hash[i].lock); - sk_for_each(sk, &unix_socket_table[i]) + sk_for_each(sk, &net->unx.hash[i].head) { if (ino == sock_i_ino(sk)) { sock_hold(sk); spin_unlock(&net->unx.hash[i].lock); spin_unlock(&unix_table_locks[i]); return sk; } + } spin_unlock(&net->unx.hash[i].lock); spin_unlock(&unix_table_locks[i]); } @@ -277,8 +276,6 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, err = -ENOENT; if (sk == NULL) goto out_nosk; - if (!net_eq(sock_net(sk), net)) - goto out; err = sock_diag_check_cookie(sk, req->udiag_cookie); if (err) From patchwork Thu Jun 16 23:47:14 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 12884997 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 45CC3C43334 for ; Thu, 16 Jun 2022 23:49:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1379454AbiFPXtm (ORCPT ); Thu, 16 Jun 2022 19:49:42 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:43516 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1379468AbiFPXti (ORCPT ); Thu, 
16 Jun 2022 19:49:38 -0400 Received: from smtp-fw-6002.amazon.com (smtp-fw-6002.amazon.com [52.95.49.90]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0D84762CD0 for ; Thu, 16 Jun 2022 16:49:35 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1655423376; x=1686959376; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=pGYUm8n/Oo201e7QE/TBtSMH3sBIH7QeS8u05Gl0RzQ=; b=XMdE2nCu2Gexw75wFpg+Sh1WDjbTzQ53h5gFhRD4pANaqFjtxF/WFcLF liHULgGn8ZRfr9kxF4XiB3YI8lQIUEOR2HDYCcdx04M3rGMAHAzhk3Qm0 cm3t2Kp6pTwQXHEJG8WUccdoFC9WKuBnCyJfuPiZAWF/LplMuIP7gZ7DI o=; X-IronPort-AV: E=Sophos;i="5.92,306,1650931200"; d="scan'208";a="211925959" Received: from iad12-co-svc-p1-lb1-vlan2.amazon.com (HELO email-inbound-relay-iad-1d-1c3c2014.us-east-1.amazon.com) ([10.43.8.2]) by smtp-border-fw-6002.iad6.amazon.com with ESMTP; 16 Jun 2022 23:49:24 +0000 Received: from EX13MTAUWB001.ant.amazon.com (iad12-ws-svc-p26-lb9-vlan2.iad.amazon.com [10.40.163.34]) by email-inbound-relay-iad-1d-1c3c2014.us-east-1.amazon.com (Postfix) with ESMTPS id 6A547CB8A5; Thu, 16 Jun 2022 23:49:23 +0000 (UTC) Received: from EX13D04ANC001.ant.amazon.com (10.43.157.89) by EX13MTAUWB001.ant.amazon.com (10.43.161.207) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:49:22 +0000 Received: from 88665a182662.ant.amazon.com (10.43.160.26) by EX13D04ANC001.ant.amazon.com (10.43.157.89) with Microsoft SMTP Server (TLS) id 15.0.1497.36; Thu, 16 Jun 2022 23:49:20 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni CC: Amit Shah , Kuniyuki Iwashima , Kuniyuki Iwashima , Subject: [PATCH v1 net-next 6/6] af_unix: Remove unix_table_locks. 
Date: Thu, 16 Jun 2022 16:47:14 -0700 Message-ID: <20220616234714.4291-7-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220616234714.4291-1-kuniyu@amazon.com> References: <20220616234714.4291-1-kuniyu@amazon.com> MIME-Version: 1.0 X-Originating-IP: [10.43.160.26] X-ClientProxiedBy: EX13d09UWC004.ant.amazon.com (10.43.162.114) To EX13D04ANC001.ant.amazon.com (10.43.157.89) Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org unix_table_locks exist to protect the global hash table, unix_socket_table. The previous commit removed that table, so let's clean up the now-unnecessary locks. Here is a test result on EC2 c5.9xlarge where 10 processes run concurrently, each in its own netns, and each binds 100,000 sockets. without this series : 1m 38s with this series : 11s It is ~10x faster because the global hash table is split into 10 netns in this case. Signed-off-by: Kuniyuki Iwashima --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 28 +--------------------------- net/unix/diag.c | 6 ------ 3 files changed, 1 insertion(+), 34 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index cee4f2fca444..9cb84d9bde4c 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -21,7 +21,6 @@ struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; struct unix_hashbucket { spinlock_t lock; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a93915066cb6..469ff9c947a9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,8 +118,6 @@ #include "scm.h" -spinlock_t unix_table_locks[UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_table_locks); static atomic_long_t unix_nr_socks; /* SMP locking strategy: @@ -166,9 +164,6 @@ static void unix_table_double_lock(struct net *net, if (hash1 > hash2) swap(hash1, hash2); - spin_lock(&unix_table_locks[hash1]); - 
spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); - spin_lock(&net->unx.hash[hash1].lock); spin_lock(&net->unx.hash[hash2].lock); } @@ -178,9 +173,6 @@ static void unix_table_double_unlock(struct net *net, { spin_unlock(&net->unx.hash[hash1].lock); spin_unlock(&net->unx.hash[hash2].lock); - - spin_unlock(&unix_table_locks[hash1]); - spin_unlock(&unix_table_locks[hash2]); } #ifdef CONFIG_SECURITY_NETWORK @@ -324,20 +316,16 @@ static void __unix_set_addr_hash(struct net *net, struct sock *sk, static void unix_remove_socket(struct net *net, struct sock *sk) { - spin_lock(&unix_table_locks[sk->sk_hash]); spin_lock(&net->unx.hash[sk->sk_hash].lock); __unix_remove_socket(sk); spin_unlock(&net->unx.hash[sk->sk_hash].lock); - spin_unlock(&unix_table_locks[sk->sk_hash]); } static void unix_insert_unbound_socket(struct net *net, struct sock *sk) { - spin_lock(&unix_table_locks[sk->sk_hash]); spin_lock(&net->unx.hash[sk->sk_hash].lock); __unix_insert_socket(net, sk); spin_unlock(&net->unx.hash[sk->sk_hash].lock); - spin_unlock(&unix_table_locks[sk->sk_hash]); } static struct sock *__unix_find_socket_byname(struct net *net, @@ -362,13 +350,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net, { struct sock *s; - spin_lock(&unix_table_locks[hash]); spin_lock(&net->unx.hash[hash].lock); s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); spin_unlock(&net->unx.hash[hash].lock); - spin_unlock(&unix_table_locks[hash]); return s; } @@ -377,7 +363,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) unsigned int hash = unix_bsd_hash(i); struct sock *s; - spin_lock(&unix_table_locks[hash]); spin_lock(&net->unx.hash[hash].lock); sk_for_each(s, &net->unx.hash[hash].head) { struct dentry *dentry = unix_sk(s)->path.dentry; @@ -385,12 +370,10 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); 
spin_unlock(&net->unx.hash[hash].lock); - spin_unlock(&unix_table_locks[hash]); return s; } } spin_unlock(&net->unx.hash[hash].lock); - spin_unlock(&unix_table_locks[hash]); return NULL; } @@ -3253,7 +3236,6 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) struct sock *sk; while (bucket < UNIX_HASH_SIZE) { - spin_lock(&unix_table_locks[bucket]); spin_lock(&net->unx.hash[bucket].lock); sk = unix_from_bucket(seq, pos); @@ -3261,7 +3243,6 @@ static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) return sk; spin_unlock(&net->unx.hash[bucket].lock); - spin_unlock(&unix_table_locks[bucket]); *pos = set_bucket_offset(++bucket, 1); } @@ -3279,7 +3260,6 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, return sk; spin_unlock(&seq_file_net(seq)->unx.hash[bucket].lock); - spin_unlock(&unix_table_locks[bucket]); *pos = set_bucket_offset(++bucket, 1); @@ -3308,10 +3288,8 @@ static void unix_seq_stop(struct seq_file *seq, void *v) { struct sock *sk = v; - if (sk) { + if (sk) spin_unlock(&seq_file_net(seq)->unx.hash[sk->sk_hash].lock); - spin_unlock(&unix_table_locks[sk->sk_hash]); - } } static int unix_seq_show(struct seq_file *seq, void *v) @@ -3415,7 +3393,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) } spin_unlock(&seq_file_net(seq)->unx.hash[start_sk->sk_hash].lock); - spin_unlock(&unix_table_locks[start_sk->sk_hash]); return expected; } @@ -3709,9 +3686,6 @@ static int __init af_unix_init(void) spin_lock_init(&init_net.unx.hash[i].lock); } - for (i = 0; i < UNIX_HASH_SIZE; i++) - spin_lock_init(&unix_table_locks[i]); - rc = proto_register(&unix_dgram_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); diff --git a/net/unix/diag.c b/net/unix/diag.c index 5774172a3ea5..370eb268ec63 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -208,7 +208,6 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct 
sock *sk; num = 0; - spin_lock(&unix_table_locks[slot]); spin_lock(&net->unx.hash[slot].lock); sk_for_each(sk, &net->unx.hash[slot].head) { if (num < s_num) @@ -220,14 +219,12 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) { spin_unlock(&net->unx.hash[slot].lock); - spin_unlock(&unix_table_locks[slot]); goto done; } next: num++; } spin_unlock(&net->unx.hash[slot].lock); - spin_unlock(&unix_table_locks[slot]); } done: cb->args[0] = slot; @@ -242,18 +239,15 @@ static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino) int i; for (i = 0; i < UNIX_HASH_SIZE; i++) { - spin_lock(&unix_table_locks[i]); spin_lock(&net->unx.hash[i].lock); sk_for_each(sk, &net->unx.hash[i].head) { if (ino == sock_i_ino(sk)) { sock_hold(sk); spin_unlock(&net->unx.hash[i].lock); - spin_unlock(&unix_table_locks[i]); return sk; } } spin_unlock(&net->unx.hash[i].lock); - spin_unlock(&unix_table_locks[i]); } return NULL; }