From patchwork Thu Jul 30 11:59:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692833 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D9910913 for ; Thu, 30 Jul 2020 11:59:38 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C5D6720829 for ; Thu, 30 Jul 2020 11:59:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727113AbgG3L7c (ORCPT ); Thu, 30 Jul 2020 07:59:32 -0400 Received: from relay.sw.ru ([185.231.240.75]:56344 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726774AbgG3L7a (ORCPT ); Thu, 30 Jul 2020 07:59:30 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17DM-0002ux-03; Thu, 30 Jul 2020 14:59:12 +0300 Subject: [PATCH 01/23] ns: Add common refcount into ns_common and use it as counter for net_ns From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:25 +0300 Message-ID: <159611036589.535980.1765795847221907147.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Currently, every type of namespace has its own counter, which is stored in the ns-specific part. 
Say, @net has struct net::count, @pid has struct pid_namespace::kref, etc. This patchset introduces unified counter for all types of namespaces, and converts net namespace to use it first. Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/ns_common.h | 1 + include/net/net_namespace.h | 11 ++++------- net/core/net-sysfs.c | 6 +++--- net/core/net_namespace.c | 6 +++--- net/ipv4/inet_timewait_sock.c | 4 ++-- net/ipv4/tcp_metrics.c | 2 +- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 5fbc4000358f..27db02ebdf36 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -8,6 +8,7 @@ struct ns_common { atomic_long_t stashed; const struct proc_ns_operations *ops; unsigned int inum; + refcount_t count; }; #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2ee5901bec7a..cb4b33d7834b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,9 +60,6 @@ struct net { refcount_t passive; /* To decide when the network * namespace should be freed. */ - refcount_t count; /* To decided when the network - * namespace should be shut down. - */ spinlock_t rules_mod_lock; unsigned int dev_unreg_count; @@ -245,7 +242,7 @@ void __put_net(struct net *net); static inline struct net *get_net(struct net *net) { - refcount_inc(&net->count); + refcount_inc(&net->ns.count); return net; } @@ -256,14 +253,14 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. 
*/ - if (!refcount_inc_not_zero(&net->count)) + if (!refcount_inc_not_zero(&net->ns.count)) net = NULL; return net; } static inline void put_net(struct net *net) { - if (refcount_dec_and_test(&net->count)) + if (refcount_dec_and_test(&net->ns.count)) __put_net(net); } @@ -275,7 +272,7 @@ int net_eq(const struct net *net1, const struct net *net2) static inline int check_net(const struct net *net) { - return refcount_read(&net->count) != 0; + return refcount_read(&net->ns.count) != 0; } void net_drop_ns(void *); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9de33b594ff2..655a88b0071c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1025,7 +1025,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct kobject *kobj = &dev->_rx[i].kobj; - if (!refcount_read(&dev_net(dev)->count)) + if (!refcount_read(&dev_net(dev)->ns.count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); @@ -1603,7 +1603,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; - if (!refcount_read(&dev_net(dev)->count)) + if (!refcount_read(&dev_net(dev)->ns.count)) queue->kobj.uevent_suppress = 1; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); @@ -1850,7 +1850,7 @@ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &ndev->dev; - if (!refcount_read(&dev_net(ndev)->count)) + if (!refcount_read(&dev_net(ndev)->ns.count)) dev_set_uevent_suppress(dev, 1); kobject_get(&dev->kobj); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcd61aca343e..5f658cbedd34 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -44,7 +44,7 @@ static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif struct net init_net = { - .count = REFCOUNT_INIT(1), + .ns.count = 
REFCOUNT_INIT(1), .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), #ifdef CONFIG_KEYS .key_domain = &init_net_key_domain, @@ -248,7 +248,7 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { int id; - if (refcount_read(&net->count) == 0) + if (refcount_read(&net->ns.count) == 0) return NETNSA_NSID_NOT_ASSIGNED; spin_lock(&net->nsid_lock); @@ -328,7 +328,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) int error = 0; LIST_HEAD(net_exit_list); - refcount_set(&net->count, 1); + refcount_set(&net->ns.count, 1); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c411c87ae865..437afe392e66 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -272,14 +272,14 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) continue; tw = inet_twsk(sk); if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->count)) + refcount_read(&twsk_net(tw)->ns.count)) continue; if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) continue; if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->count))) { + refcount_read(&twsk_net(tw)->ns.count))) { inet_twsk_put(tw); goto restart; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 279db8822439..39710c417565 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -887,7 +887,7 @@ static void tcp_metrics_flush_all(struct net *net) pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { match = net ? 
net_eq(tm_net(tm), net) : - !refcount_read(&tm_net(tm)->count); + !refcount_read(&tm_net(tm)->ns.count); if (match) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); From patchwork Thu Jul 30 11:59:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692879 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C6B2013B1 for ; Thu, 30 Jul 2020 12:01:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AEE6E20838 for ; Thu, 30 Jul 2020 12:01:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727852AbgG3L7f (ORCPT ); Thu, 30 Jul 2020 07:59:35 -0400 Received: from relay.sw.ru ([185.231.240.75]:56402 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727844AbgG3L7e (ORCPT ); Thu, 30 Jul 2020 07:59:34 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17DR-0002va-AS; Thu, 30 Jul 2020 14:59:17 +0300 Subject: [PATCH 02/23] uts: Use generic ns_common::count From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:31 +0300 Message-ID: <159611037120.535980.13731766189011538488.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: 
linux-fsdevel@vger.kernel.org Convert uts namespace to use generic counter instead of kref. Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/utsname.h | 9 ++++----- init/version.c | 2 +- kernel/utsname.c | 7 ++----- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 44429d9142ca..2b1737c9b244 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -4,7 +4,6 @@ #include -#include #include #include #include @@ -22,7 +21,6 @@ struct user_namespace; extern struct user_namespace init_user_ns; struct uts_namespace { - struct kref kref; struct new_utsname name; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -33,16 +31,17 @@ extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS static inline void get_uts_ns(struct uts_namespace *ns) { - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); } extern struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); -extern void free_uts_ns(struct kref *kref); +extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { - kref_put(&ns->kref, free_uts_ns); + if (refcount_dec_and_test(&ns->ns.count)) + free_uts_ns(ns); } void uts_ns_init(void); diff --git a/init/version.c b/init/version.c index cba341161b58..80d2b7566b39 100644 --- a/init/version.c +++ b/init/version.c @@ -25,7 +25,7 @@ int version_string(LINUX_VERSION_CODE); #endif struct uts_namespace init_uts_ns = { - .kref = KREF_INIT(2), + .ns.count = REFCOUNT_INIT(2), .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, diff --git a/kernel/utsname.c b/kernel/utsname.c index e488d0e2ab45..b1ac3ca870f2 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -33,7 +33,7 @@ static struct uts_namespace *create_uts_ns(void) uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL); if (uts_ns) - kref_init(&uts_ns->kref); + 
refcount_set(&uts_ns->ns.count, 1); return uts_ns; } @@ -103,11 +103,8 @@ struct uts_namespace *copy_utsname(unsigned long flags, return new_ns; } -void free_uts_ns(struct kref *kref) +void free_uts_ns(struct uts_namespace *ns) { - struct uts_namespace *ns; - - ns = container_of(kref, struct uts_namespace, kref); dec_uts_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); From patchwork Thu Jul 30 11:59:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692835 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D9AC2722 for ; Thu, 30 Jul 2020 11:59:41 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C8AC520838 for ; Thu, 30 Jul 2020 11:59:41 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727962AbgG3L7k (ORCPT ); Thu, 30 Jul 2020 07:59:40 -0400 Received: from relay.sw.ru ([185.231.240.75]:56444 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727922AbgG3L7j (ORCPT ); Thu, 30 Jul 2020 07:59:39 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17DW-0002vq-Ki; Thu, 30 Jul 2020 14:59:22 +0300 Subject: [PATCH 03/23] ipc: Use generic ns_common::count From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:36 +0300 Message-ID: <159611037654.535980.11569207616830163621.stgit@localhost.localdomain> In-Reply-To: 
<159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Convert ipc namespace to use generic counter. Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/ipc_namespace.h | 3 +-- ipc/msgutil.c | 2 +- ipc/namespace.c | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a06a78c67f19..05e22770af51 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -27,7 +27,6 @@ struct ipc_ids { }; struct ipc_namespace { - refcount_t count; struct ipc_ids ids[3]; int sem_ctls[4]; @@ -128,7 +127,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - refcount_inc(&ns->count); + refcount_inc(&ns->ns.count); return ns; } diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 3149b4a379de..d0a0e877cadd 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -26,7 +26,7 @@ DEFINE_SPINLOCK(mq_lock); * and not CONFIG_IPC_NS. 
*/ struct ipc_namespace init_ipc_ns = { - .count = REFCOUNT_INIT(1), + .ns.count = REFCOUNT_INIT(1), .user_ns = &init_user_ns, .ns.inum = PROC_IPC_INIT_INO, #ifdef CONFIG_IPC_NS diff --git a/ipc/namespace.c b/ipc/namespace.c index 24e7b45320f7..7bd0766ddc3b 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -51,7 +51,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, goto fail_free; ns->ns.ops = &ipcns_operations; - refcount_set(&ns->count, 1); + refcount_set(&ns->ns.count, 1); ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; @@ -164,7 +164,7 @@ static DECLARE_WORK(free_ipc_work, free_ipc); */ void put_ipc_ns(struct ipc_namespace *ns) { - if (refcount_dec_and_lock(&ns->count, &mq_lock)) { + if (refcount_dec_and_lock(&ns->ns.count, &mq_lock)) { mq_clear_sbinfo(ns); spin_unlock(&mq_lock); From patchwork Thu Jul 30 11:59:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692841 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A1B7B913 for ; Thu, 30 Jul 2020 12:00:08 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9033B20838 for ; Thu, 30 Jul 2020 12:00:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728019AbgG3L7v (ORCPT ); Thu, 30 Jul 2020 07:59:51 -0400 Received: from relay.sw.ru ([185.231.240.75]:56482 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727979AbgG3L7p (ORCPT ); Thu, 30 Jul 2020 07:59:45 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Db-0002w2-W7; Thu, 30 Jul 2020 14:59:28 +0300 Subject: [PATCH 04/23] pid: Use generic ns_common::count From: Kirill Tkhai To: 
viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:41 +0300 Message-ID: <159611038184.535980.10101517435778277457.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Convert pid namespace to use generic counter. Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/pid_namespace.h | 4 +--- kernel/pid.c | 2 +- kernel/pid_namespace.c | 13 +++---------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 5a5cb45ac57e..7c7e627503d2 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ struct fs_pin; struct pid_namespace { - struct kref kref; struct idr idr; struct rcu_head rcu; unsigned int pid_allocated; @@ -43,7 +41,7 @@ extern struct pid_namespace init_pid_ns; static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); return ns; } diff --git a/kernel/pid.c b/kernel/pid.c index de9d29c41d77..3b9e67736ef4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -72,7 +72,7 @@ int pid_max_max = PID_MAX_LIMIT; * the scheme scales to up to 4 million PIDs, runtime. 
*/ struct pid_namespace init_pid_ns = { - .kref = KREF_INIT(2), + .ns.count = REFCOUNT_INIT(2), .idr = IDR_INIT(init_pid_ns.idr), .pid_allocated = PIDNS_ADDING, .level = 0, diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 0e5ac162c3a8..d02dc1696edf 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -102,7 +102,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns goto out_free_idr; ns->ns.ops = &pidns_operations; - kref_init(&ns->kref); + refcount_set(&ns->ns.count, 1); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); @@ -148,22 +148,15 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, return create_pid_namespace(user_ns, old_ns); } -static void free_pid_ns(struct kref *kref) -{ - struct pid_namespace *ns; - - ns = container_of(kref, struct pid_namespace, kref); - destroy_pid_namespace(ns); -} - void put_pid_ns(struct pid_namespace *ns) { struct pid_namespace *parent; while (ns != &init_pid_ns) { parent = ns->parent; - if (!kref_put(&ns->kref, free_pid_ns)) + if (!refcount_dec_and_test(&ns->ns.count)) break; + destroy_pid_namespace(ns); ns = parent; } } From patchwork Thu Jul 30 11:59:47 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692839 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E3BB8913 for ; Thu, 30 Jul 2020 12:00:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D5D5A22B42 for ; Thu, 30 Jul 2020 12:00:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728038AbgG3L7w (ORCPT ); Thu, 30 Jul 2020 07:59:52 -0400 Received: from relay.sw.ru ([185.231.240.75]:56520 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by 
vger.kernel.org with ESMTP id S1727996AbgG3L7u (ORCPT ); Thu, 30 Jul 2020 07:59:50 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Dh-0002wF-Am; Thu, 30 Jul 2020 14:59:33 +0300 Subject: [PATCH 05/23] user: Use generic ns_common::count From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:47 +0300 Message-ID: <159611038719.535980.13960315152927389105.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Convert user namespace to use generic counter. 
Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/user_namespace.h | 5 ++--- kernel/user.c | 2 +- kernel/user_namespace.c | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6ef1c7109fc4..64cf8ebdc4ec 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -57,7 +57,6 @@ struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; - atomic_t count; struct user_namespace *parent; int level; kuid_t owner; @@ -109,7 +108,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type); static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->ns.count); return ns; } @@ -119,7 +118,7 @@ extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { - if (ns && atomic_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->ns.count)) __put_user_ns(ns); } diff --git a/kernel/user.c b/kernel/user.c index b1635d94a1f2..a2478cddf536 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -55,7 +55,7 @@ struct user_namespace init_user_ns = { }, }, }, - .count = ATOMIC_INIT(3), + .ns.count = REFCOUNT_INIT(3), .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .ns.inum = PROC_USER_INIT_INO, diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 87804e0371fe..7c2bbe8f3e45 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -111,7 +111,7 @@ int create_user_ns(struct cred *new) goto fail_free; ns->ns.ops = &userns_operations; - atomic_set(&ns->count, 1); + refcount_set(&ns->ns.count, 1); /* Leave the new->user_ns reference with the new user namespace. 
*/ ns->parent = parent_ns; ns->level = parent_ns->level + 1; @@ -197,7 +197,7 @@ static void free_user_ns(struct work_struct *work) kmem_cache_free(user_ns_cachep, ns); dec_user_namespaces(ucounts); ns = parent; - } while (atomic_dec_and_test(&parent->count)); + } while (refcount_dec_and_test(&parent->ns.count)); } void __put_user_ns(struct user_namespace *ns) From patchwork Thu Jul 30 11:59:52 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692837 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B2C1E913 for ; Thu, 30 Jul 2020 11:59:58 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A4F4821744 for ; Thu, 30 Jul 2020 11:59:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728093AbgG3L75 (ORCPT ); Thu, 30 Jul 2020 07:59:57 -0400 Received: from relay.sw.ru ([185.231.240.75]:56584 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728068AbgG3L7z (ORCPT ); Thu, 30 Jul 2020 07:59:55 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Dm-0002wZ-LI; Thu, 30 Jul 2020 14:59:38 +0300 Subject: [PATCH 06/23] mnt: Use generic ns_common::count From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:52 +0300 Message-ID: <159611039253.535980.5974330310695200570.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: 
<159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Convert mount namespace to use generic counter. Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- fs/mount.h | 3 +-- fs/namespace.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index c3e0bb6e5782..f296862032ec 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -7,7 +7,6 @@ #include struct mnt_namespace { - atomic_t count; struct ns_common ns; struct mount * root; /* @@ -130,7 +129,7 @@ static inline void detach_mounts(struct dentry *dentry) static inline void get_mnt_ns(struct mnt_namespace *ns) { - atomic_inc(&ns->count); + refcount_inc(&ns->ns.count); } extern seqlock_t mount_lock; diff --git a/fs/namespace.c b/fs/namespace.c index 31c387794fbd..8c39810e6ec3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3296,7 +3296,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a new_ns->ns.ops = &mntns_operations; if (!anon) new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); - atomic_set(&new_ns->count, 1); + refcount_set(&new_ns->ns.count, 1); INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); spin_lock_init(&new_ns->ns_lock); @@ -3870,7 +3870,7 @@ void __init mnt_init(void) void put_mnt_ns(struct mnt_namespace *ns) { - if (!atomic_dec_and_test(&ns->count)) + if (!refcount_dec_and_test(&ns->ns.count)) return; drop_collected_mounts(&ns->root->mnt); free_mnt_ns(ns); From patchwork Thu Jul 30 11:59:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692847 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 19FCD13B1 for ; Thu, 30 Jul 
2020 12:00:21 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0257122B49 for ; Thu, 30 Jul 2020 12:00:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728119AbgG3MAI (ORCPT ); Thu, 30 Jul 2020 08:00:08 -0400 Received: from relay.sw.ru ([185.231.240.75]:56610 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728102AbgG3MAB (ORCPT ); Thu, 30 Jul 2020 08:00:01 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Ds-0002wm-5h; Thu, 30 Jul 2020 14:59:44 +0300 Subject: [PATCH 07/23] cgroup: Use generic ns_common::count From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 14:59:57 +0300 Message-ID: <159611039786.535980.12848941118631845247.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Convert cgroup namespace to use generic counter. 
Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/cgroup.h | 5 ++--- kernel/cgroup/cgroup.c | 2 +- kernel/cgroup/namespace.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 618838c48313..451c2d26a5db 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -854,7 +854,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { - refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -889,12 +888,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) - refcount_inc(&ns->count); + refcount_inc(&ns->ns.count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (ns && refcount_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->ns.count)) free_cgroup_ns(ns); } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index dd247747ec14..22e466926853 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -199,7 +199,7 @@ static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { - .count = REFCOUNT_INIT(2), + .ns.count = REFCOUNT_INIT(2), .user_ns = &init_user_ns, .ns.ops = &cgroupns_operations, .ns.inum = PROC_CGROUP_INIT_INO, diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index 812a61afd538..f5e8828c109c 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -32,7 +32,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) kfree(new_ns); return ERR_PTR(ret); } - refcount_set(&new_ns->count, 1); + refcount_set(&new_ns->ns.count, 1); new_ns->ns.ops = &cgroupns_operations; return new_ns; } From patchwork Thu Jul 30 12:00:03 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692845 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4BB2B913 for ; Thu, 30 Jul 2020 12:00:20 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3D95022B49 for ; Thu, 30 Jul 2020 12:00:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728148AbgG3MAL (ORCPT ); Thu, 30 Jul 2020 08:00:11 -0400 Received: from relay.sw.ru ([185.231.240.75]:56668 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727956AbgG3MAI (ORCPT ); Thu, 30 Jul 2020 08:00:08 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Dx-0002x1-Fn; Thu, 30 Jul 2020 14:59:49 +0300 Subject: [PATCH 08/23] time: Use generic ns_common::count From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:03 +0300 Message-ID: <159611040338.535980.6847379168016198580.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Convert time namespace to use generic counter. 
Signed-off-by: Kirill Tkhai Acked-by: Christian Brauner --- include/linux/time_namespace.h | 9 ++++----- kernel/time/namespace.c | 9 +++------ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 5b6031385db0..a51ffc089219 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -4,7 +4,6 @@ #include -#include #include #include #include @@ -18,7 +17,6 @@ struct timens_offsets { }; struct time_namespace { - struct kref kref; struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; @@ -37,20 +35,21 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); return ns; } struct time_namespace *copy_time_ns(unsigned long flags, struct user_namespace *user_ns, struct time_namespace *old_ns); -void free_time_ns(struct kref *kref); +void free_time_ns(struct time_namespace *ns); int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); static inline void put_time_ns(struct time_namespace *ns) { - kref_put(&ns->kref, free_time_ns); + if (refcount_dec_and_test(&ns->ns.count)) + free_time_ns(ns); } void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m); diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index afc65e6be33e..c4c829eb3511 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -92,7 +92,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, if (!ns) goto fail_dec; - kref_init(&ns->kref); + refcount_set(&ns->ns.count, 1); ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!ns->vvar_page) @@ -226,11 +226,8 @@ static void timens_set_vvar_page(struct task_struct *task, mutex_unlock(&offset_lock); } -void free_time_ns(struct kref *kref) +void 
free_time_ns(struct time_namespace *ns) { - struct time_namespace *ns; - - ns = container_of(kref, struct time_namespace, kref); dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); @@ -464,7 +461,7 @@ const struct proc_ns_operations timens_for_children_operations = { }; struct time_namespace init_time_ns = { - .kref = KREF_INIT(3), + .ns.count = REFCOUNT_INIT(3), .user_ns = &init_user_ns, .ns.inum = PROC_TIME_INIT_INO, .ns.ops = &timens_operations, From patchwork Thu Jul 30 12:00:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692843 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 17F5C13B1 for ; Thu, 30 Jul 2020 12:00:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 08FCC22B49 for ; Thu, 30 Jul 2020 12:00:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728185AbgG3MAN (ORCPT ); Thu, 30 Jul 2020 08:00:13 -0400 Received: from relay.sw.ru ([185.231.240.75]:56686 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728141AbgG3MAM (ORCPT ); Thu, 30 Jul 2020 08:00:12 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17E2-0002xD-Pn; Thu, 30 Jul 2020 14:59:54 +0300 Subject: [PATCH 09/23] ns: Introduce ns_idr to be able to iterate all allocated namespaces in the system From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:08 +0300 Message-ID: 
<159611040870.535980.13460189038999722608.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This patch introduces a new IDR and functions to add/remove and iterate registered namespaces in the system. It will be used to list namespaces in /proc/namespaces/... in next patches. The IDR is protected by ns_lock, which is chosen to be a spinlock (not a mutex) to allow calling ns_idr_unregister() from put_xxx_ns() methods, which may be called from (say) softirq context. A spinlock allows us to avoid introducing a kwork on top of put_xxx_ns() just to call mutex_lock(). We introduce a new IDR, because there are no appropriate existing items to reuse instead. The closest one, proc_inum_ida, does not fit our goals: it is an IDA, and its conversion to an IDR would bring a big overhead for proc entries, which are not interested in IDR functionality (pointers). Read access to ns_idr is made lockless (see ns_get_next()). This is done for better parallelism and better performance from the start. One new requirement for this is that namespace memory must be freed one RCU grace period after ns_idr_unregister(). Some namespace types already do this (say, net), the rest will be converted to use kfree_rcu()/etc, where they become linked to the IDR. See next patches in this series for the details. 
Signed-off-by: Kirill Tkhai --- fs/nsfs.c | 76 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/ns_common.h | 10 ++++++ include/linux/proc_ns.h | 11 ++++--- 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 800c1d0eb0d0..ee4be67d3a0b 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -11,10 +11,13 @@ #include #include #include +#include #include "internal.h" static struct vfsmount *nsfs_mnt; +static DEFINE_SPINLOCK(ns_lock); +static DEFINE_IDR(ns_idr); static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -304,3 +307,76 @@ void __init nsfs_init(void) panic("can't set nsfs up\n"); nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; } + +/* + * Add a newly created ns to ns_idr. The ns must be fully + * initialized since it becomes available for ns_get_next() + * right after we exit this function. + */ +int ns_idr_register(struct ns_common *ns) +{ + int ret, id = ns->inum - PROC_NS_MIN_INO; + + if (WARN_ON(id < 0)) + return -EINVAL; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&ns_lock); + ret = idr_alloc(&ns_idr, ns, id, id + 1, GFP_ATOMIC); + spin_unlock_irq(&ns_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; +} + +/* + * Remove a dead ns from ns_idr. Note, that ns memory must + * be freed not earlier then one RCU grace period after + * this function, since ns_get_next() uses RCU to iterate the IDR. + */ +void ns_idr_unregister(struct ns_common *ns) +{ + int id = ns->inum - PROC_NS_MIN_INO; + unsigned long flags; + + if (WARN_ON(id < 0)) + return; + + spin_lock_irqsave(&ns_lock, flags); + idr_remove(&ns_idr, id); + spin_unlock_irqrestore(&ns_lock, flags); +} + +/* + * This returns ns with inum greater than @id or NULL. + * @id is updated to refer the ns inum. 
+ */ +struct ns_common *ns_get_next(unsigned int *id) +{ + struct ns_common *ns; + + if (*id < PROC_NS_MIN_INO - 1) + *id = PROC_NS_MIN_INO - 1; + + *id += 1; + *id -= PROC_NS_MIN_INO; + + rcu_read_lock(); + do { + ns = idr_get_next(&ns_idr, id); + if (!ns) + break; + if (!refcount_inc_not_zero(&ns->count)) { + ns = NULL; + *id += 1; + } + } while (!ns); + rcu_read_unlock(); + + if (ns) { + *id += PROC_NS_MIN_INO; + WARN_ON(*id != ns->inum); + } + + return ns; +} diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 27db02ebdf36..5f460e97151a 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -4,6 +4,12 @@ struct proc_ns_operations; +/* + * Common part of all namespaces. Note, that we link namespaces + * into IDR, and they are dereferenced via RCU. So, a namespace + * memory is allowed to be freed one RCU grace period after final + * .count put. See ns_get_next() for the details. + */ struct ns_common { atomic_long_t stashed; const struct proc_ns_operations *ops; @@ -11,4 +17,8 @@ struct ns_common { refcount_t count; }; +extern int ns_idr_register(struct ns_common *ns); +extern void ns_idr_unregister(struct ns_common *ns); +extern struct ns_common *ns_get_next(unsigned int *id); + #endif diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 75807ecef880..906e6ebb43e4 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -40,12 +40,13 @@ extern const struct proc_ns_operations timens_for_children_operations; */ enum { PROC_ROOT_INO = 1, - PROC_IPC_INIT_INO = 0xEFFFFFFFU, - PROC_UTS_INIT_INO = 0xEFFFFFFEU, - PROC_USER_INIT_INO = 0xEFFFFFFDU, - PROC_PID_INIT_INO = 0xEFFFFFFCU, - PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, PROC_TIME_INIT_INO = 0xEFFFFFFAU, + PROC_NS_MIN_INO = PROC_TIME_INIT_INO, + PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, + PROC_PID_INIT_INO = 0xEFFFFFFCU, + PROC_USER_INIT_INO = 0xEFFFFFFDU, + PROC_UTS_INIT_INO = 0xEFFFFFFEU, + PROC_IPC_INIT_INO = 0xEFFFFFFFU, }; #ifdef CONFIG_PROC_FS From 
patchwork Thu Jul 30 12:00:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692849 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9F73F913 for ; Thu, 30 Jul 2020 12:00:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 86E6B20842 for ; Thu, 30 Jul 2020 12:00:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728219AbgG3MAU (ORCPT ); Thu, 30 Jul 2020 08:00:20 -0400 Received: from relay.sw.ru ([185.231.240.75]:56744 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728141AbgG3MAS (ORCPT ); Thu, 30 Jul 2020 08:00:18 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17E8-0002xT-2q; Thu, 30 Jul 2020 15:00:00 +0300 Subject: [PATCH 10/23] fs: Rename fs/proc/namespaces.c into fs/proc/task_namespaces.c From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:14 +0300 Message-ID: <159611041399.535980.15920460479176140405.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This file is about task namespaces, so we rename it. No functional changes. 
Signed-off-by: Kirill Tkhai --- fs/proc/Makefile | 2 fs/proc/internal.h | 2 fs/proc/namespaces.c | 183 --------------------------------------------- fs/proc/task_namespaces.c | 183 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 185 deletions(-) delete mode 100644 fs/proc/namespaces.c create mode 100644 fs/proc/task_namespaces.c diff --git a/fs/proc/Makefile b/fs/proc/Makefile index bd08616ed8ba..dc2d51f42905 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -24,7 +24,7 @@ proc-y += uptime.o proc-y += util.o proc-y += version.o proc-y += softirqs.o -proc-y += namespaces.o +proc-y += task_namespaces.o proc-y += self.o proc-y += thread_self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 917cc85e3466..572757ff97be 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -223,7 +223,7 @@ extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry extern void proc_entry_rundown(struct proc_dir_entry *); /* - * proc_namespaces.c + * task_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c deleted file mode 100644 index 8e159fc78c0a..000000000000 --- a/fs/proc/namespaces.c +++ /dev/null @@ -1,183 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "internal.h" - - -static const struct proc_ns_operations *ns_entries[] = { -#ifdef CONFIG_NET_NS - &netns_operations, -#endif -#ifdef CONFIG_UTS_NS - &utsns_operations, -#endif -#ifdef CONFIG_IPC_NS - &ipcns_operations, -#endif -#ifdef CONFIG_PID_NS - &pidns_operations, - &pidns_for_children_operations, -#endif -#ifdef CONFIG_USER_NS - &userns_operations, -#endif - &mntns_operations, -#ifdef CONFIG_CGROUPS - &cgroupns_operations, -#endif -#ifdef CONFIG_TIME_NS - 
&timens_operations, - &timens_for_children_operations, -#endif -}; - -static const char *proc_ns_get_link(struct dentry *dentry, - struct inode *inode, - struct delayed_call *done) -{ - const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; - struct task_struct *task; - struct path ns_path; - int error = -EACCES; - - if (!dentry) - return ERR_PTR(-ECHILD); - - task = get_proc_task(inode); - if (!task) - return ERR_PTR(-EACCES); - - if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) - goto out; - - error = ns_get_path(&ns_path, task, ns_ops); - if (error) - goto out; - - error = nd_jump_link(&ns_path); -out: - put_task_struct(task); - return ERR_PTR(error); -} - -static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) -{ - struct inode *inode = d_inode(dentry); - const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; - struct task_struct *task; - char name[50]; - int res = -EACCES; - - task = get_proc_task(inode); - if (!task) - return res; - - if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { - res = ns_get_name(name, sizeof(name), task, ns_ops); - if (res >= 0) - res = readlink_copy(buffer, buflen, name); - } - put_task_struct(task); - return res; -} - -static const struct inode_operations proc_ns_link_inode_operations = { - .readlink = proc_ns_readlink, - .get_link = proc_ns_get_link, - .setattr = proc_setattr, -}; - -static struct dentry *proc_ns_instantiate(struct dentry *dentry, - struct task_struct *task, const void *ptr) -{ - const struct proc_ns_operations *ns_ops = ptr; - struct inode *inode; - struct proc_inode *ei; - - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO); - if (!inode) - return ERR_PTR(-ENOENT); - - ei = PROC_I(inode); - inode->i_op = &proc_ns_link_inode_operations; - ei->ns_ops = ns_ops; - pid_update_inode(task, inode); - - d_set_d_op(dentry, &pid_dentry_operations); - return d_splice_alias(inode, dentry); -} - -static int proc_ns_dir_readdir(struct file *file, 
struct dir_context *ctx) -{ - struct task_struct *task = get_proc_task(file_inode(file)); - const struct proc_ns_operations **entry, **last; - - if (!task) - return -ENOENT; - - if (!dir_emit_dots(file, ctx)) - goto out; - if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries)) - goto out; - entry = ns_entries + (ctx->pos - 2); - last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; - while (entry <= last) { - const struct proc_ns_operations *ops = *entry; - if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name), - proc_ns_instantiate, task, ops)) - break; - ctx->pos++; - entry++; - } -out: - put_task_struct(task); - return 0; -} - -const struct file_operations proc_ns_dir_operations = { - .read = generic_read_dir, - .iterate_shared = proc_ns_dir_readdir, - .llseek = generic_file_llseek, -}; - -static struct dentry *proc_ns_dir_lookup(struct inode *dir, - struct dentry *dentry, unsigned int flags) -{ - struct task_struct *task = get_proc_task(dir); - const struct proc_ns_operations **entry, **last; - unsigned int len = dentry->d_name.len; - struct dentry *res = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - - last = &ns_entries[ARRAY_SIZE(ns_entries)]; - for (entry = ns_entries; entry < last; entry++) { - if (strlen((*entry)->name) != len) - continue; - if (!memcmp(dentry->d_name.name, (*entry)->name, len)) - break; - } - if (entry == last) - goto out; - - res = proc_ns_instantiate(dentry, task, *entry); -out: - put_task_struct(task); -out_no_task: - return res; -} - -const struct inode_operations proc_ns_dir_inode_operations = { - .lookup = proc_ns_dir_lookup, - .getattr = pid_getattr, - .setattr = proc_setattr, -}; diff --git a/fs/proc/task_namespaces.c b/fs/proc/task_namespaces.c new file mode 100644 index 000000000000..8e159fc78c0a --- /dev/null +++ b/fs/proc/task_namespaces.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + + +static 
const struct proc_ns_operations *ns_entries[] = { +#ifdef CONFIG_NET_NS + &netns_operations, +#endif +#ifdef CONFIG_UTS_NS + &utsns_operations, +#endif +#ifdef CONFIG_IPC_NS + &ipcns_operations, +#endif +#ifdef CONFIG_PID_NS + &pidns_operations, + &pidns_for_children_operations, +#endif +#ifdef CONFIG_USER_NS + &userns_operations, +#endif + &mntns_operations, +#ifdef CONFIG_CGROUPS + &cgroupns_operations, +#endif +#ifdef CONFIG_TIME_NS + &timens_operations, + &timens_for_children_operations, +#endif +}; + +static const char *proc_ns_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; + struct task_struct *task; + struct path ns_path; + int error = -EACCES; + + if (!dentry) + return ERR_PTR(-ECHILD); + + task = get_proc_task(inode); + if (!task) + return ERR_PTR(-EACCES); + + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + goto out; + + error = ns_get_path(&ns_path, task, ns_ops); + if (error) + goto out; + + error = nd_jump_link(&ns_path); +out: + put_task_struct(task); + return ERR_PTR(error); +} + +static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + struct inode *inode = d_inode(dentry); + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; + struct task_struct *task; + char name[50]; + int res = -EACCES; + + task = get_proc_task(inode); + if (!task) + return res; + + if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { + res = ns_get_name(name, sizeof(name), task, ns_ops); + if (res >= 0) + res = readlink_copy(buffer, buflen, name); + } + put_task_struct(task); + return res; +} + +static const struct inode_operations proc_ns_link_inode_operations = { + .readlink = proc_ns_readlink, + .get_link = proc_ns_get_link, + .setattr = proc_setattr, +}; + +static struct dentry *proc_ns_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + const struct proc_ns_operations *ns_ops 
= ptr; + struct inode *inode; + struct proc_inode *ei; + + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO); + if (!inode) + return ERR_PTR(-ENOENT); + + ei = PROC_I(inode); + inode->i_op = &proc_ns_link_inode_operations; + ei->ns_ops = ns_ops; + pid_update_inode(task, inode); + + d_set_d_op(dentry, &pid_dentry_operations); + return d_splice_alias(inode, dentry); +} + +static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) +{ + struct task_struct *task = get_proc_task(file_inode(file)); + const struct proc_ns_operations **entry, **last; + + if (!task) + return -ENOENT; + + if (!dir_emit_dots(file, ctx)) + goto out; + if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries)) + goto out; + entry = ns_entries + (ctx->pos - 2); + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; + while (entry <= last) { + const struct proc_ns_operations *ops = *entry; + if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name), + proc_ns_instantiate, task, ops)) + break; + ctx->pos++; + entry++; + } +out: + put_task_struct(task); + return 0; +} + +const struct file_operations proc_ns_dir_operations = { + .read = generic_read_dir, + .iterate_shared = proc_ns_dir_readdir, + .llseek = generic_file_llseek, +}; + +static struct dentry *proc_ns_dir_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags) +{ + struct task_struct *task = get_proc_task(dir); + const struct proc_ns_operations **entry, **last; + unsigned int len = dentry->d_name.len; + struct dentry *res = ERR_PTR(-ENOENT); + + if (!task) + goto out_no_task; + + last = &ns_entries[ARRAY_SIZE(ns_entries)]; + for (entry = ns_entries; entry < last; entry++) { + if (strlen((*entry)->name) != len) + continue; + if (!memcmp(dentry->d_name.name, (*entry)->name, len)) + break; + } + if (entry == last) + goto out; + + res = proc_ns_instantiate(dentry, task, *entry); +out: + put_task_struct(task); +out_no_task: + return res; +} + +const struct inode_operations proc_ns_dir_inode_operations = { + 
.lookup = proc_ns_dir_lookup, + .getattr = pid_getattr, + .setattr = proc_setattr, +}; From patchwork Thu Jul 30 12:00:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692851 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 786C0913 for ; Thu, 30 Jul 2020 12:00:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 5D6E320829 for ; Thu, 30 Jul 2020 12:00:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728248AbgG3MA1 (ORCPT ); Thu, 30 Jul 2020 08:00:27 -0400 Received: from relay.sw.ru ([185.231.240.75]:56774 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728112AbgG3MAY (ORCPT ); Thu, 30 Jul 2020 08:00:24 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17ED-0002xr-BS; Thu, 30 Jul 2020 15:00:05 +0300 Subject: [PATCH 11/23] fs: Add /proc/namespaces/ directory From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:19 +0300 Message-ID: <159611041929.535980.14513096920129728440.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This is a new directory to show all namespaces, which 
can be accessed from this /proc tasks credentials. Every /proc is related to a pid_namespace, and the pid_namespace is related to a user_namespace. The items we show in this /proc/namespaces/ directory are the namespaces whose user_namespaces are the same as /proc's user_namespace, or their descendants. Say, /proc has pid_ns->user_ns, so in /proc/namespaces we show only a ns, which is in_userns(pid_ns->user_ns, ns->user_ns). The final result is like below: # ls /proc/namespaces/ -l lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]' lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]' Every namespace may be opened like an ordinary file in /proc/[pid]/ns. 
Signed-off-by: Kirill Tkhai Reported-by: kernel test robot Reported-by: kernel test robot Reported-by: kernel test robot --- fs/nsfs.c | 2 fs/proc/Makefile | 1 fs/proc/internal.h | 16 ++ fs/proc/namespaces.c | 314 +++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/root.c | 17 ++- include/linux/proc_fs.h | 1 6 files changed, 345 insertions(+), 6 deletions(-) create mode 100644 fs/proc/namespaces.c diff --git a/fs/nsfs.c b/fs/nsfs.c index ee4be67d3a0b..61b789d2089c 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -58,7 +58,7 @@ static void nsfs_evict(struct inode *inode) ns->ops->put(ns); } -static int __ns_get_path(struct path *path, struct ns_common *ns) +int __ns_get_path(struct path *path, struct ns_common *ns) { struct vfsmount *mnt = nsfs_mnt; struct dentry *dentry; diff --git a/fs/proc/Makefile b/fs/proc/Makefile index dc2d51f42905..34ff671c6d59 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -25,6 +25,7 @@ proc-y += util.o proc-y += version.o proc-y += softirqs.o proc-y += task_namespaces.o +proc-y += namespaces.o proc-y += self.o proc-y += thread_self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 572757ff97be..d19fe5574799 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -134,10 +134,11 @@ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid); unsigned name_to_int(const struct qstr *qstr); -/* - * Offset of the first process in the /proc root directory.. - */ -#define FIRST_PROCESS_ENTRY 256 + +/* Offset of "namespaces" entry in /proc root directory */ +#define NAMESPACES_ENTRY 256 +/* Offset of the first process in the /proc root directory */ +#define FIRST_PROCESS_ENTRY (NAMESPACES_ENTRY + 1) /* Worst case buffer size needed for holding an integer. 
*/ #define PROC_NUMBUF 13 @@ -168,6 +169,7 @@ extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); +extern int proc_emit_namespaces(struct file *, struct dir_context *); extern int proc_pid_readdir(struct file *, struct dir_context *); struct dentry *proc_pid_lookup(struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); @@ -222,6 +224,12 @@ void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *); +/* + * namespaces.c + */ +extern int proc_setup_namespaces(struct super_block *); +extern void proc_namespaces_init(void); + /* * task_namespaces.c */ diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c new file mode 100644 index 000000000000..ab47e1555619 --- /dev/null +++ b/fs/proc/namespaces.c @@ -0,0 +1,314 @@ +#include +#include +#include +#include "internal.h" + +static unsigned namespaces_inum __ro_after_init; + +int proc_emit_namespaces(struct file *file, struct dir_context *ctx) +{ + struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); + struct inode *inode = d_inode(fs_info->proc_namespaces); + + return dir_emit(ctx, "namespaces", 10, inode->i_ino, DT_DIR); +} + +static int parse_namespace_dentry_name(const struct dentry *dentry, + const char **type, unsigned int *type_len, unsigned int *inum) +{ + const char *p, *name; + int count; + + *type = name = dentry->d_name.name; + p = strchr(name, ':'); + *type_len = p - name; + if (!p || p == name) + return -ENOENT; + + p += 1; + if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' || + *inum < PROC_NS_MIN_INO) + return -ENOENT; + + return 0; +} + +static struct ns_common *get_namespace_by_dentry(struct pid_namespace *pid_ns, + 
const struct dentry *dentry) +{ + unsigned int type_len, inum, p_inum; + struct user_namespace *user_ns; + struct ns_common *ns; + const char *type; + + if (parse_namespace_dentry_name(dentry, &type, &type_len, &inum) < 0) + return NULL; + + p_inum = inum - 1; + ns = ns_get_next(&p_inum); + if (!ns) + return NULL; + + if (ns->inum != inum || strncmp(type, ns->ops->name, type_len) != 0 || + ns->ops->name[type_len] != '\0') { + ns->ops->put(ns); + return NULL; + } + + if (ns->ops != &userns_operations) + user_ns = ns->ops->owner(ns); + else + user_ns = container_of(ns, struct user_namespace, ns); + + if (!in_userns(pid_ns->user_ns, user_ns)) { + ns->ops->put(ns); + return NULL; + } + + return ns; +} + +static struct dentry *proc_namespace_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr); + +static struct dentry *proc_namespaces_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb); + struct task_struct *task; + struct ns_common *ns; + + ns = get_namespace_by_dentry(pid_ns, dentry); + if (!ns) + return ERR_PTR(-ENOENT); + + read_lock(&tasklist_lock); + task = get_task_struct(pid_ns->child_reaper); + read_unlock(&tasklist_lock); + + dentry = proc_namespace_instantiate(dentry, task, ns); + put_task_struct(task); + ns->ops->put(ns); + + return dentry; +} + +static int proc_namespaces_permission(struct inode *inode, int mask) +{ + if ((mask & MAY_EXEC) && S_ISLNK(inode->i_mode)) + return -EACCES; + + return 0; +} + +static int proc_namespaces_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + + generic_fillattr(inode, stat); + return 0; +} + +static const struct inode_operations proc_namespaces_inode_operations = { + .lookup = proc_namespaces_lookup, + .permission = proc_namespaces_permission, + .getattr = proc_namespaces_getattr, +}; + +static int 
proc_namespaces_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + struct inode *dir = dentry->d_parent->d_inode; + struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb); + struct ns_common *ns; + + ns = get_namespace_by_dentry(pid_ns, dentry); + if (!ns) + return -ENOENT; + ns->ops->put(ns); + + /* proc_namespaces_readdir() creates dentry names in namespace format */ + return readlink_copy(buffer, buflen, dentry->d_iname); +} + +int __ns_get_path(struct path *path, struct ns_common *ns); + +static const char *proc_namespaces_getlink(struct dentry *dentry, + struct inode *inode, struct delayed_call *done) +{ + struct pid_namespace *pid_ns = proc_pid_ns(inode->i_sb); + struct ns_common *ns; + struct path path; + int ret; + + if (!dentry) + return ERR_PTR(-ECHILD); + + while (1) { + ret = -ENOENT; + ns = get_namespace_by_dentry(pid_ns, dentry); + if (!ns) + goto out; + + ret = __ns_get_path(&path, ns); + if (ret == -EAGAIN) + continue; + if (ret) + goto out; + break; + } + + ret = nd_jump_link(&path); +out: + return ERR_PTR(ret); +} + +static const struct inode_operations proc_namespaces_link_inode_operations = { + .readlink = proc_namespaces_readlink, + .get_link = proc_namespaces_getlink, +}; + +static int namespace_delete_dentry(const struct dentry *dentry) +{ + struct inode *dir = dentry->d_parent->d_inode; + struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb); + struct ns_common *ns; + + ns = get_namespace_by_dentry(pid_ns, dentry); + if (!ns) + return 1; + + ns->ops->put(ns); + return 0; +} + +const struct dentry_operations namespaces_dentry_operations = { + .d_delete = namespace_delete_dentry, +}; + +static void namespace_update_inode(struct inode *inode) +{ + struct user_namespace *user_ns = proc_pid_ns(inode->i_sb)->user_ns; + + inode->i_uid = make_kuid(user_ns, 0); + if (!uid_valid(inode->i_uid)) + inode->i_uid = GLOBAL_ROOT_UID; + + inode->i_gid = make_kgid(user_ns, 0); + if (!gid_valid(inode->i_gid)) + inode->i_gid = GLOBAL_ROOT_GID; 
+} + +static struct dentry *proc_namespace_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + const struct ns_common *ns = ptr; + struct inode *inode; + struct proc_inode *ei; + + /* + * Create inode with credentials of @task, and add it to @task's + * quick removal list. + */ + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO); + if (!inode) + return ERR_PTR(-ENOENT); + + ei = PROC_I(inode); + inode->i_op = &proc_namespaces_link_inode_operations; + ei->ns_ops = ns->ops; + namespace_update_inode(inode); + + d_set_d_op(dentry, &namespaces_dentry_operations); + return d_splice_alias(inode, dentry); +} + +static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx) +{ + struct pid_namespace *pid_ns = proc_pid_ns(file_inode(file)->i_sb); + struct user_namespace *user_ns; + struct task_struct *task; + struct ns_common *ns; + unsigned int inum; + + read_lock(&tasklist_lock); + task = get_task_struct(pid_ns->child_reaper); + read_unlock(&tasklist_lock); + + if (!dir_emit_dots(file, ctx)) + goto out; + + inum = ctx->pos - 2; + while ((ns = ns_get_next(&inum)) != NULL) { + unsigned int len; + char name[32]; + + if (ns->ops != &userns_operations) + user_ns = ns->ops->owner(ns); + else + user_ns = container_of(ns, struct user_namespace, ns); + + if (!in_userns(pid_ns->user_ns, user_ns)) + goto next; + + len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum); + + if (!proc_fill_cache(file, ctx, name, len, + proc_namespace_instantiate, task, ns)) { + ns->ops->put(ns); + break; + } +next: + ns->ops->put(ns); + ctx->pos = inum + 2; + } +out: + put_task_struct(task); + return 0; +} + +static const struct file_operations proc_namespaces_file_operations = { + .read = generic_read_dir, + .iterate_shared = proc_namespaces_readdir, + .llseek = generic_file_llseek, +}; + +int proc_setup_namespaces(struct super_block *s) +{ + struct proc_fs_info *fs_info = proc_sb_info(s); + struct inode *root_inode = 
d_inode(s->s_root); + struct dentry *namespaces; + int ret = -ENOMEM; + + inode_lock(root_inode); + namespaces = d_alloc_name(s->s_root, "namespaces"); + if (namespaces) { + struct inode *inode = new_inode_pseudo(s); + if (inode) { + inode->i_ino = namespaces_inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_op = &proc_namespaces_inode_operations; + inode->i_fop = &proc_namespaces_file_operations; + d_add(namespaces, inode); + ret = 0; + } else { + dput(namespaces); + } + } + inode_unlock(root_inode); + + if (ret) + pr_err("proc_setup_namespaces: can't allocate /proc/namespaces\n"); + else + fs_info->proc_namespaces = namespaces; + + return ret; +} + +void __init proc_namespaces_init(void) +{ + proc_alloc_inum(&namespaces_inum); +} diff --git a/fs/proc/root.c b/fs/proc/root.c index 5e444d4f9717..e4e4f90fca3d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -206,6 +206,10 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) return -ENOMEM; } + ret = proc_setup_namespaces(s); + if (ret) + return ret; + ret = proc_setup_self(s); if (ret) { return ret; @@ -272,6 +276,9 @@ static void proc_kill_sb(struct super_block *sb) dput(fs_info->proc_self); dput(fs_info->proc_thread_self); + if (fs_info->proc_namespaces) + dput(fs_info->proc_namespaces); + kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); kfree(fs_info); @@ -289,6 +296,7 @@ void __init proc_root_init(void) { proc_init_kmemcache(); set_proc_pid_nlink(); + proc_namespaces_init(); proc_self_init(); proc_thread_self_init(); proc_symlink("mounts", NULL, "self/mounts"); @@ -326,8 +334,15 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr static int proc_root_readdir(struct file *file, struct dir_context *ctx) { - if (ctx->pos < FIRST_PROCESS_ENTRY) { + if (ctx->pos < NAMESPACES_ENTRY) { int error = 
proc_readdir(file, ctx); + if (unlikely(error <= 0)) + return error; + ctx->pos = NAMESPACES_ENTRY; + } + + if (ctx->pos == NAMESPACES_ENTRY) { + int error = proc_emit_namespaces(file, ctx); if (unlikely(error <= 0)) return error; ctx->pos = FIRST_PROCESS_ENTRY; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 97b3f5f06db9..8b0002a6cacf 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -61,6 +61,7 @@ struct proc_fs_info { struct pid_namespace *pid_ns; struct dentry *proc_self; /* For /proc/self */ struct dentry *proc_thread_self; /* For /proc/thread-self */ + struct dentry *proc_namespaces; /* For /proc/namespaces */ kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; From patchwork Thu Jul 30 12:00:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692853 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6360F913 for ; Thu, 30 Jul 2020 12:00:34 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 51B4020838 for ; Thu, 30 Jul 2020 12:00:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728272AbgG3MAd (ORCPT ); Thu, 30 Jul 2020 08:00:33 -0400 Received: from relay.sw.ru ([185.231.240.75]:56830 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728005AbgG3MA1 (ORCPT ); Thu, 30 Jul 2020 08:00:27 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17EI-0002y6-PL; Thu, 30 Jul 2020 15:00:10 +0300 Subject: [PATCH 12/23] user: Free user_ns one RCU grace period after final counter put From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, 
ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:24 +0300 Message-ID: <159611042455.535980.12021168777274312453.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This is needed to link user_ns into ns_idr in next patch. Signed-off-by: Kirill Tkhai --- include/linux/user_namespace.h | 5 ++++- kernel/user_namespace.c | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec..58fede304201 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -79,7 +79,10 @@ struct user_namespace { #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; #endif - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; #ifdef CONFIG_SYSCTL struct ctl_table_set set; struct ctl_table_header *sysctls; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 7c2bbe8f3e45..367a942bb484 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -171,6 +171,13 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) return err; } +static void free_user_ns_rcu(struct rcu_head *head) +{ + struct user_namespace *ns = container_of(head, struct user_namespace, + rcu); + kmem_cache_free(user_ns_cachep, ns); +} + static void free_user_ns(struct work_struct *work) { struct user_namespace *parent, *ns = @@ -194,7 +201,7 @@ static void free_user_ns(struct work_struct *work) retire_userns_sysctls(ns); 
key_free_user_ns(ns); ns_free_inum(&ns->ns); - kmem_cache_free(user_ns_cachep, ns); + call_rcu(&ns->rcu, free_user_ns_rcu); dec_user_namespaces(ucounts); ns = parent; } while (refcount_dec_and_test(&parent->ns.count)); From patchwork Thu Jul 30 12:00:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692855 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4CFF9913 for ; Thu, 30 Jul 2020 12:00:37 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3F4B020838 for ; Thu, 30 Jul 2020 12:00:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728301AbgG3MAg (ORCPT ); Thu, 30 Jul 2020 08:00:36 -0400 Received: from relay.sw.ru ([185.231.240.75]:56868 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728112AbgG3MAd (ORCPT ); Thu, 30 Jul 2020 08:00:33 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17EO-0002yI-Is; Thu, 30 Jul 2020 15:00:16 +0300 Subject: [PATCH 13/23] user: Add user namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:30 +0300 Message-ID: <159611043002.535980.3019217992777905831.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: 
linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org User namespaces are now exposed in the /proc/namespaces/ directory. Signed-off-by: Kirill Tkhai --- kernel/user_namespace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 367a942bb484..bbfd7f0f9e7c 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -137,7 +137,13 @@ int create_user_ns(struct cred *new) goto fail_keyring; set_cred_user_ns(new, ns); + + if (ns_idr_register(&ns->ns)) + goto fail_sysctl; + return 0; +fail_sysctl: + retire_userns_sysctls(ns); fail_keyring: #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); @@ -186,6 +192,7 @@ static void free_user_ns(struct work_struct *work) do { struct ucounts *ucounts = ns->ucounts; parent = ns->parent; + ns_idr_unregister(&ns->ns); if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->gid_map.forward); kfree(ns->gid_map.reverse); @@ -1327,6 +1334,7 @@ const struct proc_ns_operations userns_operations = { static __init int user_namespaces_init(void) { user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); - return 0; + + return ns_idr_register(&init_user_ns.ns); } subsys_initcall(user_namespaces_init); From patchwork Thu Jul 30 12:00:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692857 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 47F5713B1 for ; Thu, 30 Jul 2020 12:00:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3A6CF20838 for ; Thu, 30 Jul 2020 12:00:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728355AbgG3MAo (ORCPT ); Thu, 30 Jul 2020 
08:00:44 -0400 Received: from relay.sw.ru ([185.231.240.75]:56914 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728318AbgG3MAk (ORCPT ); Thu, 30 Jul 2020 08:00:40 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17ET-0002yZ-U7; Thu, 30 Jul 2020 15:00:21 +0300 Subject: [PATCH 14/23] net: Add net namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:35 +0300 Message-ID: <159611043578.535980.14525601933828392397.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Net namespaces are now exposed in the /proc/namespaces/ directory. We already wait for an RCU grace period in cleanup_net() before pernet_operations exit, so ns_idr_unregister() works as expected. 
Signed-off-by: Kirill Tkhai --- net/core/net_namespace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5f658cbedd34..f78655a670e5 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -701,14 +701,24 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_pid); static __net_init int net_ns_net_init(struct net *net) { + int ret; #ifdef CONFIG_NET_NS net->ns.ops = &netns_operations; #endif - return ns_alloc_inum(&net->ns); + ret = ns_alloc_inum(&net->ns); + if (ret) + return ret; + + ret = ns_idr_register(&net->ns); + if (ret < 0) + ns_free_inum(&net->ns); + + return ret; } static __net_exit void net_ns_net_exit(struct net *net) { + ns_idr_unregister(&net->ns); ns_free_inum(&net->ns); } From patchwork Thu Jul 30 12:00:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692859 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6D51413B1 for ; Thu, 30 Jul 2020 12:00:50 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 5F56320829 for ; Thu, 30 Jul 2020 12:00:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728467AbgG3MAt (ORCPT ); Thu, 30 Jul 2020 08:00:49 -0400 Received: from relay.sw.ru ([185.231.240.75]:56960 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726989AbgG3MAp (ORCPT ); Thu, 30 Jul 2020 08:00:45 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17EZ-0002yn-7t; Thu, 30 Jul 2020 15:00:27 +0300 Subject: [PATCH 15/23] pid: Extract child_reaper check from pidns_for_children_get() From: Kirill Tkhai To: viro@zeniv.linux.org.uk, 
adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:41 +0300 Message-ID: <159611044115.535980.10236831314879436296.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This check is for prohibiting access to /proc/[pid]/ns/pid_for_children before the first task of the pid namespace is created. The /proc/namespaces/ code will use this check too, so we move it into a separate function. Signed-off-by: Kirill Tkhai --- kernel/pid_namespace.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index d02dc1696edf..4a01328e8763 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -343,6 +343,21 @@ static struct ns_common *pidns_get(struct task_struct *task) return ns ? 
&ns->ns : NULL; } +static bool pidns_can_get(struct ns_common *ns) +{ + struct pid_namespace *pid_ns; + bool ret = true; + + pid_ns = container_of(ns, struct pid_namespace, ns); + + read_lock(&tasklist_lock); + if (!pid_ns->child_reaper) + ret = false; + read_unlock(&tasklist_lock); + + return ret; +} + static struct ns_common *pidns_for_children_get(struct task_struct *task) { struct pid_namespace *ns = NULL; @@ -354,13 +369,9 @@ static struct ns_common *pidns_for_children_get(struct task_struct *task) } task_unlock(task); - if (ns) { - read_lock(&tasklist_lock); - if (!ns->child_reaper) { - put_pid_ns(ns); - ns = NULL; - } - read_unlock(&tasklist_lock); + if (ns && !pidns_can_get(&ns->ns)) { + put_pid_ns(ns); + ns = NULL; } return ns ? &ns->ns : NULL; From patchwork Thu Jul 30 12:00:46 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692861 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8963913B1 for ; Thu, 30 Jul 2020 12:00:53 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 79C7E20838 for ; Thu, 30 Jul 2020 12:00:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728589AbgG3MAw (ORCPT ); Thu, 30 Jul 2020 08:00:52 -0400 Received: from relay.sw.ru ([185.231.240.75]:56990 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728234AbgG3MAu (ORCPT ); Thu, 30 Jul 2020 08:00:50 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Ee-0002z4-HB; Thu, 30 Jul 2020 15:00:32 +0300 Subject: [PATCH 16/23] proc_ns_operations: Add can_get method From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, 
ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:46 +0300 Message-ID: <159611044644.535980.6920767742450563865.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This is a new method for prohibiting access to namespaces that are in an intermediate state. Currently, it is used to prohibit access to a pid namespace whose child reaper has not been created yet (similar to what we have for /proc/[pid]/ns/pid_for_children). Signed-off-by: Kirill Tkhai --- fs/proc/namespaces.c | 5 +++++ include/linux/proc_ns.h | 1 + kernel/pid_namespace.c | 1 + 3 files changed, 7 insertions(+) diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index ab47e1555619..70fc23295315 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -149,6 +149,11 @@ static const char *proc_namespaces_getlink(struct dentry *dentry, ns = get_namespace_by_dentry(pid_ns, dentry); if (!ns) goto out; + ret = -ESRCH; + if (ns->ops->can_get && !ns->ops->can_get(ns)) { + ns->ops->put(ns); + goto out; + } ret = __ns_get_path(&path, ns); if (ret == -EAGAIN) diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 906e6ebb43e4..e44ec466711a 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -19,6 +19,7 @@ struct proc_ns_operations { int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); + bool (*can_get)(struct ns_common *ns); int (*install)(struct nsset *nsset, struct ns_common *ns); struct user_namespace *(*owner)(struct ns_common *ns); struct ns_common *(*get_parent)(struct ns_common *ns); diff --git 
a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 4a01328e8763..da8490390f51 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -452,6 +452,7 @@ const struct proc_ns_operations pidns_for_children_operations = { .real_ns_name = "pid", .type = CLONE_NEWPID, .get = pidns_for_children_get, + .can_get = pidns_can_get, .put = pidns_put, .install = pidns_install, .owner = pidns_owner, From patchwork Thu Jul 30 12:00:51 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692863 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 01198913 for ; Thu, 30 Jul 2020 12:00:59 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E663B20838 for ; Thu, 30 Jul 2020 12:00:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728635AbgG3MAz (ORCPT ); Thu, 30 Jul 2020 08:00:55 -0400 Received: from relay.sw.ru ([185.231.240.75]:57040 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728592AbgG3MAy (ORCPT ); Thu, 30 Jul 2020 08:00:54 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Ej-0002zO-S1; Thu, 30 Jul 2020 15:00:37 +0300 Subject: [PATCH 17/23] pid: Add pid namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:51 +0300 Message-ID: <159611045173.535980.13125182914304511849.stgit@localhost.localdomain> In-Reply-To: 
<159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Now they are exposed in /proc/namespace/ directory. Note, that we already wait RCU grace period before pid namespace's memory is freed. Signed-off-by: Kirill Tkhai --- kernel/pid_namespace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index da8490390f51..06398a7c4c59 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -109,8 +109,13 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->ucounts = ucounts; ns->pid_allocated = PIDNS_ADDING; + if (ns_idr_register(&ns->ns) < 0) + goto out_free_inum; + return ns; +out_free_inum: + ns_free_inum(&ns->ns); out_free_idr: idr_destroy(&ns->idr); kmem_cache_free(pid_ns_cachep, ns); @@ -132,6 +137,7 @@ static void delayed_free_pidns(struct rcu_head *p) static void destroy_pid_namespace(struct pid_namespace *ns) { + ns_idr_unregister(&ns->ns); ns_free_inum(&ns->ns); idr_destroy(&ns->idr); @@ -466,6 +472,7 @@ static __init int pid_namespaces_init(void) #ifdef CONFIG_CHECKPOINT_RESTORE register_sysctl_paths(kern_path, pid_ns_ctl_table); #endif + WARN_ON(ns_idr_register(&init_pid_ns.ns) < 0); return 0; } From patchwork Thu Jul 30 12:00:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692865 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 42D1813B1 for ; Thu, 30 Jul 2020 12:01:09 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 
271FB208A9 for ; Thu, 30 Jul 2020 12:01:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728667AbgG3MBH (ORCPT ); Thu, 30 Jul 2020 08:01:07 -0400 Received: from relay.sw.ru ([185.231.240.75]:57080 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728592AbgG3MBA (ORCPT ); Thu, 30 Jul 2020 08:01:00 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Ep-0002zc-Ew; Thu, 30 Jul 2020 15:00:43 +0300 Subject: [PATCH 18/23] uts: Free uts namespace one RCU grace period after final counter put From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:00:57 +0300 Message-ID: <159611045706.535980.12888496197045005870.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org This is needed to link uts_ns into ns_idr in next patch. 
Signed-off-by: Kirill Tkhai --- include/linux/utsname.h | 1 + kernel/utsname.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2b1737c9b244..b783d0fe6ca4 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -25,6 +25,7 @@ struct uts_namespace { struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; + struct rcu_head rcu; } __randomize_layout; extern struct uts_namespace init_uts_ns; diff --git a/kernel/utsname.c b/kernel/utsname.c index b1ac3ca870f2..aebf4df5f592 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -103,12 +103,20 @@ struct uts_namespace *copy_utsname(unsigned long flags, return new_ns; } +static void free_uts_ns_rcu(struct rcu_head *head) +{ + struct uts_namespace *ns; + + ns = container_of(head, struct uts_namespace, rcu); + kmem_cache_free(uts_ns_cache, ns); +} + void free_uts_ns(struct uts_namespace *ns) { dec_uts_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); - kmem_cache_free(uts_ns_cache, ns); + call_rcu(&ns->rcu, free_uts_ns_rcu); } static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) From patchwork Thu Jul 30 12:01:02 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692867 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A526A913 for ; Thu, 30 Jul 2020 12:01:13 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 973DC20842 for ; Thu, 30 Jul 2020 12:01:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728703AbgG3MBL (ORCPT ); Thu, 30 Jul 2020 08:01:11 -0400 Received: from relay.sw.ru ([185.231.240.75]:57134 "EHLO relay3.sw.ru" 
rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728665AbgG3MBJ (ORCPT ); Thu, 30 Jul 2020 08:01:09 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17Eu-0002zq-Q1; Thu, 30 Jul 2020 15:00:48 +0300 Subject: [PATCH 19/23] uts: Add uts namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:01:02 +0300 Message-ID: <159611046266.535980.16940792342033728104.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org UTS namespaces are now exposed in the /proc/namespaces/ directory. 
Signed-off-by: Kirill Tkhai --- kernel/utsname.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/utsname.c b/kernel/utsname.c index aebf4df5f592..883855ca16cd 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -70,8 +70,16 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); ns->user_ns = get_user_ns(user_ns); up_read(&uts_sem); + + err = ns_idr_register(&ns->ns); + if (err) + goto fail_put; + return ns; +fail_put: + put_user_ns(user_ns); + ns_free_inum(&ns->ns); fail_free: kmem_cache_free(uts_ns_cache, ns); fail_dec: @@ -113,6 +121,7 @@ static void free_uts_ns_rcu(struct rcu_head *head) void free_uts_ns(struct uts_namespace *ns) { + ns_idr_unregister(&ns->ns); dec_uts_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); @@ -182,4 +191,5 @@ void __init uts_ns_init(void) offsetof(struct uts_namespace, name), sizeof_field(struct uts_namespace, name), NULL); + WARN_ON(ns_idr_register(&init_uts_ns.ns) < 0); } From patchwork Thu Jul 30 12:01:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692875 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3CD31913 for ; Thu, 30 Jul 2020 12:01:34 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2F92C20829 for ; Thu, 30 Jul 2020 12:01:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728719AbgG3MBN (ORCPT ); Thu, 30 Jul 2020 08:01:13 -0400 Received: from relay.sw.ru ([185.231.240.75]:57156 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728675AbgG3MBK (ORCPT ); Thu, 30 Jul 2020 08:01:10 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) 
by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17F0-0002zz-44; Thu, 30 Jul 2020 15:00:54 +0300 Subject: [PATCH 20/23] ipc: Add ipc namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:01:08 +0300 Message-ID: <159611046802.535980.15486117399701470621.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Now they are exposed in /proc/namespace/ directory. Signed-off-by: Kirill Tkhai --- ipc/namespace.c | 13 ++++++++++++- ipc/shm.c | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ipc/namespace.c b/ipc/namespace.c index 7bd0766ddc3b..ce6f87dd6d08 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -63,8 +63,17 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, msg_init_ns(ns); shm_init_ns(ns); + err = ns_idr_register(&ns->ns); + if (err) + goto fail_exit; + return ns; +fail_exit: + mq_put_mnt(ns); + sem_exit_ns(ns); + msg_exit_ns(ns); + shm_exit_ns(ns); fail_put: put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); @@ -117,6 +126,8 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, static void free_ipc_ns(struct ipc_namespace *ns) { + ns_idr_unregister(&ns->ns); + /* mq_put_mnt() waits for a grace period as kern_unmount() * uses synchronize_rcu(). 
*/ @@ -128,7 +139,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); - kfree(ns); + kfree(ns); /* RCU grace period wait is done in mq_put_mnt */ } static LLIST_HEAD(free_ipc_list); diff --git a/ipc/shm.c b/ipc/shm.c index 6cf24a5994ec..9e83556d9dcb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -137,6 +137,7 @@ void shm_exit_ns(struct ipc_namespace *ns) static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); + WARN_ON(ns_idr_register(&init_ipc_ns.ns) < 0); return 0; } From patchwork Thu Jul 30 12:01:13 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692869 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 41F3B13B1 for ; Thu, 30 Jul 2020 12:01:19 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2AB9C208A9 for ; Thu, 30 Jul 2020 12:01:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728863AbgG3MBS (ORCPT ); Thu, 30 Jul 2020 08:01:18 -0400 Received: from relay.sw.ru ([185.231.240.75]:57218 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728737AbgG3MBR (ORCPT ); Thu, 30 Jul 2020 08:01:17 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17F5-00030I-Fo; Thu, 30 Jul 2020 15:00:59 +0300 Subject: [PATCH 21/23] mnt: Add mount namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 
2020 15:01:13 +0300 Message-ID: <159611047332.535980.13828558388565780541.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Now they are exposed in /proc/namespace/ directory. Signed-off-by: Kirill Tkhai --- fs/mount.h | 1 + fs/namespace.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/mount.h b/fs/mount.h index f296862032ec..cde7f7bed8ec 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -23,6 +23,7 @@ struct mnt_namespace { u64 event; unsigned int mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; + struct rcu_head rcu; } __randomize_layout; struct mnt_pcp { diff --git a/fs/namespace.c b/fs/namespace.c index 8c39810e6ec3..756e43fd21f3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3258,7 +3258,7 @@ static void free_mnt_ns(struct mnt_namespace *ns) ns_free_inum(&ns->ns); dec_mnt_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - kfree(ns); + kfree_rcu(ns, rcu); } /* @@ -3382,6 +3382,12 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, if (pwdmnt) mntput(pwdmnt); + if (ns_idr_register(&new_ns->ns) < 0) { + drop_collected_mounts(&new_ns->root->mnt); + free_mnt_ns(new_ns); + new_ns = ERR_PTR(-ENOMEM); + } + return new_ns; } @@ -3824,6 +3830,7 @@ static void __init init_mount_tree(void) list_add(&m->mnt_list, &ns->list); init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); + WARN_ON(ns_idr_register(&ns->ns) < 0); root.mnt = mnt; root.dentry = mnt->mnt_root; @@ -3872,6 +3879,7 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!refcount_dec_and_test(&ns->ns.count)) return; + ns_idr_unregister(&ns->ns); drop_collected_mounts(&ns->root->mnt); free_mnt_ns(ns); } From patchwork Thu Jul 30 
12:01:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692871 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 86729913 for ; Thu, 30 Jul 2020 12:01:28 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 76F00208A9 for ; Thu, 30 Jul 2020 12:01:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728947AbgG3MBY (ORCPT ); Thu, 30 Jul 2020 08:01:24 -0400 Received: from relay.sw.ru ([185.231.240.75]:57242 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728918AbgG3MBV (ORCPT ); Thu, 30 Jul 2020 08:01:21 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17FA-00030d-Pn; Thu, 30 Jul 2020 15:01:04 +0300 Subject: [PATCH 22/23] cgroup: Add cgroup namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:01:18 +0300 Message-ID: <159611047870.535980.3790860133632973446.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Cgroup namespaces are now exposed in the /proc/namespace/ directory. 
Signed-off-by: Kirill Tkhai --- include/linux/cgroup.h | 1 + kernel/cgroup/namespace.c | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 451c2d26a5db..38913d91fa92 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -858,6 +858,7 @@ struct cgroup_namespace { struct user_namespace *user_ns; struct ucounts *ucounts; struct css_set *root_cset; + struct rcu_head rcu; }; extern struct cgroup_namespace init_cgroup_ns; diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index f5e8828c109c..64393bbafb2c 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -39,11 +39,12 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) void free_cgroup_ns(struct cgroup_namespace *ns) { + ns_idr_unregister(&ns->ns); put_css_set(ns->root_cset); dec_cgroup_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); - kfree(ns); + kfree_rcu(ns, rcu); } EXPORT_SYMBOL(free_cgroup_ns); @@ -54,6 +55,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct cgroup_namespace *new_ns; struct ucounts *ucounts; struct css_set *cset; + int err; BUG_ON(!old_ns); @@ -78,16 +80,28 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, new_ns = alloc_cgroup_ns(); if (IS_ERR(new_ns)) { - put_css_set(cset); - dec_cgroup_namespaces(ucounts); - return new_ns; + err = PTR_ERR(new_ns); + goto err_put_css_set; } new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; new_ns->root_cset = cset; + err = ns_idr_register(&new_ns->ns); + if (err < 0) + goto err_put_user_ns; + return new_ns; + +err_put_user_ns: + put_user_ns(new_ns->user_ns); + ns_free_inum(&new_ns->ns); + kfree(new_ns); +err_put_css_set: + put_css_set(cset); + dec_cgroup_namespaces(ucounts); + return ERR_PTR(err); } static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) @@ -152,6 +166,7 @@ const struct proc_ns_operations 
cgroupns_operations = { static __init int cgroup_namespaces_init(void) { + WARN_ON(ns_idr_register(&init_cgroup_ns.ns) < 0); return 0; } subsys_initcall(cgroup_namespaces_init); From patchwork Thu Jul 30 12:01:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kirill Tkhai X-Patchwork-Id: 11692873 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id ED829913 for ; Thu, 30 Jul 2020 12:01:29 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D2B0D20829 for ; Thu, 30 Jul 2020 12:01:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729017AbgG3MB2 (ORCPT ); Thu, 30 Jul 2020 08:01:28 -0400 Received: from relay.sw.ru ([185.231.240.75]:57292 "EHLO relay3.sw.ru" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728951AbgG3MB0 (ORCPT ); Thu, 30 Jul 2020 08:01:26 -0400 Received: from [192.168.15.64] (helo=localhost.localdomain) by relay3.sw.ru with esmtp (Exim 4.93) (envelope-from ) id 1k17FG-00030u-52; Thu, 30 Jul 2020 15:01:10 +0300 Subject: [PATCH 23/23] time: Add time namespaces into ns_idr From: Kirill Tkhai To: viro@zeniv.linux.org.uk, adobriyan@gmail.com, davem@davemloft.net, ebiederm@xmission.com, akpm@linux-foundation.org, christian.brauner@ubuntu.com, areber@redhat.com, serge@hallyn.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, ktkhai@virtuozzo.com Date: Thu, 30 Jul 2020 15:01:24 +0300 Message-ID: <159611048400.535980.3963260192974391169.stgit@localhost.localdomain> In-Reply-To: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> References: <159611007271.535980.15362304262237658692.stgit@localhost.localdomain> User-Agent: StGit/0.19 MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: 
X-Mailing-List: linux-fsdevel@vger.kernel.org Now they are exposed in /proc/namespace/ directory. Signed-off-by: Kirill Tkhai --- include/linux/time_namespace.h | 1 + kernel/time/namespace.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index a51ffc089219..18eb8a9f7d68 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -24,6 +24,7 @@ struct time_namespace { struct page *vvar_page; /* If set prevents changing offsets after any task joined namespace. */ bool frozen_offsets; + struct rcu_head rcu; } __randomize_layout; extern struct time_namespace init_time_ns; diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index c4c829eb3511..164a057ccbfc 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -107,8 +107,15 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, ns->user_ns = get_user_ns(user_ns); ns->offsets = old_ns->offsets; ns->frozen_offsets = false; + + err = ns_idr_register(&ns->ns); + if (err) + goto fail_put_userns; return ns; +fail_put_userns: + put_user_ns(user_ns); + ns_free_inum(&ns->ns); fail_free_page: __free_page(ns->vvar_page); fail_free: @@ -228,11 +235,12 @@ static void timens_set_vvar_page(struct task_struct *task, void free_time_ns(struct time_namespace *ns) { + ns_idr_unregister(&ns->ns); dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); __free_page(ns->vvar_page); - kfree(ns); + kfree_rcu(ns, rcu); } static struct time_namespace *to_time_ns(struct ns_common *ns) @@ -470,6 +478,7 @@ struct time_namespace init_time_ns = { static int __init time_ns_init(void) { + WARN_ON(ns_idr_register(&init_time_ns.ns) < 0); return 0; } subsys_initcall(time_ns_init);