diff mbox series

[RFC,1/2] ns: introduce binfmt_misc namespace

Message ID 20180930234628.25528-2-laurent@vivier.eu (mailing list archive)
State New, archived
Headers show
Series ns: introduce binfmt_misc namespace | expand

Commit Message

Laurent Vivier Sept. 30, 2018, 11:46 p.m. UTC
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 fs/proc/namespaces.c             |   3 +
 include/linux/binfmt_namespace.h |  51 +++++++++++
 include/linux/nsproxy.h          |   2 +
 include/linux/proc_ns.h          |   2 +
 include/linux/user_namespace.h   |   1 +
 include/uapi/linux/sched.h       |   1 +
 init/Kconfig                     |   8 ++
 kernel/Makefile                  |   1 +
 kernel/binfmt_namespace.c        | 153 +++++++++++++++++++++++++++++++
 kernel/fork.c                    |   3 +-
 kernel/nsproxy.c                 |  18 +++-
 11 files changed, 240 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/binfmt_namespace.h
 create mode 100644 kernel/binfmt_namespace.c

Comments

Greg Kroah-Hartman Oct. 1, 2018, 1:21 a.m. UTC | #1
On Mon, Oct 01, 2018 at 01:46:27AM +0200, Laurent Vivier wrote:
> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
> ---

I don't take patches without any changelog text, I don't know if other
maintainers are as nice.  But for a new feature, you really should write
something...

thanks,

greg k-h
Laurent Vivier Oct. 1, 2018, 7 a.m. UTC | #2
Le 01/10/2018 à 03:21, Greg KH a écrit :
> On Mon, Oct 01, 2018 at 01:46:27AM +0200, Laurent Vivier wrote:
>> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
>> ---
> 
> I don't take patches without any changelog text, I don't know if other
> maintainers are as nice.  But for a new feature, you really should write
> something...

Yes, I know. But it's an RFC and all the explanations are in the cover
letter for now. I will fill the changelog once I know if the feature is
interesting or not.

Thank you for your comment.

Laurent
diff mbox series

Patch

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index dd2b35f78b09..4d86549a788f 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -33,6 +33,9 @@  static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_CGROUPS
 	&cgroupns_operations,
 #endif
+#ifdef CONFIG_BINFMT_NS
+	&binfmtns_operations,
+#endif
 };
 
 static const char *proc_ns_get_link(struct dentry *dentry,
diff --git a/include/linux/binfmt_namespace.h b/include/linux/binfmt_namespace.h
new file mode 100644
index 000000000000..8688869ee254
--- /dev/null
+++ b/include/linux/binfmt_namespace.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BINFMT_NAMESPACE_H
+#define _LINUX_BINFMT_NAMESPACE_H
+
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/ns_common.h>
+#include <linux/err.h>
+
+struct user_namespace;
+extern struct user_namespace init_user_ns;
+
+/*
+ * struct binfmt_namespace - one binfmt_misc namespace instance
+ * @kref:    refcount; the last put_binfmt_ns() calls free_binfmt_ns()
+ * @user_ns: user namespace that owns this namespace
+ * @ucounts: UCOUNT_BINFMT_NAMESPACES charge held for the namespace's lifetime
+ * @ns:      common namespace part (inode number and proc_ns operations)
+ */
+struct binfmt_namespace {
+	struct kref kref;
+	struct user_namespace *user_ns;
+	struct ucounts *ucounts;
+	struct ns_common ns;
+} __randomize_layout;
+extern struct binfmt_namespace init_binfmt_ns;
+
+#ifdef CONFIG_BINFMT_NS
+/* Take a reference on @ns; a NULL @ns is ignored. */
+static inline void get_binfmt_ns(struct binfmt_namespace *ns)
+{
+	if (ns)
+		kref_get(&ns->kref);
+}
+
+extern struct binfmt_namespace *copy_binfmt_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct binfmt_namespace *old_ns);
+extern void free_binfmt_ns(struct kref *kref);
+
+/* Drop a reference on @ns, freeing it on the last put; NULL is ignored. */
+static inline void put_binfmt_ns(struct binfmt_namespace *ns)
+{
+	if (ns)
+		kref_put(&ns->kref, free_binfmt_ns);
+}
+
+#else
+/* CONFIG_BINFMT_NS is off: the get/put helpers do nothing. */
+static inline void get_binfmt_ns(struct binfmt_namespace *ns)
+{
+}
+
+static inline void put_binfmt_ns(struct binfmt_namespace *ns)
+{
+}
+
+/* Without namespace support, CLONE_NEWBINFMT is rejected with -EINVAL. */
+static inline struct binfmt_namespace *copy_binfmt_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct binfmt_namespace *old_ns)
+{
+	if (flags & CLONE_NEWBINFMT)
+		return ERR_PTR(-EINVAL);
+
+	return old_ns;
+}
+#endif
+#endif /* _LINUX_BINFMT_NAMESPACE_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 2ae1b1a4d84d..8d2294477095 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -10,6 +10,7 @@  struct uts_namespace;
 struct ipc_namespace;
 struct pid_namespace;
 struct cgroup_namespace;
+struct binfmt_namespace;
 struct fs_struct;
 
 /*
@@ -36,6 +37,7 @@  struct nsproxy {
 	struct pid_namespace *pid_ns_for_children;
 	struct net 	     *net_ns;
 	struct cgroup_namespace *cgroup_ns;
+	struct binfmt_namespace *binfmt_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index d31cb6215905..6afa2dbc5204 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -32,6 +32,7 @@  extern const struct proc_ns_operations pidns_for_children_operations;
 extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
 extern const struct proc_ns_operations cgroupns_operations;
+extern const struct proc_ns_operations binfmtns_operations;
 
 /*
  * We always define these enumerators
@@ -43,6 +44,7 @@  enum {
 	PROC_USER_INIT_INO	= 0xEFFFFFFDU,
 	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
 	PROC_CGROUP_INIT_INO	= 0xEFFFFFFBU,
+	PROC_BINFMT_INIT_INO	= 0xEFFFFFFAU,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index d6b74b91096b..81365a22362c 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -45,6 +45,7 @@  enum ucount_type {
 	UCOUNT_NET_NAMESPACES,
 	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
+	UCOUNT_BINFMT_NAMESPACES,
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 22627f80063e..51fe40681e8e 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -10,6 +10,7 @@ 
 #define CLONE_FS	0x00000200	/* set if fs info shared between processes */
 #define CLONE_FILES	0x00000400	/* set if open files shared between processes */
 #define CLONE_SIGHAND	0x00000800	/* set if signal handlers and blocked signals shared */
+#define CLONE_NEWBINFMT	0x00001000	/* New binfmt_misc namespace */
 #define CLONE_PTRACE	0x00002000	/* set if we want to let tracing continue on the child too */
 #define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
 #define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
diff --git a/init/Kconfig b/init/Kconfig
index 1e234e2f1cba..4874719a2799 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -965,6 +965,14 @@  config NET_NS
 	  Allow user space to create what appear to be multiple instances
 	  of the network stack.
 
+config BINFMT_NS
+	bool "binfmt_misc Namespace"
+	depends on BINFMT_MISC
+	default y
+	help
+	  This allows several independent binfmt_misc configurations to be
+	  used on the same system.
+
 endif # NAMESPACES
 
 config CHECKPOINT_RESTORE
diff --git a/kernel/Makefile b/kernel/Makefile
index 7a63d567fdb5..313c80f5883f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -72,6 +72,7 @@  obj-$(CONFIG_CGROUPS) += cgroup/
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_USER_NS) += user_namespace.o
 obj-$(CONFIG_PID_NS) += pid_namespace.o
+obj-$(CONFIG_BINFMT_NS) += binfmt_namespace.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_SMP) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
diff --git a/kernel/binfmt_namespace.c b/kernel/binfmt_namespace.c
new file mode 100644
index 000000000000..63a80bcd70df
--- /dev/null
+++ b/kernel/binfmt_namespace.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/nsproxy.h>
+#include <linux/user_namespace.h>
+#include <linux/cred.h>
+#include <linux/binfmt_namespace.h>
+#include <linux/proc_ns.h>
+#include <linux/sched/task.h>
+
+/* Charge one binfmt namespace to the current euid in @ns (NULL if refused). */
+static struct ucounts *inc_binfmt_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, current_euid(), UCOUNT_BINFMT_NAMESPACES);
+}
+
+/* Give back a charge taken by inc_binfmt_namespaces(). */
+static void dec_binfmt_namespaces(struct ucounts *ucounts)
+{
+	dec_ucount(ucounts, UCOUNT_BINFMT_NAMESPACES);
+}
+
+/* Allocate a namespace with its refcount initialised; NULL on failure. */
+static struct binfmt_namespace *create_binfmt_ns(void)
+{
+	struct binfmt_namespace *binfmt_ns;
+
+	binfmt_ns = kmalloc(sizeof(*binfmt_ns), GFP_KERNEL);
+	if (binfmt_ns)
+		kref_init(&binfmt_ns->kref);
+	return binfmt_ns;
+}
+
+/*
+ * Create a new binfmt namespace owned by @user_ns.
+ *
+ * Returns the namespace, or an ERR_PTR(): -ENOSPC if the ucount charge is
+ * refused, -ENOMEM if allocation fails, or the ns_alloc_inum() error.
+ * @old_ns is not used yet.
+ */
+static struct binfmt_namespace *clone_binfmt_ns(struct user_namespace *user_ns,
+					       struct binfmt_namespace *old_ns)
+{
+	struct binfmt_namespace *ns;
+	struct ucounts *ucounts;
+	int err;
+
+	err = -ENOSPC;
+	ucounts = inc_binfmt_namespaces(user_ns);
+	if (!ucounts)
+		goto fail;
+
+	err = -ENOMEM;
+	ns = create_binfmt_ns();
+	if (!ns)
+		goto fail_dec;
+
+	err = ns_alloc_inum(&ns->ns);
+	if (err)
+		goto fail_free;
+
+	ns->ucounts = ucounts;
+	ns->ns.ops = &binfmtns_operations;
+	ns->user_ns = get_user_ns(user_ns);
+	return ns;
+
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_binfmt_namespaces(ucounts);
+fail:
+	return ERR_PTR(err);
+}
+
+/*
+ * Pick the binfmt namespace for a new nsproxy: @old_ns with an extra
+ * reference unless CLONE_NEWBINFMT is set in @flags, in which case a new
+ * namespace owned by @user_ns (or an ERR_PTR()) is returned.
+ */
+struct binfmt_namespace *copy_binfmt_ns(unsigned long flags,
+		struct user_namespace *user_ns, struct binfmt_namespace *old_ns)
+{
+	if (!(flags & CLONE_NEWBINFMT)) {
+		get_binfmt_ns(old_ns);
+		return old_ns;
+	}
+
+	return clone_binfmt_ns(user_ns, old_ns);
+}
+
+/* kref release callback: tear down what clone_binfmt_ns() set up. */
+void free_binfmt_ns(struct kref *kref)
+{
+	struct binfmt_namespace *ns;
+
+	ns = container_of(kref, struct binfmt_namespace, kref);
+	dec_binfmt_namespaces(ns->ucounts);
+	put_user_ns(ns->user_ns);
+	ns_free_inum(&ns->ns);
+	kfree(ns);
+}
+
+/* Map an embedded ns_common back to its binfmt_namespace. */
+static inline struct binfmt_namespace *to_binfmt_ns(struct ns_common *ns)
+{
+	return container_of(ns, struct binfmt_namespace, ns);
+}
+
+/*
+ * proc_ns ->get: return @task's binfmt namespace with a reference held, or
+ * NULL if the task has no nsproxy or no binfmt namespace.
+ */
+static struct ns_common *binfmtns_get(struct task_struct *task)
+{
+	struct binfmt_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	task_lock(task);
+	nsproxy = task->nsproxy;
+	if (nsproxy) {
+		ns = nsproxy->binfmt_ns;
+		get_binfmt_ns(ns);
+	}
+	task_unlock(task);
+
+	return ns ? &ns->ns : NULL;
+}
+
+/* proc_ns ->put: drop one reference on the namespace embedding @ns. */
+static void binfmtns_put(struct ns_common *ns)
+{
+	put_binfmt_ns(to_binfmt_ns(ns));
+}
+
+/*
+ * proc_ns ->install: make @new the binfmt namespace of @nsproxy.  Requires
+ * CAP_SYS_ADMIN in both the user namespace owning @new and the caller's
+ * current user namespace; returns -EPERM otherwise.
+ */
+static int binfmtns_install(struct nsproxy *nsproxy, struct ns_common *new)
+{
+	struct binfmt_namespace *ns = to_binfmt_ns(new);
+
+	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+		return -EPERM;
+
+	get_binfmt_ns(ns);
+	put_binfmt_ns(nsproxy->binfmt_ns);
+	nsproxy->binfmt_ns = ns;
+	return 0;
+}
+
+/* proc_ns ->owner: the user namespace that owns @ns. */
+static struct user_namespace *binfmtns_owner(struct ns_common *ns)
+{
+	return to_binfmt_ns(ns)->user_ns;
+}
+
+const struct proc_ns_operations binfmtns_operations = {
+	.name		= "binfmt_misc",
+	.type		= CLONE_NEWBINFMT,
+	.get		= binfmtns_get,
+	.put		= binfmtns_put,
+	.install	= binfmtns_install,
+	.owner		= binfmtns_owner,
+};
+
+/*
+ * The initial binfmt namespace, owned by init_user_ns.  This file is only
+ * built with CONFIG_BINFMT_NS=y, so .ns.ops needs no #ifdef.
+ */
+struct binfmt_namespace init_binfmt_ns = {
+	.kref = KREF_INIT(2),
+	.user_ns = &init_user_ns,
+	.ns.inum = PROC_BINFMT_INIT_INO,
+	.ns.ops = &binfmtns_operations,
+};
diff --git a/kernel/fork.c b/kernel/fork.c
index f0b58479534f..d89cf8b89e43 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2365,7 +2365,8 @@  static int check_unshare_flags(unsigned long unshare_flags)
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
+				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
+				CLONE_NEWBINFMT))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d330059a..386028e6da39 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,7 @@ 
 #include <linux/pid_namespace.h>
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
+#include <linux/binfmt_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/file.h>
 #include <linux/syscalls.h>
@@ -44,6 +45,9 @@  struct nsproxy init_nsproxy = {
 #ifdef CONFIG_CGROUPS
 	.cgroup_ns		= &init_cgroup_ns,
 #endif
+#ifdef CONFIG_BINFMT_NS	/* init_binfmt_ns lives in binfmt_namespace.o */
+	.binfmt_ns		= &init_binfmt_ns,
+#endif
 };
 
 static inline struct nsproxy *create_nsproxy(void)
@@ -110,6 +114,15 @@  static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_net;
 	}
 
+	new_nsp->binfmt_ns = copy_binfmt_ns(flags, user_ns,
+					    tsk->nsproxy->binfmt_ns);
+	if (IS_ERR(new_nsp->binfmt_ns)) {
+		err = PTR_ERR(new_nsp->binfmt_ns);
+		goto out_binfmt;
+	}
+
 	return new_nsp;
 
+out_binfmt:
+	put_net(new_nsp->net_ns);	/* net_ns was already copied; drop it */
 out_net:
@@ -143,7 +154,7 @@  int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 
 	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 			      CLONE_NEWPID | CLONE_NEWNET |
-			      CLONE_NEWCGROUP)))) {
+			      CLONE_NEWCGROUP | CLONE_NEWBINFMT)))) {
 		get_nsproxy(old_ns);
 		return 0;
 	}
@@ -180,6 +191,8 @@  void free_nsproxy(struct nsproxy *ns)
 		put_ipc_ns(ns->ipc_ns);
 	if (ns->pid_ns_for_children)
 		put_pid_ns(ns->pid_ns_for_children);
+	if (ns->binfmt_ns)
+		put_binfmt_ns(ns->binfmt_ns);
 	put_cgroup_ns(ns->cgroup_ns);
 	put_net(ns->net_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
@@ -196,7 +209,8 @@  int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
+			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
+			       CLONE_NEWBINFMT)))
 		return 0;
 
 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();