diff mbox

[2/5] kernel: add a helper to get an owning user namespace for a namespace

Message ID 1468548742-32136-2-git-send-email-avagin@openvz.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andrey Vagin July 15, 2016, 2:12 a.m. UTC
Return -EPERM if an owning user namespace is outside of a process
current user namespace.

Signed-off-by: Andrey Vagin <avagin@openvz.org>
---
 include/linux/user_namespace.h |  7 +++++++
 kernel/user_namespace.c        | 24 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

Comments

Eric W. Biederman July 24, 2016, 5:03 a.m. UTC | #1
Andrey Vagin <avagin@openvz.org> writes:

> Return -EPERM if an owning user namespace is outside of a process
> current user namespace.
>
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index a5bc78c..6382e5e 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -994,6 +994,30 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
>  	return commit_creds(cred);
>  }
>  
> +struct ns_common *ns_get_owner(struct ns_common *ns)
> +{
> +	const struct cred *cred = current_cred();
> +	struct user_namespace *user_ns, *p;
> +
> +	user_ns = p = ns->user_ns;
> +	if (user_ns == NULL) { /* ns is init_user_ns */
> +		/* Unprivileged user should not know that it's init_user_ns. */
> +		if (capable(CAP_SYS_ADMIN))
> +			return ERR_PTR(-ENOENT);
> +		return ERR_PTR(-EPERM);
> +	}

This permission check is not what I meant to request.  This does not
handle nested user namespaces.

> +	for (;;) {
> +		if (p == cred->user_ns)
> +			break;
> +		if (p == &init_user_ns)
> +			return ERR_PTR(-EPERM);
> +		p = p->parent;
> +	}
> +

The permission check really needs to be down here. And be:

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
        	return ERR_PTR(-EPERM).

That cleanly and easily handles more than a depth of a single user
namespace.

> +	return &get_user_ns(user_ns)->ns;
> +}
> +
>  const struct proc_ns_operations userns_operations = {
>  	.name		= "user",
>  	.type		= CLONE_NEWUSER,


Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Vagin July 24, 2016, 6:37 a.m. UTC | #2
On Sun, Jul 24, 2016 at 12:03:49AM -0500, Eric W. Biederman wrote:
> Andrey Vagin <avagin@openvz.org> writes:
> 
> > Return -EPERM if an owning user namespace is outside of a process
> > current user namespace.
> >
> > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> > index a5bc78c..6382e5e 100644
> > --- a/kernel/user_namespace.c
> > +++ b/kernel/user_namespace.c
> > @@ -994,6 +994,30 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> >  	return commit_creds(cred);
> >  }
> >  
> > +struct ns_common *ns_get_owner(struct ns_common *ns)
> > +{
> > +	const struct cred *cred = current_cred();
> > +	struct user_namespace *user_ns, *p;
> > +
> > +	user_ns = p = ns->user_ns;
> > +	if (user_ns == NULL) { /* ns is init_user_ns */
> > +		/* Unprivileged user should not know that it's init_user_ns. */
> > +		if (capable(CAP_SYS_ADMIN))
> > +			return ERR_PTR(-ENOENT);
> > +		return ERR_PTR(-EPERM);
> > +	}
> 
> This permission check is not what I meant to request.  This does not
> handle nested user namespaces.

Here I handle a case when ns is init_user_ns. init_user_ns doesn't have
a parent, so we need to return an error. We can't return ENOENT in all
cases, because we don't want to expose "that file descriptor is for the
root user namespace" to unprivileged users.
(Trevor suggested to add this check and it looks resonable for me too).
> 
> > +	for (;;) {
> > +		if (p == cred->user_ns)
> > +			break;
> > +		if (p == &init_user_ns)
> > +			return ERR_PTR(-EPERM);
> > +		p = p->parent;
> > +	}
> > +
> 
> The permission check really needs to be down here. And be:
> 
> 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
>         	return ERR_PTR(-EPERM).
> 
> That cleanly and easily handles more than a depth of a single user
> namespace.
> 
> > +	return &get_user_ns(user_ns)->ns;
> > +}
> > +
> >  const struct proc_ns_operations userns_operations = {
> >  	.name		= "user",
> >  	.type		= CLONE_NEWUSER,
> 
> 
> Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman July 24, 2016, 2:30 p.m. UTC | #3
Andrew Vagin <avagin@virtuozzo.com> writes:

> On Sun, Jul 24, 2016 at 12:03:49AM -0500, Eric W. Biederman wrote:
>> Andrey Vagin <avagin@openvz.org> writes:
>> 
>> > Return -EPERM if an owning user namespace is outside of a process
>> > current user namespace.
>> >
>> > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
>> > index a5bc78c..6382e5e 100644
>> > --- a/kernel/user_namespace.c
>> > +++ b/kernel/user_namespace.c
>> > @@ -994,6 +994,30 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
>> >  	return commit_creds(cred);
>> >  }
>> >  
>> > +struct ns_common *ns_get_owner(struct ns_common *ns)
>> > +{
>> > +	const struct cred *cred = current_cred();
>> > +	struct user_namespace *user_ns, *p;
>> > +
>> > +	user_ns = p = ns->user_ns;
>> > +	if (user_ns == NULL) { /* ns is init_user_ns */
>> > +		/* Unprivileged user should not know that it's init_user_ns. */
>> > +		if (capable(CAP_SYS_ADMIN))
>> > +			return ERR_PTR(-ENOENT);
>> > +		return ERR_PTR(-EPERM);
>> > +	}
>> 
>> This permission check is not what I meant to request.  This does not
>> handle nested user namespaces.
>
> Here I handle a case when ns is init_user_ns. init_user_ns doesn't have
> a parent, so we need to return an error. We can't return ENOENT in all
> cases, because we don't want to expose "that file descriptor is for the
> root user namespace" to unprivileged users.
> (Trevor suggested to add this check and it looks resonable for me
> too).

Apologies. I was skimming and misread the code.  I mistook that loop for
some useful part of getting the owner.  Looking in more detail...

Your code says:
+struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	const struct cred *cred = current_cred();
+	struct user_namespace *user_ns, *p;
+
+	user_ns = p = ns->user_ns;
+	if (user_ns == NULL) { /* ns is init_user_ns */
+		/* Unprivileged user should not know that it's init_user_ns. */
+		if (capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-ENOENT);
+		return ERR_PTR(-EPERM);
+	}
+
+	for (;;) {
+		if (p == cred->user_ns)
+			break;
+		if (p == &init_user_ns)
+			return ERR_PTR(-EPERM);
+		p = p->parent;
+	}
+
+	return &get_user_ns(user_ns)->ns;
+}

And all else being equal it could say:

+struct ns_common *ns_get_owner(struct ns_common *ns)
s+{
+	struct user_namespace *user_ns = ns->user_ns;
+
+	/* Are we allowed to see the user namespace? */
+	if (!ns_capable(user_ns?user_ns:&init_user_ns, CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (!user_ns)
+		return ERR_PTR(-ENOENT);
+
+	return &get_user_ns(user_ns)->ns;
+}

Which I think is the root of my confusion.  You hand rolled ns_capable,
and I did not recognize it because I was skimming, and just looking to
be certain the permission check was present.

Given that you have to hand roll the pid namespace check that hand
rolling may not be so bad.  But it certainly was confusing the first
time I saw it especially without a comment.

Hmm.

I am not at all certain it makes sense to return -ENOENT.

Without the -ENOENT check the code is much cleaner, and clearer.

I may be blinkered but I don't see the value in letting someone know we
are talking about the initial namespace.  If anything that information
is likely to cause issues with weird corner cases of checkpoint/restart,
as it acts different if you are in a container or not.

When things act different in a container that almost always is a source
of a problem somewhere.

So we could simplify the filter in the code to just this.

+struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	struct user_namespace *my_user_ns = current_user_ns();
+	struct user_namespace *owner, *p;
+
+	/* See if the owner is in the current user namespace */
+	owner = p = ns->user_ns;
+	for (;;) {
+		if (!p)
+			return ERR_PTR(-EPERM);
+		if (p == my_user_ns)
+			break;
+		p = p->parent;
+	}
+
+	return &get_user_ns(owner)->ns;
+}

And on reflection I do see the point of not using ns_capable as that
requires having privileges in a namespace while all we want here
is to see if someone is in a visible namespace.

So please ignore my comments about ns_capable on the pid namespace
parent.

But please simplify the loop and put an appropriate comment on it like I
have above.  The fewer special cases the easier the code is to get
correct, and the easier it is to read.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
W. Trevor King July 24, 2016, 4:54 p.m. UTC | #4
On Thu, Jul 14, 2016 at 07:12:19PM -0700, Andrey Vagin wrote:
> +struct ns_common *ns_get_owner(struct ns_common *ns)
> +{
> +	…
> +	return &get_user_ns(user_ns)->ns;
> +}

Is there a reason to return the generic ‘struct ns_common *’ here
instead of ‘struct user_namespace *’?  The current use case doesn't
need access to the additional information, but future ng_get_owner
callers might, and we know the returned namespace (if any) will be a
user namespace.

Cheers,
Trevor
W. Trevor King July 24, 2016, 5:05 p.m. UTC | #5
On Sun, Jul 24, 2016 at 09:30:03AM -0500, Eric W. Biederman wrote:
> I am not at all certain it makes sense to return -ENOENT.
> 
> Without the -ENOENT check the code is much cleaner, and clearer.

This is fine with me, and makes even more sense for owner (user)
namespaces than it does for net namespaces [1].  At least, I can't
think of a reason why the root user namespace would have special
userspace-visible behavior ;).

Cheers,
Trevor

[1]: http://thread.gmane.org/gmane.linux.kernel.api/20626/focus=30639
     Subject: Re: [PATCH 0/5 RFC] Add an interface to discover relationships between namespaces                                                                              
     Date: Sat, 23 Jul 2016 23:51:07 -0500
     Message-ID: <877fcboczo.fsf@x220.int.ebiederm.org>
diff mbox

Patch

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index a941b44..e416b76 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -76,6 +76,8 @@  extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t,
 extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
 extern int proc_setgroups_show(struct seq_file *m, void *v);
 extern bool userns_may_setgroups(const struct user_namespace *ns);
+
+struct ns_common *ns_get_owner(struct ns_common *ns);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -104,6 +106,11 @@  static inline bool userns_may_setgroups(const struct user_namespace *ns)
 {
 	return true;
 }
+
+static inline struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	return ERR_PTR(-ENOENT);
+}
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a5bc78c..6382e5e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -994,6 +994,30 @@  static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return commit_creds(cred);
 }
 
+struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+	const struct cred *cred = current_cred();
+	struct user_namespace *user_ns, *p;
+
+	user_ns = p = ns->user_ns;
+	if (user_ns == NULL) { /* ns is init_user_ns */
+		/* Unprivileged user should not know that it's init_user_ns. */
+		if (capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-ENOENT);
+		return ERR_PTR(-EPERM);
+	}
+
+	for (;;) {
+		if (p == cred->user_ns)
+			break;
+		if (p == &init_user_ns)
+			return ERR_PTR(-EPERM);
+		p = p->parent;
+	}
+
+	return &get_user_ns(user_ns)->ns;
+}
+
 const struct proc_ns_operations userns_operations = {
 	.name		= "user",
 	.type		= CLONE_NEWUSER,