diff mbox series

[RFC,06/27] containers, vfs: Allow syscall dirfd arguments to take a container fd

Message ID 155024688620.21651.16013251077091180213.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series Containers and using authenticated filesystems | expand

Commit Message

David Howells Feb. 15, 2019, 4:08 p.m. UTC
Some filesystem system calls, such as mkdirat(), take a 'directory fd' to
specify the pathwalk origin.  This takes either AT_FDCWD or a file
descriptor that refers to an open directory.

Make it possible to supply a container fd, as obtained from
container_create(), instead thereby specifying the container's root as the
origin.  This performs the filesystem operation into the container's mount
namespace.  For example:

	int cfd = container_create("fred", CONTAINER_NEW_MNT_NS, 0);
	mkdirat(cfd, "/fred", 0755);

A better way to do this might be to temporarily override current->fs and
current->nsproxy, but this requires splitting those fields so that procfs
doesn't see the override.

A sequence number and lock are available to protect the root pointer in
case container_chroot() and/or container_pivot_root() are implemented.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/namei.c |   45 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 11 deletions(-)

Comments

Eric W. Biederman Feb. 19, 2019, 4:45 p.m. UTC | #1
David Howells <dhowells@redhat.com> writes:

> Some filesystem system calls, such as mkdirat(), take a 'directory fd' to
> specify the pathwalk origin.  This takes either AT_FDCWD or a file
> descriptor that refers to an open directory.
>
> Make it possible to supply a container fd, as obtained from
> container_create(), instead thereby specifying the container's root as the
> origin.  This performs the filesystem operation into the container's mount
> namespace.  For example:
>
> 	int cfd = container_create("fred", CONTAINER_NEW_MNT_NS, 0);
> 	mkdirat(cfd, "/fred", 0755);
>
> A better way to do this might be to temporarily override current->fs and
> current->nsproxy, but this requires splitting those fields so that procfs
> doesn't see the override.
>
> A sequence number and lock are available to protect the root pointer in
> case container_chroot() and/or container_pivot_root() are implemented.

If this is desirable we can do this without a ``container''.  We already
have mount namespaces.

Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>

In fact if you take care to use a path that starts with '/' the normal
dirfd based operations work just fine.

So I don't see the point of this system call at all.


> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
>
>  fs/namei.c |   45 ++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 11 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index a85deb55d0c9..4932b5467285 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2232,20 +2232,43 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
>  		if (!f.file)
>  			return ERR_PTR(-EBADF);
>  
> -		dentry = f.file->f_path.dentry;
> +		if (is_container_file(f.file)) {
> +			struct container *c = f.file->private_data;
> +			unsigned seq;
>  
> -		if (*s && unlikely(!d_can_lookup(dentry))) {
> -			fdput(f);
> -			return ERR_PTR(-ENOTDIR);
> -		}
> +			if (!*s)
> +				return ERR_PTR(-EINVAL);
>  
> -		nd->path = f.file->f_path;
> -		if (flags & LOOKUP_RCU) {
> -			nd->inode = nd->path.dentry->d_inode;
> -			nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
> +			if (flags & LOOKUP_RCU) {
> +				do {
> +					seq = read_seqcount_begin(&c->seq);
> +					nd->path = c->root;
> +					nd->inode = nd->path.dentry->d_inode;
> +					nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
> +				} while (read_seqcount_retry(&c->seq, seq));
> +			} else {
> +				spin_lock(&c->lock);
> +				nd->path = c->root;
> +				path_get(&nd->path);
> +				spin_unlock(&c->lock);
> +				nd->inode = nd->path.dentry->d_inode;
> +			}
>  		} else {
> -			path_get(&nd->path);
> -			nd->inode = nd->path.dentry->d_inode;
> +			dentry = f.file->f_path.dentry;
> +
> +			if (*s && unlikely(!d_can_lookup(dentry))) {
> +				fdput(f);
> +				return ERR_PTR(-ENOTDIR);
> +			}
> +
> +			nd->path = f.file->f_path;
> +			if (flags & LOOKUP_RCU) {
> +				nd->inode = nd->path.dentry->d_inode;
> +				nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
> +			} else {
> +				path_get(&nd->path);
> +				nd->inode = nd->path.dentry->d_inode;
> +			}
>  		}
>  		fdput(f);
>  		return s;
David Howells Feb. 19, 2019, 11:24 p.m. UTC | #2
Eric W. Biederman <ebiederm@xmission.com> wrote:

> In fact if you take care to use a path that starts with '/' the normal
> dirfd based operations work just fine.

If the path starts with '/', dirfd is ignored.  And there's an error in my
patch description - it should be:

 	mkdirat(cfd, "fred", 0755);

David
diff mbox series

Patch

diff --git a/fs/namei.c b/fs/namei.c
index a85deb55d0c9..4932b5467285 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2232,20 +2232,43 @@  static const char *path_init(struct nameidata *nd, unsigned flags)
 		if (!f.file)
 			return ERR_PTR(-EBADF);
 
-		dentry = f.file->f_path.dentry;
+		if (is_container_file(f.file)) {
+			struct container *c = f.file->private_data;
+			unsigned seq;
 
-		if (*s && unlikely(!d_can_lookup(dentry))) {
-			fdput(f);
-			return ERR_PTR(-ENOTDIR);
-		}
+			if (!*s)
+				return ERR_PTR(-EINVAL);
 
-		nd->path = f.file->f_path;
-		if (flags & LOOKUP_RCU) {
-			nd->inode = nd->path.dentry->d_inode;
-			nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+			if (flags & LOOKUP_RCU) {
+				do {
+					seq = read_seqcount_begin(&c->seq);
+					nd->path = c->root;
+					nd->inode = nd->path.dentry->d_inode;
+					nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+				} while (read_seqcount_retry(&c->seq, seq));
+			} else {
+				spin_lock(&c->lock);
+				nd->path = c->root;
+				path_get(&nd->path);
+				spin_unlock(&c->lock);
+				nd->inode = nd->path.dentry->d_inode;
+			}
 		} else {
-			path_get(&nd->path);
-			nd->inode = nd->path.dentry->d_inode;
+			dentry = f.file->f_path.dentry;
+
+			if (*s && unlikely(!d_can_lookup(dentry))) {
+				fdput(f);
+				return ERR_PTR(-ENOTDIR);
+			}
+
+			nd->path = f.file->f_path;
+			if (flags & LOOKUP_RCU) {
+				nd->inode = nd->path.dentry->d_inode;
+				nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+			} else {
+				path_get(&nd->path);
+				nd->inode = nd->path.dentry->d_inode;
+			}
 		}
 		fdput(f);
 		return s;