@@ -676,7 +676,7 @@ static int unlazy_walk(struct nameidata *nd)
goto out1;
if (!nd->root.mnt) {
/* Restart from path_init() if nd->root was cleared. */
- if (nd->flags & LOOKUP_BENEATH)
+ if (nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT))
goto out;
} else if (!(nd->flags & LOOKUP_ROOT)) {
if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq)))
@@ -809,10 +809,18 @@ static int complete_walk(struct nameidata *nd)
return status;
}
-static void set_root(struct nameidata *nd)
+static int set_root(struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
+ /*
+ * Jumping to the real root as part of LOOKUP_IN_ROOT is a BUG in namei,
+ * but we still have to ensure it doesn't happen because it will cause a
+ * breakout from the dirfd.
+ */
+ if (WARN_ON(nd->flags & LOOKUP_IN_ROOT))
+ return -ENOTRECOVERABLE;
+
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
@@ -824,6 +832,7 @@ static void set_root(struct nameidata *nd)
} else {
get_fs_root(fs, &nd->root);
}
+ return 0;
}
static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -854,6 +863,11 @@ static int nd_jump_root(struct nameidata *nd)
if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
return -EXDEV;
}
+ if (!nd->root.mnt) {
+ int error = set_root(nd);
+ if (error)
+ return error;
+ }
if (nd->flags & LOOKUP_RCU) {
struct dentry *d;
nd->path = nd->root;
@@ -1100,15 +1114,13 @@ const char *get_link(struct nameidata *nd)
if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
return ERR_PTR(-ELOOP);
/* Not currently safe. */
- if (unlikely(nd->flags & LOOKUP_BENEATH))
+ if (unlikely(nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)))
return ERR_PTR(-EXDEV);
}
if (IS_ERR_OR_NULL(res))
return res;
}
if (*res == '/') {
- if (!nd->root.mnt)
- set_root(nd);
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
@@ -1744,15 +1756,20 @@ static inline int may_lookup(struct nameidata *nd)
static inline int handle_dots(struct nameidata *nd, int type)
{
if (type == LAST_DOTDOT) {
+ int error = 0;
+
/*
* LOOKUP_BENEATH resolving ".." is not currently safe -- races
* can cause our parent to have moved outside of the root and
* us to skip over it.
*/
- if (unlikely(nd->flags & LOOKUP_BENEATH))
+ if (unlikely(nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)))
return -EXDEV;
- if (!nd->root.mnt)
- set_root(nd);
+ if (!nd->root.mnt) {
+ error = set_root(nd);
+ if (error)
+ return error;
+ }
if (nd->flags & LOOKUP_RCU) {
return follow_dotdot_rcu(nd);
} else
@@ -2251,9 +2268,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->m_seq = read_seqbegin(&mount_lock);
+ /* LOOKUP_IN_ROOT treats absolute paths as being relative-to-dirfd. */
+ if (flags & LOOKUP_IN_ROOT)
+ while (*s == '/')
+ s++;
+
/* Figure out the starting path and root (if needed). */
if (*s == '/') {
- set_root(nd);
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
@@ -2298,7 +2319,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
fdput(f);
}
/* For scoped-lookups we need to set the root to the dirfd as well. */
- if (flags & LOOKUP_BENEATH) {
+ if (flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)) {
nd->root = nd->path;
if (flags & LOOKUP_RCU)
nd->root_seq = nd->seq;
@@ -57,6 +57,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
#define LOOKUP_NO_MAGICLINKS 0x080000 /* No /proc/$pid/fd/ "symlink" crossing. */
#define LOOKUP_NO_SYMLINKS 0x100000 /* No symlink crossing *at all*.
Implies LOOKUP_NO_MAGICLINKS. */
+#define LOOKUP_IN_ROOT 0x200000 /* Treat dirfd as %current->fs->root. */
extern int path_pts(struct path *path);
The primary motivation for the need for this flag is container runtimes which have to interact with malicious root filesystems in the host namespaces. One of the first requirements for a container runtime to be secure against a malicious rootfs is that they correctly scope symlinks (that is, they should be scoped as though they are chroot(2)ed into the container's rootfs) and ".."-style paths[*]. The already-existing LOOKUP_NO_XDEV and LOOKUP_NO_MAGICLINKS help defend against other potential attacks in a malicious rootfs scenario. Currently most container runtimes try to do this resolution in userspace[1], causing many potential race conditions. In addition, the "obvious" alternative (actually performing a {ch,pivot_}root(2)) requires a fork+exec (for some runtimes) which is *very* costly if necessary for every filesystem operation involving a container. [*] At the moment, ".." and magic-link jumping are disallowed for the same reason it is disabled for LOOKUP_BENEATH -- currently it is not safe to allow it. Future patches may enable it unconditionally once we have resolved the possible races (for "..") and semantics (for magic-link jumping). The most significant *at(2) semantic change with LOOKUP_IN_ROOT is that absolute pathnames no longer cause the dirfd to be ignored completely. The rationale is that LOOKUP_IN_ROOT must necessarily chroot-scope symlinks with absolute paths to dirfd, and so doing it for the base path seems to be the most consistent behaviour (and also avoids foot-gunning users who want to scope paths that are absolute). [1]: https://github.com/cyphar/filepath-securejoin Suggested-by: Christian Brauner <christian@brauner.io> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> --- fs/namei.c | 41 +++++++++++++++++++++++++++++++---------- include/linux/namei.h | 1 + 2 files changed, 32 insertions(+), 10 deletions(-)