@@ -461,5 +461,6 @@
530 common getegid sys_getegid
531 common geteuid sys_geteuid
532 common getppid sys_getppid
+533 common resolveat sys_resolveat
# all other architectures have common numbers for new syscall, alpha
# is the exception.
@@ -437,3 +437,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+435 common resolveat sys_resolveat
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 424
+#define __NR_compat_syscalls 436
#endif
#define __ARCH_WANT_SYS_CLONE
@@ -867,6 +867,9 @@ __SYSCALL(__NR_futex_time64, sys_futex)
#define __NR_sched_rr_get_interval_time64 423
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_resolveat 435
+__SYSCALL(__NR_resolveat, sys_sched_rr_get_interval)
+
/*
* Please add new compat syscalls above this comment and update
* __NR_compat_syscalls in asm/unistd.h.
@@ -344,3 +344,4 @@
332 common pkey_free sys_pkey_free
333 common rseq sys_rseq
# 334 through 423 are reserved to sync up with other architectures
+435 common resolveat sys_resolveat
@@ -423,3 +423,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+435 common resolveat sys_resolveat
@@ -429,3 +429,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+435 common resolveat sys_resolveat
@@ -362,3 +362,4 @@
421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64
422 n32 futex_time64 sys_futex
423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+435 n32 resolveat sys_resolveat
@@ -338,3 +338,4 @@
327 n64 rseq sys_rseq
328 n64 io_pgetevents sys_io_pgetevents
# 329 through 423 are reserved to sync up with other architectures
+435 n64 resolveat sys_resolveat
@@ -411,3 +411,4 @@
421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 o32 futex_time64 sys_futex sys_futex
423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+435 o32 resolveat sys_resolveat sys_resolveat
@@ -420,3 +420,4 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+435 common resolveat sys_resolveat sys_resolveat
@@ -505,3 +505,4 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+435 common resolveat sys_resolveat sys_resolveat
@@ -426,3 +426,4 @@
421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 - sys_futex
423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval
+435 common resolveat sys_resolveat -
@@ -426,3 +426,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+435 common resolveat sys_resolveat
@@ -469,3 +469,4 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+435 common resolveat sys_resolveat sys_resolveat
@@ -438,3 +438,4 @@
425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
+435 i386 resolveat sys_resolveat __ia32_sys_resolveat
@@ -355,6 +355,7 @@
425 common io_uring_setup __x64_sys_io_uring_setup
426 common io_uring_enter __x64_sys_io_uring_enter
427 common io_uring_register __x64_sys_io_uring_register
+435 common resolveat __x64_sys_resolveat
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -394,3 +394,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+435 common resolveat sys_resolveat
@@ -3790,6 +3790,56 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
return filp;
}
+SYSCALL_DEFINE3(resolveat, int, dfd, const char __user *, path,
+ unsigned long, flags)
+{
+ int fd;
+ struct filename *tmp;
+ struct open_flags op = {
+ .open_flag = O_PATH | O_CLOEXEC,
+ .opath_mask = FMODE_PATH_READ | FMODE_PATH_WRITE,
+ };
+
+ if (flags & ~VALID_RESOLVE_FLAGS)
+ return -EINVAL;
+
+ if (flags & RESOLVE_UPGRADE_NOREAD)
+ op.opath_mask &= ~FMODE_PATH_READ;
+ if (flags & RESOLVE_UPGRADE_NOWRITE)
+ op.opath_mask &= ~FMODE_PATH_WRITE;
+
+ if (!(flags & RESOLVE_NO_FOLLOW))
+ op.lookup_flags |= LOOKUP_FOLLOW;
+ if (flags & RESOLVE_BENEATH)
+ op.lookup_flags |= LOOKUP_BENEATH;
+ if (flags & RESOLVE_XDEV)
+ op.lookup_flags |= LOOKUP_XDEV;
+ if (flags & RESOLVE_NO_MAGICLINKS)
+ op.lookup_flags |= LOOKUP_NO_MAGICLINKS;
+ if (flags & RESOLVE_NO_SYMLINKS)
+ op.lookup_flags |= LOOKUP_NO_SYMLINKS;
+ if (flags & RESOLVE_IN_ROOT)
+ op.lookup_flags |= LOOKUP_IN_ROOT;
+
+ tmp = getname(path);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ fd = get_unused_fd_flags(op.open_flag);
+ if (fd >= 0) {
+ struct file *f = do_filp_open(dfd, tmp, &op);
+ if (IS_ERR(f)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(f);
+ } else {
+ fsnotify_open(f);
+ fd_install(fd, f);
+ }
+ }
+ putname(tmp);
+ return fd;
+}
+
struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
const char *name, const struct open_flags *op)
{
@@ -4,13 +4,19 @@
#include <uapi/linux/fcntl.h>
-/* list of all valid flags for the open/openat flags argument: */
+/* List of all valid flags for the open/openat flags argument: */
#define VALID_OPEN_FLAGS \
(O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \
O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \
FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | O_EMPTYPATH)
+/* List of all valid flags for resolveat(2). */
+#define VALID_RESOLVE_FLAGS \
+ (RESOLVE_UPGRADE_NOWRITE | RESOLVE_UPGRADE_NOREAD | \
+ RESOLVE_NO_FOLLOW | RESOLVE_BENEATH | RESOLVE_XDEV | \
+ RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | RESOLVE_IN_ROOT)
+
#ifndef force_o_largefile
#define force_o_largefile() (!IS_ENABLED(CONFIG_ARCH_32BIT_OFF_T))
#endif
@@ -833,8 +833,11 @@ __SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
#define __NR_io_uring_register 427
__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
+#define __NR_resolveat 435
+__SYSCALL(__NR_resolveat, sys_resolveat)
+
#undef __NR_syscalls
-#define __NR_syscalls 428
+#define __NR_syscalls 436
/*
* 32 bit systems traditionally used different
@@ -93,5 +93,15 @@
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+/* Bottom bit is reserved for RESOLVE_UPGRADE_NOEXEC. */
+#define RESOLVE_UPGRADE_NOWRITE 0x002 /* Disallow re-opening for write. */
+#define RESOLVE_UPGRADE_NOREAD 0x004 /* Disallow re-opening for read. */
+#define RESOLVE_NO_FOLLOW 0x008 /* Don't follow trailing symlinks. */
+
+#define RESOLVE_BENEATH 0x010 /* Block "lexical" trickery like "..", symlinks, absolute paths, etc. */
+#define RESOLVE_XDEV 0x020 /* Block mount-point crossings (includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS 0x040 /* Block procfs-style "magic" symlinks. */
+#define RESOLVE_NO_SYMLINKS 0x080 /* Block all symlinks (implies AT_NO_MAGICLINKS). */
+#define RESOLVE_IN_ROOT 0x100 /* Scope ".." and "/" resolution to dirfd (like chroot(2)). */
#endif /* _UAPI_LINUX_FCNTL_H */
The most obvious syscall to add support for the new LOOKUP_* scoping flags would be openat(2) (along with the required execveat(2) change included in this series). However, there are a few reasons to not do this: * The new LOOKUP_* flags are intended to be security features, and openat(2) will silently ignore all unknown flags. This means that users would need to avoid foot-gunning themselves constantly when using this interface if it were part of openat(2). * Resolution scoping feels like a different operation to the existing O_* flags. And since openat(2) has limited flag space, it seems to be quite wasteful to clutter it with 5 flags that are all resolution-related. Arguably O_NOFOLLOw is also a resolution flag but its entire purpose is to error out if you encounter a trailing symlink not to scope resolution. * Other systems would be able to reimplement this syscall allowing for cross-OS standardisation rather than being hidden amongst O_* flags which may result in it not being used by all the parties that might want to use it (file servers, web servers, container runtimes, etc). * It gives us the opportunity to iterate on the O_PATH interface. In particular, the RESOLVE_UPGRADE_* flags are only possible because we have a clean slate without needing to re-use the ACC_MODE flag design. To this end, we introduce the resolveat(2) syscall. At the moment it's effectively another way of getting a bog-standard O_PATH descriptor but with the ability to use the new LOOKUP_* flags. Following the example of other new file-handle-creation syscalls (pidfds, seccomp notify fds) we default to O_CLOEXEC and users can unset O_CLOEXEC using fcntl(2) if they really want to. Because resolveat(2) only provides the ability to get O_PATH descriptors, users will need to get creative with /proc/self/fd in order to get a usable file descriptor for other uses. However, the new addition of O_EMPTYPATH shortcuts this problem and allows for easy usage of the new LOOKUP_* flags with openat(2) without cluttering the open(2) flag bits. In order to allow for userspace to lock down their usage of file descriptor re-opening, resolveat(2) has flags (RESOLVE_UPGRADE_*) which allow users to disallow certain re-opening modes. Co-developed-by: Christian Brauner <christian@brauner.io> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 3 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/namei.c | 50 +++++++++++++++++++++ include/linux/fcntl.h | 8 +++- include/uapi/asm-generic/unistd.h | 5 ++- include/uapi/linux/fcntl.h | 10 +++++ 22 files changed, 91 insertions(+), 3 deletions(-)