@@ -461,5 +461,6 @@
530 common getegid sys_getegid
531 common geteuid sys_geteuid
532 common getppid sys_getppid
+533 common resolveat sys_resolveat
# all other architectures have common numbers for new syscall, alpha
# is the exception.
@@ -437,3 +437,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+428 common resolveat sys_resolveat
@@ -344,3 +344,4 @@
332 common pkey_free sys_pkey_free
333 common rseq sys_rseq
# 334 through 423 are reserved to sync up with other architectures
+428 common resolveat sys_resolveat
@@ -423,3 +423,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+428 common resolveat sys_resolveat
@@ -429,3 +429,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+428 common resolveat sys_resolveat
@@ -362,3 +362,4 @@
421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64
422 n32 futex_time64 sys_futex
423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+428 n32 resolveat sys_resolveat
@@ -338,3 +338,4 @@
327 n64 rseq sys_rseq
328 n64 io_pgetevents sys_io_pgetevents
# 329 through 423 are reserved to sync up with other architectures
+428 n64 resolveat sys_resolveat
@@ -411,3 +411,4 @@
421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 o32 futex_time64 sys_futex sys_futex
423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+428 o32 resolveat sys_resolveat sys_resolveat
@@ -420,3 +420,4 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+428 common resolveat sys_resolveat sys_resolveat
@@ -505,3 +505,4 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+428 common resolveat sys_resolveat sys_resolveat
@@ -426,3 +426,4 @@
421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 - sys_futex
423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval
+428 common resolveat sys_resolveat sys_resolveat
@@ -426,3 +426,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+428 common resolveat sys_resolveat
@@ -469,3 +469,4 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+428 common resolveat sys_resolveat sys_resolveat
@@ -438,3 +438,4 @@
425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
+428 i386 resolveat sys_resolveat __ia32_sys_resolveat
@@ -355,6 +355,7 @@
425 common io_uring_setup __x64_sys_io_uring_setup
426 common io_uring_enter __x64_sys_io_uring_enter
427 common io_uring_register __x64_sys_io_uring_register
+428 common resolveat __x64_sys_resolveat
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -394,3 +394,4 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+428 common resolveat sys_resolveat
@@ -3656,6 +3656,52 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
return filp;
}
+/*
+ * resolveat(2) - resolve a path and return an O_PATH descriptor for it.
+ * @dfd:   directory file descriptor, passed through to do_filp_open().
+ * @path:  userspace pointer to the pathname to resolve.
+ * @flags: mask of RESOLVE_* bits.
+ *
+ * Returns the new file descriptor on success.  Fails with -EINVAL when
+ * @flags has bits outside VALID_RESOLVE_FLAGS; otherwise propagates the
+ * error from getname(), get_unused_fd_flags() or do_filp_open().
+ */
+SYSCALL_DEFINE3(resolveat, int, dfd, const char __user *, path,
+		unsigned long, flags)
+{
+	int fd;
+	unsigned int fd_flags = 0;
+	struct filename *tmp;
+	struct file *f;
+	struct open_flags op = {
+		.open_flag = O_PATH,
+	};
+
+	if (flags & ~VALID_RESOLVE_FLAGS)
+		return -EINVAL;
+
+	/*
+	 * Close-on-exec is a property of the descriptor, not of the open
+	 * file: hand it only to get_unused_fd_flags() and keep it out of
+	 * op.open_flag so it does not show up in the file's status flags.
+	 */
+	if (flags & RESOLVE_CLOEXEC)
+		fd_flags |= O_CLOEXEC;
+
+	/* Trailing symlinks are followed unless the caller opts out. */
+	if (!(flags & RESOLVE_NOFOLLOW))
+		op.lookup_flags |= LOOKUP_FOLLOW;
+
+	/* Translate the scoping bits one-to-one into LOOKUP_* flags. */
+	if (flags & RESOLVE_BENEATH)
+		op.lookup_flags |= LOOKUP_BENEATH;
+	if (flags & RESOLVE_XDEV)
+		op.lookup_flags |= LOOKUP_XDEV;
+	if (flags & RESOLVE_NO_MAGICLINKS)
+		op.lookup_flags |= LOOKUP_NO_MAGICLINKS;
+	if (flags & RESOLVE_NO_SYMLINKS)
+		op.lookup_flags |= LOOKUP_NO_SYMLINKS;
+	if (flags & RESOLVE_THIS_ROOT)
+		op.lookup_flags |= LOOKUP_IN_ROOT;
+
+	tmp = getname(path);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
+
+	/* Reserve the fd number first so a failed open has nothing to undo. */
+	fd = get_unused_fd_flags(fd_flags);
+	if (fd < 0)
+		goto out_putname;
+
+	f = do_filp_open(dfd, tmp, &op);
+	if (IS_ERR(f)) {
+		/* Give the reserved fd number back and report the open error. */
+		put_unused_fd(fd);
+		fd = PTR_ERR(f);
+		goto out_putname;
+	}
+
+	fsnotify_open(f);
+	fd_install(fd, f);
+
+out_putname:
+	putname(tmp);
+	return fd;
+}
+
struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
const char *name, const struct open_flags *op)
{
@@ -100,4 +100,16 @@
#define AT_NO_SYMLINKS 0x080000 /* - Block all symlinks (implies AT_NO_MAGICLINKS). */
#define AT_THIS_ROOT 0x100000 /* - Scope ".." resolution to dirfd (like chroot(2)). */
+/* First two bits of RESOLVE_* are reserved for future ACC_MODE extensions. */
+#define RESOLVE_CLOEXEC 0x004 /* Set O_CLOEXEC on the returned fd. */
+#define RESOLVE_NOFOLLOW 0x008 /* Don't follow trailing symlinks. */
+#define RESOLVE_RESOLUTION_TYPE 0x1F0 /* Mask covering the path-resolution scoping bits below. */
+#define RESOLVE_BENEATH 0x010 /* - Block "lexical" trickery like "..", symlinks, absolute paths, etc. */
+#define RESOLVE_XDEV 0x020 /* - Block mount-point crossings (includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS 0x040 /* - Block procfs-style "magic" symlinks. */
+#define RESOLVE_NO_SYMLINKS 0x080 /* - Block all symlinks (implies RESOLVE_NO_MAGICLINKS). */
+#define RESOLVE_THIS_ROOT 0x100 /* - Scope ".." resolution to dirfd (like chroot(2)). */
+
+/* Every flag bit resolveat(2) accepts; any other bit is rejected with -EINVAL. */
+#define VALID_RESOLVE_FLAGS (RESOLVE_CLOEXEC | RESOLVE_NOFOLLOW | RESOLVE_RESOLUTION_TYPE)
+
#endif /* _UAPI_LINUX_FCNTL_H */
The most obvious syscall to add support for the new LOOKUP_* scoping flags would be openat(2) (along with the required execveat(2) change included in this series). However, there are a few reasons to not do this: * The new LOOKUP_* flags are intended to be security features, and openat(2) will silently ignore all unknown flags. This means that users would need to avoid foot-gunning themselves constantly when using this interface if it were part of openat(2). * Resolution scoping feels like a different operation to the existing O_* flags. And since openat(2) has limited flag space, it seems to be quite wasteful to clutter it with 5 flags that are all resolution-related. Arguably O_NOFOLLOW is also a resolution flag, but its entire purpose is to error out if you encounter a trailing symlink, not to scope resolution. * Other systems would be able to reimplement this syscall allowing for cross-OS standardisation rather than being hidden amongst O_* flags which may result in it not being used by all the parties that might want to use it (file servers, web servers, container runtimes, etc). * It gives us the opportunity to iterate on the O_PATH interface in the future. There are some potential security improvements that can be made to O_PATH (handling /proc/self/fd re-opening of file descriptors much more sanely) which could be made even better with some other bits (such as ACC_MODE bits which work for O_PATH). To this end, we introduce the resolveat(2) syscall. At the moment it's effectively another way of getting a bog-standard O_PATH descriptor but with the ability to use the new LOOKUP_* flags. Because resolveat(2) only provides the ability to get O_PATH descriptors, users will need to get creative with /proc/self/fd in order to get a usable file descriptor for other uses. 
However, in future we can add O_EMPTYPATH support to openat(2) which would allow for re-opening without procfs (though as mentioned above there are some security improvements that should be made to the interfaces). NOTE: This patch adds the syscall to all architectures using the new unified syscall numbering, but several architectures are missing newer (nr > 423) syscalls -- hence the uneven gaps in the syscall tables. Cc: Christian Brauner <christian@brauner.io> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com> --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/namei.c | 46 +++++++++++++++++++++ include/uapi/linux/fcntl.h | 12 ++++++ 18 files changed, 74 insertions(+)