Message ID | 22d50b05f3387e23094eaf1f42ef4d435dd555b8.1726774919.git.mvogt@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v5,1/1] linux-user: add openat2 support in linux-user | expand |
Hi, your PATCH 1/1 doesn't appear to be a reply to PATCH 0/1 (mail header tags "In-Reply-To:"/"References:") so it is not correctly collected by patchew.org. Do you have the 'thread' parameter set for git-send-email? See my comments below: Le 19/09/2024 à 21:46, Michael Vogt a écrit : > This commit adds support for the `openat2()` syscall in the > `linux-user` userspace emulator. > > It is implemented by extracting a new helper `maybe_do_fake_open()` > out of the exiting `do_guest_openat()` and share that with the > new `do_guest_openat2()`. Unfortunately we cannot just make > do_guest_openat2() a superset of do_guest_openat() because the > openat2() syscall is stricter with the argument checking and > will return an error for invalid flags or mode combinations (which > open()/openat() will ignore). > > The implementation is similar to SYSCALL_DEFINE(openat2), i.e. > a new `copy_struct_from_user()` is used that works the same > as the kernels version to support backwards-compatibility > for struct syscall argument. > > Instead of including openat2.h we create a copy of `open_how` > as `open_how_ver0` to ensure that if the structure grows we > can log a LOG_UNIMP warning. > > Note that in this commit using openat2() for a "faked" file in > /proc will ignore the "resolve" flags. This is not great but it > seems similar to the exiting behavior when openat() is called > with a dirfd to "/proc". Here too the fake file lookup may > not catch the special file because "realpath()" is used to > determine if the path is in /proc. Alternatively to ignoring > we could simply fail with `-TARGET_ENOSYS` (or similar) if > `resolve` flags are passed and we found something that looks > like a file in /proc that needs faking. 
> > Signed-off-by: Michael Vogt <mvogt@redhat.com> > Buglink: https://github.com/osbuild/bootc-image-builder/issues/619 > --- > linux-user/syscall.c | 107 +++++++++++++++++++++++++++++++++++++- > linux-user/syscall_defs.h | 7 +++ > 2 files changed, 112 insertions(+), 2 deletions(-) > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index b693aeff5b..99f3afece7 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -602,6 +602,34 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize) > return 1; > } > > +/* > + * Copies a target struct to a host struct, in a way that guarantees > + * backwards-compatibility for struct syscall arguments. > + * > + * Similar to kernels uaccess.h:copy_struct_from_user() > + */ > +static int > +copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) > +{ > + size_t size = MIN(ksize, usize); > + size_t rest = MAX(ksize, usize) - size; > + > + /* Deal with trailing bytes. */ > + if (usize < ksize) { > + memset(dst + size, 0, rest); > + } else if (usize > ksize) { > + int ret = check_zeroed_user(src, ksize, usize); > + if (ret <= 0) { > + return ret ?: -TARGET_E2BIG; > + } > + } > + /* Copy the interoperable parts of the struct. 
*/ > + if (copy_from_user(dst, src, size)) { > + return -TARGET_EFAULT; > + } > + return 0; > +} > + > #define safe_syscall0(type, name) \ > static type safe_##name(void) \ > { \ > @@ -653,6 +681,15 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) > safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) > safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ > int, flags, mode_t, mode) > + > +struct open_how_ver0 { > + __u64 flags; > + __u64 mode; > + __u64 resolve; > +}; > +safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \ > + const struct open_how_ver0 *, how, size_t, size) > + > #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid) > safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ > struct rusage *, rusage) > @@ -8334,8 +8371,9 @@ static int open_net_route(CPUArchState *cpu_env, int fd) > } > #endif > > -int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, > - int flags, mode_t mode, bool safe) > +static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd, > + const char *fname, int flags, mode_t mode, > + bool safe) > { > g_autofree char *proc_name = NULL; > const char *pathname; > @@ -8418,6 +8456,17 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, > return fd; > } > > + return -2; > +} > + > +int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, > + int flags, mode_t mode, bool safe) > +{ > + int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, flags, mode, safe); > + if (fd > -2) { > + return get_errno(fd); Don't put the get_errno() here, because safe_openat() and openat() below don't have one, and moreover the callers are doing get_errno(do_guest_openat()). 
> + } > + > if (safe) { > return safe_openat(dirfd, path(pathname), flags, mode); > } else { > @@ -8425,6 +8474,55 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, > } > } > > + > +static int do_openat2(CPUArchState *cpu_env, abi_long dirfd, > + abi_ptr guest_pathname, abi_ptr guest_open_how, > + abi_long guest_size) > +{ > + struct open_how_ver0 how = {0}; > + int ret; > + > + if (guest_size < sizeof(struct target_open_how_ver0)) { > + return -TARGET_EINVAL; > + } > + ret = copy_struct_from_user(&how, sizeof(how), guest_open_how, guest_size); > + if (ret) { > + if (ret == -TARGET_E2BIG) { > + qemu_log_mask(LOG_UNIMP, > + "Unimplemented openat2 open_how size: %lu\n", > + guest_size); > + } > + return ret; > + } > + char *pathname = lock_user_string(guest_pathname); Don't put the declaration in the middle of the code. See https://qemu-project.gitlab.io/qemu/devel/style.html#declarations > + if (!pathname) { > + return -TARGET_EFAULT; > + } > + > + how.flags = target_to_host_bitmask(how.flags, fcntl_flags_tbl); > + how.mode = tswap64(how.mode); > + how.resolve = tswap64(how.resolve); > + > + /* > + * Ideally we would pass "how->resolve" flags into this helper too but > + * the lookup for files that need faking is based on "realpath()" so > + * neither a dirfd for "proc" nor restrictions via "resolve" flags can > + * be honored right now. > + */ > + int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, how.flags, how.mode, > + true); > + if (fd > -2) { > + return get_errno(fd); it's better to set "ret = get_errno(fd);" and not return to execute the fd_trans_unregister() and unlock_user() below. > + } else { > + ret = get_errno(safe_openat2(dirfd, pathname, &how, > + sizeof(struct open_how_ver0))); > + } > + > + fd_trans_unregister(ret); > + unlock_user(pathname, guest_pathname, 0); > + return ret; > +} > + Thanks, Laurent
Le 19/09/2024 à 21:46, Michael Vogt a écrit : > This commit adds support for the `openat2()` syscall in the > `linux-user` userspace emulator. > > It is implemented by extracting a new helper `maybe_do_fake_open()` > out of the exiting `do_guest_openat()` and share that with the > new `do_guest_openat2()`. Unfortunately we cannot just make > do_guest_openat2() a superset of do_guest_openat() because the > openat2() syscall is stricter with the argument checking and > will return an error for invalid flags or mode combinations (which > open()/openat() will ignore). > > The implementation is similar to SYSCALL_DEFINE(openat2), i.e. > a new `copy_struct_from_user()` is used that works the same > as the kernels version to support backwards-compatibility > for struct syscall argument. > > Instead of including openat2.h we create a copy of `open_how` > as `open_how_ver0` to ensure that if the structure grows we > can log a LOG_UNIMP warning. > > Note that in this commit using openat2() for a "faked" file in > /proc will ignore the "resolve" flags. This is not great but it > seems similar to the exiting behavior when openat() is called > with a dirfd to "/proc". Here too the fake file lookup may > not catch the special file because "realpath()" is used to > determine if the path is in /proc. Alternatively to ignoring > we could simply fail with `-TARGET_ENOSYS` (or similar) if > `resolve` flags are passed and we found something that looks > like a file in /proc that needs faking. > > Signed-off-by: Michael Vogt <mvogt@redhat.com> And I think it's better if the "From:" address is the same as the S-o-b address. Thanks, Laurent
diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b693aeff5b..99f3afece7 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -602,6 +602,34 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize) return 1; } +/* + * Copies a target struct to a host struct, in a way that guarantees + * backwards-compatibility for struct syscall arguments. + * + * Similar to kernels uaccess.h:copy_struct_from_user() + */ +static int +copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) +{ + size_t size = MIN(ksize, usize); + size_t rest = MAX(ksize, usize) - size; + + /* Deal with trailing bytes. */ + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + int ret = check_zeroed_user(src, ksize, usize); + if (ret <= 0) { + return ret ?: -TARGET_E2BIG; + } + } + /* Copy the interoperable parts of the struct. */ + if (copy_from_user(dst, src, size)) { + return -TARGET_EFAULT; + } + return 0; +} + #define safe_syscall0(type, name) \ static type safe_##name(void) \ { \ @@ -653,6 +681,15 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ int, flags, mode_t, mode) + +struct open_how_ver0 { + __u64 flags; + __u64 mode; + __u64 resolve; +}; +safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \ + const struct open_how_ver0 *, how, size_t, size) + #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid) safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ struct rusage *, rusage) @@ -8334,8 +8371,9 @@ static int open_net_route(CPUArchState *cpu_env, int fd) } #endif -int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, - int flags, mode_t mode, bool safe) +static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd, + const char *fname, int flags, mode_t mode, + bool safe) { g_autofree char 
*proc_name = NULL; const char *pathname; @@ -8418,6 +8456,17 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, return fd; } + return -2; +} + +int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, + int flags, mode_t mode, bool safe) +{ + int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, flags, mode, safe); + if (fd > -2) { + return get_errno(fd); + } + if (safe) { return safe_openat(dirfd, path(pathname), flags, mode); } else { @@ -8425,6 +8474,55 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, } } + +static int do_openat2(CPUArchState *cpu_env, abi_long dirfd, + abi_ptr guest_pathname, abi_ptr guest_open_how, + abi_long guest_size) +{ + struct open_how_ver0 how = {0}; + int ret; + + if (guest_size < sizeof(struct target_open_how_ver0)) { + return -TARGET_EINVAL; + } + ret = copy_struct_from_user(&how, sizeof(how), guest_open_how, guest_size); + if (ret) { + if (ret == -TARGET_E2BIG) { + qemu_log_mask(LOG_UNIMP, + "Unimplemented openat2 open_how size: %lu\n", + guest_size); + } + return ret; + } + char *pathname = lock_user_string(guest_pathname); + if (!pathname) { + return -TARGET_EFAULT; + } + + how.flags = target_to_host_bitmask(how.flags, fcntl_flags_tbl); + how.mode = tswap64(how.mode); + how.resolve = tswap64(how.resolve); + + /* + * Ideally we would pass "how->resolve" flags into this helper too but + * the lookup for files that need faking is based on "realpath()" so + * neither a dirfd for "proc" nor restrictions via "resolve" flags can + * be honored right now. 
+ */ + int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, how.flags, how.mode, + true); + if (fd > -2) { + return get_errno(fd); + } else { + ret = get_errno(safe_openat2(dirfd, pathname, &how, + sizeof(struct open_how_ver0))); + } + + fd_trans_unregister(ret); + unlock_user(pathname, guest_pathname, 0); + return ret; +} + ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz) { ssize_t ret; @@ -9197,6 +9295,11 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, fd_trans_unregister(ret); unlock_user(p, arg2, 0); return ret; +#if defined(TARGET_NR_openat2) + case TARGET_NR_openat2: + ret = do_openat2(cpu_env, arg1, arg2, arg3, arg4); + return ret; +#endif #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) case TARGET_NR_name_to_handle_at: ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 8ed53904ed..b83fa34663 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -2753,4 +2753,11 @@ struct target_sched_param { abi_int sched_priority; }; +/* from kernel's include/uapi/linux/openat2.h */ +struct target_open_how_ver0 { + abi_ullong flags; + abi_ullong mode; + abi_ullong resolve; +}; + #endif
This commit adds support for the `openat2()` syscall in the `linux-user` userspace emulator. It is implemented by extracting a new helper `maybe_do_fake_open()` out of the existing `do_guest_openat()` and sharing it with the new `do_openat2()`. Unfortunately we cannot just make do_openat2() a superset of do_guest_openat() because the openat2() syscall is stricter with its argument checking and will return an error for invalid flags or mode combinations (which open()/openat() will ignore). The implementation is similar to SYSCALL_DEFINE(openat2), i.e. a new `copy_struct_from_user()` is used that works the same as the kernel's version to support backwards-compatibility for struct syscall arguments. Instead of including openat2.h we create a copy of `open_how` as `open_how_ver0` to ensure that if the structure grows we can log a LOG_UNIMP warning. Note that in this commit using openat2() for a "faked" file in /proc will ignore the "resolve" flags. This is not great but it seems similar to the existing behavior when openat() is called with a dirfd to "/proc". Here too the fake file lookup may not catch the special file because "realpath()" is used to determine if the path is in /proc. As an alternative to ignoring them, we could simply fail with `-TARGET_ENOSYS` (or similar) if `resolve` flags are passed and we found something that looks like a file in /proc that needs faking. Signed-off-by: Michael Vogt <mvogt@redhat.com> Buglink: https://github.com/osbuild/bootc-image-builder/issues/619 --- linux-user/syscall.c | 107 +++++++++++++++++++++++++++++++++++++- linux-user/syscall_defs.h | 7 +++ 2 files changed, 112 insertions(+), 2 deletions(-)