diff mbox series

linux-user: add openat2 support in linux-user

Message ID 20240828144227.32977-2-mvogt@redhat.com (mailing list archive)
State New
Headers show
Series linux-user: add openat2 support in linux-user | expand

Commit Message

Michael Vogt Aug. 28, 2024, 2:42 p.m. UTC
This commit adds support for the `openat2()` syscall in the
`linux-user` userspace emulator.

It is implemented by extracting a new helper `maybe_do_fake_open()`
out of the exiting `do_guest_openat()` and share that with the
new `do_guest_openat2()`. Unfortunatly we cannot just make
do_guest_openat2() a superset of do_guest_openat() because the
openat2() syscall is stricter with the argument checking and
will return an error for invalid flags or mode combinations (which
open()/openat() will ignore).

Note that in this commit using openat2() for a "faked" file in
/proc will ignore the "resolve" flags. This is not great but it
seems similar to the exiting behavior when openat() is called
with a dirfd to "/proc". Here too the fake file lookup may
not catch the special file because "realpath()" is used to
determine if the path is in /proc. Alternatively to ignoring
we could simply fail with `-TARGET_ENOSYS` (or similar) if
`resolve` flags are passed and we found something that looks
like a file in /proc that needs faking.

Signed-off-by: Michael Vogt <mvogt@redhat.com>

Buglink: https://github.com/osbuild/bootc-image-builder/issues/619
---
 linux-user/qemu.h         |  4 +++
 linux-user/syscall.c      | 73 ++++++++++++++++++++++++++++++++++++---
 linux-user/syscall_defs.h |  7 ++++
 meson.build               |  1 +
 4 files changed, 81 insertions(+), 4 deletions(-)

Comments

Richard Henderson Aug. 29, 2024, 1:23 a.m. UTC | #1
On 8/29/24 00:42, Michael Vogt wrote:
> This commit adds support for the `openat2()` syscall in the
> `linux-user` userspace emulator.
> 
> It is implemented by extracting a new helper `maybe_do_fake_open()`
> out of the exiting `do_guest_openat()` and share that with the
> new `do_guest_openat2()`. Unfortunatly we cannot just make
> do_guest_openat2() a superset of do_guest_openat() because the
> openat2() syscall is stricter with the argument checking and
> will return an error for invalid flags or mode combinations (which
> open()/openat() will ignore).
> 
> Note that in this commit using openat2() for a "faked" file in
> /proc will ignore the "resolve" flags. This is not great but it
> seems similar to the exiting behavior when openat() is called
> with a dirfd to "/proc". Here too the fake file lookup may
> not catch the special file because "realpath()" is used to
> determine if the path is in /proc. Alternatively to ignoring
> we could simply fail with `-TARGET_ENOSYS` (or similar) if
> `resolve` flags are passed and we found something that looks
> like a file in /proc that needs faking.
> 
> Signed-off-by: Michael Vogt <mvogt@redhat.com>
> 
> Buglink: https://github.com/osbuild/bootc-image-builder/issues/619
> ---
>   linux-user/qemu.h         |  4 +++
>   linux-user/syscall.c      | 73 ++++++++++++++++++++++++++++++++++++---
>   linux-user/syscall_defs.h |  7 ++++
>   meson.build               |  1 +
>   4 files changed, 81 insertions(+), 4 deletions(-)
> 
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index 2e90a97175..47b6d7da88 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -164,6 +164,10 @@ struct TaskState {
>   abi_long do_brk(abi_ulong new_brk);
>   int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname,
>                       int flags, mode_t mode, bool safe);
> +#ifdef HAVE_OPENAT2_H
> +int do_guest_openat2(CPUArchState *cpu_env, int dirfd, const char *pathname,
> +                     struct target_open_how *how, bool safe);
> +#endif

Not needed.  The only reason do_guest_openat is declared here is for gdbstub/.

> +#ifdef HAVE_OPENAT2_H
> +#include <linux/openat2.h>
> +/* glibc has no header for SYS_openat2 so we need to get it via syscall.h */
> +#include <sys/syscall.h>
> +#endif

No need for sys/syscall.h, handled by safe_syscall4.

> +#ifdef HAVE_OPENAT2_H
> +int do_guest_openat2(CPUArchState *cpu_env, int dirfd, const char *fname,
> +                     struct target_open_how *how, bool safe)

No need for safe argument, that is only used by gdbstub/.

> +{
> +    /*
> +     * Ideally we would pass "how->resolve" flags into this helper too but
> +     * the lookup for files that need faking is based on "realpath()" so
> +     * neither a dirfd for "proc" nor restrictions via "resolve" flags can
> +     * be honored right now.
> +     */
> +    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, how->flags, how->mode,
> +                                safe);
> +    if (fd >= 0)
> +        return fd;
> +
> +    if (safe) {
> +        return safe_openat2(dirfd, fname, (struct open_how *)how,

This cast is an indication that you're doing something wrong.

> +#if defined(TARGET_NR_openat2) && defined(HAVE_OPENAT2_H)
> +    case TARGET_NR_openat2:
> +        {
> +            struct target_open_how how, *target_how;
> +            if (!(p = lock_user_string(arg2)))
> +                return -TARGET_EFAULT;

No assignment within if.

> +            if (!(lock_user_struct(VERIFY_READ, target_how, arg3, 1)))
> +                return -TARGET_EFAULT;
> +            how.flags = target_to_host_bitmask(target_how->flags,
> +                                               fcntl_flags_tbl);
> +            how.mode = tswap64(target_how->mode);
> +            how.resolve = tswap64(target_how->resolve);
> +            ret = get_errno(do_guest_openat2(cpu_env, arg1, p, &how, true));
> +            fd_trans_unregister(ret);
> +            unlock_user_struct(target_how, arg3, 0);
> +            unlock_user(p, arg2, 0);
> +            return ret;
> +        }
> +#endif

Move all of this code into the helper function for clarity.

Missing validation of small arg4 (EINVAL).
Missing validation of zeros for large arg4 (E2BIG).

The 'how' variable should be the host open_how structure.
Then you don't need the incorrect cast above.
Missing a zero of the structure, which in the future might contain extra fields.

Given linux/openat2.h appeared in linux 5.6, we might be coming to the point at which all 
supported host os must have it.  Otherwise, are you using anything besides struct open_how 
itself?  Perhaps we're better off always defining the structure locally, so that way it 
cannot expand on us with a mere upgrade of the system linux headers package?

> +/* from kernel's include/uapi/linux/openat2.h */
> +struct target_open_how {
> +    __u64 flags;
> +    __u64 mode;
> +    __u64 resolve;
> +};

Using host __u64 is always wrong for target structures.
Use "abi_*" types for target alignment requirements.


r~
diff mbox series

Patch

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 2e90a97175..47b6d7da88 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -164,6 +164,10 @@  struct TaskState {
 abi_long do_brk(abi_ulong new_brk);
 int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname,
                     int flags, mode_t mode, bool safe);
+#ifdef HAVE_OPENAT2_H
+int do_guest_openat2(CPUArchState *cpu_env, int dirfd, const char *pathname,
+                     struct target_open_how *how, bool safe);
+#endif
 ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz);
 
 /* user access */
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d5415674d..f3a47bbfc0 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -85,6 +85,12 @@ 
 #include <sys/kcov.h>
 #endif
 
+#ifdef HAVE_OPENAT2_H
+#include <linux/openat2.h>
+/* glibc has no header for SYS_openat2 so we need to get it via syscall.h */
+#include <sys/syscall.h>
+#endif
+
 #define termios host_termios
 #define winsize host_winsize
 #define termio host_termio
@@ -653,6 +659,10 @@  safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count)
 safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count)
 safe_syscall4(int, openat, int, dirfd, const char *, pathname, \
               int, flags, mode_t, mode)
+#ifdef HAVE_OPENAT2_H
+safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \
+              const struct open_how *, how, size_t, size);
+#endif
 #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid)
 safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \
               struct rusage *, rusage)
@@ -8334,8 +8344,9 @@  static int open_net_route(CPUArchState *cpu_env, int fd)
 }
 #endif
 
-int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
-                    int flags, mode_t mode, bool safe)
+static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd,
+                              const char *fname, int flags, mode_t mode,
+                              bool safe)
 {
     g_autofree char *proc_name = NULL;
     const char *pathname;
@@ -8418,13 +8429,48 @@  int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
         return fd;
     }
 
+    return -1;
+}
+
+int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
+                    int flags, mode_t mode, bool safe)
+{
+    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, flags, mode, safe);
+    if (fd >= 0)
+        return fd;
+
     if (safe) {
-        return safe_openat(dirfd, path(pathname), flags, mode);
+        return safe_openat(dirfd, path(fname), flags, mode);
     } else {
-        return openat(dirfd, path(pathname), flags, mode);
+        return openat(dirfd, path(fname), flags, mode);
     }
 }
 
+#ifdef HAVE_OPENAT2_H
+int do_guest_openat2(CPUArchState *cpu_env, int dirfd, const char *fname,
+                     struct target_open_how *how, bool safe)
+{
+    /*
+     * Ideally we would pass "how->resolve" flags into this helper too but
+     * the lookup for files that need faking is based on "realpath()" so
+     * neither a dirfd for "proc" nor restrictions via "resolve" flags can
+     * be honored right now.
+     */
+    int fd = maybe_do_fake_open(cpu_env, dirfd, fname, how->flags, how->mode,
+                                safe);
+    if (fd >= 0)
+        return fd;
+
+    if (safe) {
+        return safe_openat2(dirfd, fname, (struct open_how *)how,
+                            sizeof(struct target_open_how));
+    } else {
+        return syscall(SYS_openat2, dirfd, fname, (struct open_hosw *)how,
+                       sizeof(struct target_open_how));
+    }
+}
+#endif
+
 ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz)
 {
     ssize_t ret;
@@ -9197,6 +9243,25 @@  static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
         fd_trans_unregister(ret);
         unlock_user(p, arg2, 0);
         return ret;
+#if defined(TARGET_NR_openat2) && defined(HAVE_OPENAT2_H)
+    case TARGET_NR_openat2:
+        {
+            struct target_open_how how, *target_how;
+            if (!(p = lock_user_string(arg2)))
+                return -TARGET_EFAULT;
+            if (!(lock_user_struct(VERIFY_READ, target_how, arg3, 1)))
+                return -TARGET_EFAULT;
+            how.flags = target_to_host_bitmask(target_how->flags,
+                                               fcntl_flags_tbl);
+            how.mode = tswap64(target_how->mode);
+            how.resolve = tswap64(target_how->resolve);
+            ret = get_errno(do_guest_openat2(cpu_env, arg1, p, &how, true));
+            fd_trans_unregister(ret);
+            unlock_user_struct(target_how, arg3, 0);
+            unlock_user(p, arg2, 0);
+            return ret;
+        }
+#endif
 #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE)
     case TARGET_NR_name_to_handle_at:
         ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5);
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index a00b617cae..2ae694261c 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -2754,4 +2754,11 @@  struct target_sched_param {
     abi_int sched_priority;
 };
 
+/* from kernel's include/uapi/linux/openat2.h */
+struct target_open_how {
+    __u64 flags;
+    __u64 mode;
+    __u64 resolve;
+};
+
 #endif
diff --git a/meson.build b/meson.build
index fbda17c987..220ccbcbe6 100644
--- a/meson.build
+++ b/meson.build
@@ -2465,6 +2465,7 @@  config_host_data.set('CONFIG_LINUX_MAGIC_H', cc.has_header('linux/magic.h'))
 config_host_data.set('CONFIG_VALGRIND_H', cc.has_header('valgrind/valgrind.h'))
 config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h'))
 config_host_data.set('HAVE_DRM_H', cc.has_header('libdrm/drm.h'))
+config_host_data.set('HAVE_OPENAT2_H', cc.has_header('linux/openat2.h'))
 config_host_data.set('HAVE_PTY_H', cc.has_header('pty.h'))
 config_host_data.set('HAVE_SYS_DISK_H', cc.has_header('sys/disk.h'))
 config_host_data.set('HAVE_SYS_IOCCOM_H', cc.has_header('sys/ioccom.h'))