diff mbox series

[v2] linux-user: implement execveat

Message ID 20221103145719.3470738-1-sir@cmpwn.com (mailing list archive)
State New, archived
Headers show
Series [v2] linux-user: implement execveat | expand

Commit Message

Drew DeVault Nov. 3, 2022, 2:57 p.m. UTC
References: https://gitlab.com/qemu-project/qemu/-/issues/1007
Signed-off-by: Drew DeVault <sir@cmpwn.com>
---
 linux-user/syscall.c | 204 +++++++++++++++++++++++--------------------
 1 file changed, 111 insertions(+), 93 deletions(-)

Comments

Laurent Vivier Nov. 3, 2022, 5:11 p.m. UTC | #1
Le 03/11/2022 à 15:57, Drew DeVault a écrit :
> References: https://gitlab.com/qemu-project/qemu/-/issues/1007
> Signed-off-by: Drew DeVault <sir@cmpwn.com>
> ---
>   linux-user/syscall.c | 204 +++++++++++++++++++++++--------------------
>   1 file changed, 111 insertions(+), 93 deletions(-)
> 
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index f55cdebee5..57f0b2f0e8 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -633,7 +633,12 @@ safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \
>   #endif
>   safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \
>                 int, options, struct rusage *, rusage)
> +#if defined(TARGET_NR_execveat)

execveat has been introduced in kernel 3.19 and all target sycall tables have been updated to 5.13 
by 2fa4ad3f9000 and 3a2f19b7ee3a (except cris, stuck to 4.16), so I think you can remove the "#if 
defined()".

> +safe_syscall5(int, execveat, int, dirfd, const char *, filename,
> +        char **, argv, char **, envp, int, flags)
> +#else
>   safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp)
> +#endif
>   #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \
>       defined(TARGET_NR_pselect6) || defined(TARGET_NR_pselect6_time64)
>   safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \
> @@ -8281,6 +8286,107 @@ static int do_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, int
>       return safe_openat(dirfd, path(pathname), flags, mode);
>   }
>   
> +static int do_execveat(CPUArchState *cpu_env, int dirfd, abi_long pathname, abi_long guest_argp, abi_long guest_envp, int flags)
> +{
> +    int ret;
> +    char **argp, **envp;
> +    int argc, envc;
> +    abi_ulong gp;
> +    abi_ulong addr;
> +    char **q;
> +    void *p;
> +
> +    argc = 0;
> +
> +    for (gp = guest_argp; gp; gp += sizeof(abi_ulong)) {
> +        if (get_user_ual(addr, gp))
> +            return -TARGET_EFAULT;
> +        if (!addr)
> +            break;
> +        argc++;
> +    }
> +    envc = 0;
> +    for (gp = guest_envp; gp; gp += sizeof(abi_ulong)) {
> +        if (get_user_ual(addr, gp))
> +            return -TARGET_EFAULT;
> +        if (!addr)
> +            break;
> +        envc++;
> +    }
> +
> +    argp = g_new0(char *, argc + 1);
> +    envp = g_new0(char *, envc + 1);
> +
> +    for (gp = guest_argp, q = argp; gp;
> +          gp += sizeof(abi_ulong), q++) {
> +        if (get_user_ual(addr, gp))
> +            goto execve_efault;
> +        if (!addr)
> +            break;
> +        if (!(*q = lock_user_string(addr)))
> +            goto execve_efault;
> +    }
> +    *q = NULL;
> +
> +    for (gp = guest_envp, q = envp; gp;
> +          gp += sizeof(abi_ulong), q++) {
> +        if (get_user_ual(addr, gp))
> +            goto execve_efault;
> +        if (!addr)
> +            break;
> +        if (!(*q = lock_user_string(addr)))
> +            goto execve_efault;
> +    }
> +    *q = NULL;
> +
> +    /* Although execve() is not an interruptible syscall it is
> +     * a special case where we must use the safe_syscall wrapper:
> +     * if we allow a signal to happen before we make the host
> +     * syscall then we will 'lose' it, because at the point of
> +     * execve the process leaves QEMU's control. So we use the
> +     * safe syscall wrapper to ensure that we either take the
> +     * signal as a guest signal, or else it does not happen
> +     * before the execve completes and makes it the other
> +     * program's problem.
> +     */
> +    if (!(p = lock_user_string(pathname)))
> +        goto execve_efault;
> +
> +#if defined(TARGET_NR_execveat)
> +    ret = get_errno(safe_execveat(dirfd, p, argp, envp, flags));
> +#else
> +    assert(dirfd == AT_FDCWD && flags == 0);
> +    ret = get_errno(safe_execve(p, argp, envp));
> +#endif

You don't need the #ifdef, you can call execveat() in both cases, like the kernel.

You need to check for "is_proc_myself(p, "exe")" to manage the case fixed by
  f07eb1c4f805 ("linux-user: handle /proc/self/exe with execve() syscall")

Thanks,
Laurent
diff mbox series

Patch

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index f55cdebee5..57f0b2f0e8 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -633,7 +633,12 @@  safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \
 #endif
 safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \
               int, options, struct rusage *, rusage)
+#if defined(TARGET_NR_execveat)
+safe_syscall5(int, execveat, int, dirfd, const char *, filename,
+        char **, argv, char **, envp, int, flags)
+#else
 safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp)
+#endif
 #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \
     defined(TARGET_NR_pselect6) || defined(TARGET_NR_pselect6_time64)
 safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \
@@ -8281,6 +8286,107 @@  static int do_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, int
     return safe_openat(dirfd, path(pathname), flags, mode);
 }
 
+static int do_execveat(CPUArchState *cpu_env, int dirfd, abi_long pathname, abi_long guest_argp, abi_long guest_envp, int flags)
+{
+    int ret;
+    char **argp, **envp;
+    int argc, envc;
+    abi_ulong gp;
+    abi_ulong addr;
+    char **q;
+    void *p;
+
+    argc = 0;
+
+    for (gp = guest_argp; gp; gp += sizeof(abi_ulong)) {
+        if (get_user_ual(addr, gp))
+            return -TARGET_EFAULT;
+        if (!addr)
+            break;
+        argc++;
+    }
+    envc = 0;
+    for (gp = guest_envp; gp; gp += sizeof(abi_ulong)) {
+        if (get_user_ual(addr, gp))
+            return -TARGET_EFAULT;
+        if (!addr)
+            break;
+        envc++;
+    }
+
+    argp = g_new0(char *, argc + 1);
+    envp = g_new0(char *, envc + 1);
+
+    for (gp = guest_argp, q = argp; gp;
+          gp += sizeof(abi_ulong), q++) {
+        if (get_user_ual(addr, gp))
+            goto execve_efault;
+        if (!addr)
+            break;
+        if (!(*q = lock_user_string(addr)))
+            goto execve_efault;
+    }
+    *q = NULL;
+
+    for (gp = guest_envp, q = envp; gp;
+          gp += sizeof(abi_ulong), q++) {
+        if (get_user_ual(addr, gp))
+            goto execve_efault;
+        if (!addr)
+            break;
+        if (!(*q = lock_user_string(addr)))
+            goto execve_efault;
+    }
+    *q = NULL;
+
+    /* Although execve() is not an interruptible syscall it is
+     * a special case where we must use the safe_syscall wrapper:
+     * if we allow a signal to happen before we make the host
+     * syscall then we will 'lose' it, because at the point of
+     * execve the process leaves QEMU's control. So we use the
+     * safe syscall wrapper to ensure that we either take the
+     * signal as a guest signal, or else it does not happen
+     * before the execve completes and makes it the other
+     * program's problem.
+     */
+    if (!(p = lock_user_string(pathname)))
+        goto execve_efault;
+
+#if defined(TARGET_NR_execveat)
+    ret = get_errno(safe_execveat(dirfd, p, argp, envp, flags));
+#else
+    assert(dirfd == AT_FDCWD && flags == 0);
+    ret = get_errno(safe_execve(p, argp, envp));
+#endif
+
+    unlock_user(p, pathname, 0);
+
+    goto execve_end;
+
+execve_efault:
+    ret = -TARGET_EFAULT;
+
+execve_end:
+    for (gp = guest_argp, q = argp; *q;
+          gp += sizeof(abi_ulong), q++) {
+        if (get_user_ual(addr, gp)
+            || !addr)
+            break;
+        unlock_user(*q, addr, 0);
+    }
+    for (gp = guest_envp, q = envp; *q;
+          gp += sizeof(abi_ulong), q++) {
+        if (get_user_ual(addr, gp)
+            || !addr)
+            break;
+        unlock_user(*q, addr, 0);
+    }
+
+    g_free(argp);
+    g_free(envp);
+    return ret;
+}
+
 #define TIMER_MAGIC 0x0caf0000
 #define TIMER_MAGIC_MASK 0xffff0000
 
@@ -8748,101 +8854,13 @@  static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
         ret = get_errno(unlinkat(arg1, p, arg3));
         unlock_user(p, arg2, 0);
         return ret;
+#endif
+#if defined(TARGET_NR_execveat)
+    case TARGET_NR_execveat:
+        return do_execveat(cpu_env, arg1, arg2, arg3, arg4, arg5);
 #endif
     case TARGET_NR_execve:
-        {
-            char **argp, **envp;
-            int argc, envc;
-            abi_ulong gp;
-            abi_ulong guest_argp;
-            abi_ulong guest_envp;
-            abi_ulong addr;
-            char **q;
-
-            argc = 0;
-            guest_argp = arg2;
-            for (gp = guest_argp; gp; gp += sizeof(abi_ulong)) {
-                if (get_user_ual(addr, gp))
-                    return -TARGET_EFAULT;
-                if (!addr)
-                    break;
-                argc++;
-            }
-            envc = 0;
-            guest_envp = arg3;
-            for (gp = guest_envp; gp; gp += sizeof(abi_ulong)) {
-                if (get_user_ual(addr, gp))
-                    return -TARGET_EFAULT;
-                if (!addr)
-                    break;
-                envc++;
-            }
-
-            argp = g_new0(char *, argc + 1);
-            envp = g_new0(char *, envc + 1);
-
-            for (gp = guest_argp, q = argp; gp;
-                  gp += sizeof(abi_ulong), q++) {
-                if (get_user_ual(addr, gp))
-                    goto execve_efault;
-                if (!addr)
-                    break;
-                if (!(*q = lock_user_string(addr)))
-                    goto execve_efault;
-            }
-            *q = NULL;
-
-            for (gp = guest_envp, q = envp; gp;
-                  gp += sizeof(abi_ulong), q++) {
-                if (get_user_ual(addr, gp))
-                    goto execve_efault;
-                if (!addr)
-                    break;
-                if (!(*q = lock_user_string(addr)))
-                    goto execve_efault;
-            }
-            *q = NULL;
-
-            if (!(p = lock_user_string(arg1)))
-                goto execve_efault;
-            /* Although execve() is not an interruptible syscall it is
-             * a special case where we must use the safe_syscall wrapper:
-             * if we allow a signal to happen before we make the host
-             * syscall then we will 'lose' it, because at the point of
-             * execve the process leaves QEMU's control. So we use the
-             * safe syscall wrapper to ensure that we either take the
-             * signal as a guest signal, or else it does not happen
-             * before the execve completes and makes it the other
-             * program's problem.
-             */
-            ret = get_errno(safe_execve(p, argp, envp));
-            unlock_user(p, arg1, 0);
-
-            goto execve_end;
-
-        execve_efault:
-            ret = -TARGET_EFAULT;
-
-        execve_end:
-            for (gp = guest_argp, q = argp; *q;
-                  gp += sizeof(abi_ulong), q++) {
-                if (get_user_ual(addr, gp)
-                    || !addr)
-                    break;
-                unlock_user(*q, addr, 0);
-            }
-            for (gp = guest_envp, q = envp; *q;
-                  gp += sizeof(abi_ulong), q++) {
-                if (get_user_ual(addr, gp)
-                    || !addr)
-                    break;
-                unlock_user(*q, addr, 0);
-            }
-
-            g_free(argp);
-            g_free(envp);
-        }
-        return ret;
+        return do_execveat(cpu_env, AT_FDCWD, arg1, arg2, arg3, 0);
     case TARGET_NR_chdir:
         if (!(p = lock_user_string(arg1)))
             return -TARGET_EFAULT;