diff mbox series

[v1] linux-user: Add option to run `execve`d programs through QEMU

Message ID 20240830223601.2796327-1-goldstein.w.n@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v1] linux-user: Add option to run `execve`d programs through QEMU | expand

Commit Message

Noah Goldstein Aug. 30, 2024, 10:36 p.m. UTC
The new option '-qemu-children' makes it so that on `execve` the child
process will be launch by the same `qemu` executable that is currently
running along with its current commandline arguments.

The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children.  Why not just
`binfmt`?: Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for elf files that can run
natively. Enabling `binfmt` for all natively runnable elf files may
not be desirable.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
 linux-user/main.c           | 22 ++++++++++++++++++++++
 linux-user/syscall.c        | 20 +++++++++++++++-----
 linux-user/user-internals.h |  4 ++++
 3 files changed, 41 insertions(+), 5 deletions(-)

Comments

Noah Goldstein Sept. 10, 2024, 10:06 p.m. UTC | #1
On Fri, Aug 30, 2024 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The new option '-qemu-children' makes it so that on `execve` the child
> > process will be launch by the same `qemu` executable that is currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
> >
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >  linux-user/main.c           | 22 ++++++++++++++++++++++
> >  linux-user/syscall.c        | 20 +++++++++++++++-----
> >  linux-user/user-internals.h |  4 ++++
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..dfb303a1f2 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >  uintptr_t guest_base;
> >  bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char ** qemu_argv;
> > +
> >  /*
> >   * Used to implement backwards-compatibility for the `-strace`, and
> >   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >      perf_enable_jitdump();
> >  }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >  #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >       "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with qemu "
> > +                   "(as instantiated for the parent)"},
> >      {NULL, NULL, false, NULL, NULL, NULL}
> >  };
> >
> > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> >
> >      optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        int i;
> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
> > +        }
> > +    }
> > +
> >      qemu_set_log_filename_flags(last_log_filename,
> >                                  last_log_mask | (enable_strace * LOG_STRACE),
> >                                  &error_fatal);
> > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > index 9d5415674d..732ef89054 100644
> > --- a/linux-user/syscall.c
> > +++ b/linux-user/syscall.c
> > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >                      abi_long pathname, abi_long guest_argp,
> >                      abi_long guest_envp, int flags, bool is_execveat)
> >  {
> > -    int ret;
> > +    int ret, argp_offset;
> >      char **argp, **envp;
> >      int argc, envc;
> >      abi_ulong gp;
> >      abi_ulong addr;
> >      char **q;
> >      void *p;
> > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
> >
> I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts
> it would be nice to support that as well.
>
> >      argc = 0;
> >
> > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >          envc++;
> >      }
> >
> > -    argp = g_new0(char *, argc + 1);
> > +    argp_offset = through_qemu ? qemu_argc : 0;
> > +    argp = g_new0(char *, argc + argp_offset + 1);
> >      envp = g_new0(char *, envc + 1);
> >
> > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp)) {
> >              goto execve_efault;
> >          }
> > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >      }
> >
> >      const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    }
> > +    else if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >      }
> > +
> >      ret = is_execveat
> >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> >          : safe_execve(exe, argp, envp);
> > @@ -8553,7 +8563,7 @@ execve_efault:
> >      ret = -TARGET_EFAULT;
> >
> >  execve_end:
> > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp) || !addr) {
> >              break;
> >          }
> > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> > index 5c7f173ceb..0719e65ff4 100644
> > --- a/linux-user/user-internals.h
> > +++ b/linux-user/user-internals.h
> > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> >  extern const char *qemu_uname_release;
> >  extern unsigned long mmap_min_addr;
> >
> > +extern bool qemu_dup_for_children;
> > +extern int qemu_argc;
> > +extern char ** qemu_argv;
> > +
> >  typedef struct IOCTLEntry IOCTLEntry;
> >
> >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
> > --
> > 2.34.1
> >

ping
Noah Goldstein Sept. 24, 2024, 2:43 p.m. UTC | #2
On Tue, Sep 10, 2024 at 3:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Aug 30, 2024 at 3:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Fri, Aug 30, 2024 at 3:36 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > The new option '-qemu-children' makes it so that on `execve` the child
> > > process will be launch by the same `qemu` executable that is currently
> > > running along with its current commandline arguments.
> > >
> > > The motivation for the change is to make it so that plugins running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files may
> > > not be desirable.
> > >
> > > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > > ---
> > >  linux-user/main.c           | 22 ++++++++++++++++++++++
> > >  linux-user/syscall.c        | 20 +++++++++++++++-----
> > >  linux-user/user-internals.h |  4 ++++
> > >  3 files changed, 41 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/linux-user/main.c b/linux-user/main.c
> > > index 8143a0d4b0..dfb303a1f2 100644
> > > --- a/linux-user/main.c
> > > +++ b/linux-user/main.c
> > > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> > >  uintptr_t guest_base;
> > >  bool have_guest_base;
> > >
> > > +bool qemu_dup_for_children;
> > > +int qemu_argc;
> > > +char ** qemu_argv;
> > > +
> > >  /*
> > >   * Used to implement backwards-compatibility for the `-strace`, and
> > >   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> > >      perf_enable_jitdump();
> > >  }
> > >
> > > +static void handle_arg_qemu_children(const char *arg)
> > > +{
> > > +    qemu_dup_for_children = true;
> > > +}
> > > +
> > >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> > >
> > >  #ifdef CONFIG_PLUGIN
> > > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> > >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> > >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> > >       "",           "Generate a jit-${pid}.dump file for perf"},
> > > +    {"qemu-children",
> > > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > > +     "",           "Run child processes (created with execve) with qemu "
> > > +                   "(as instantiated for the parent)"},
> > >      {NULL, NULL, false, NULL, NULL, NULL}
> > >  };
> > >
> > > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> > >
> > >      optind = parse_args(argc, argv);
> > >
> > > +    if (qemu_dup_for_children) {
> > > +        int i;
> > > +        qemu_argc = optind;
> > > +        qemu_argv = g_new0(char *, qemu_argc);
> > > +        for (i = 0; i < optind; ++i) {
> > > +            qemu_argv[i] = strdup(argv[i]);
> > > +        }
> > > +    }
> > > +
> > >      qemu_set_log_filename_flags(last_log_filename,
> > >                                  last_log_mask | (enable_strace * LOG_STRACE),
> > >                                  &error_fatal);
> > > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > > index 9d5415674d..732ef89054 100644
> > > --- a/linux-user/syscall.c
> > > +++ b/linux-user/syscall.c
> > > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >                      abi_long pathname, abi_long guest_argp,
> > >                      abi_long guest_envp, int flags, bool is_execveat)
> > >  {
> > > -    int ret;
> > > +    int ret, argp_offset;
> > >      char **argp, **envp;
> > >      int argc, envc;
> > >      abi_ulong gp;
> > >      abi_ulong addr;
> > >      char **q;
> > >      void *p;
> > > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
> > >
> > I wasn't sure if this was feasible for `evecveat`. If anyone has any thoughts
> > it would be nice to support that as well.
> >
> > >      argc = 0;
> > >
> > > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >          envc++;
> > >      }
> > >
> > > -    argp = g_new0(char *, argc + 1);
> > > +    argp_offset = through_qemu ? qemu_argc : 0;
> > > +    argp = g_new0(char *, argc + argp_offset + 1);
> > >      envp = g_new0(char *, envc + 1);
> > >
> > > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> > > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
> > >          if (get_user_ual(addr, gp)) {
> > >              goto execve_efault;
> > >          }
> > > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> > >      }
> > >
> > >      const char *exe = p;
> > > -    if (is_proc_myself(p, "exe")) {
> > > +    if (through_qemu) {
> > > +        int i;
> > > +        for (i = 0; i < argp_offset; ++i) {
> > > +            argp[i] = qemu_argv[i];
> > > +        }
> > > +        exe = qemu_argv[0];
> > > +    }
> > > +    else if (is_proc_myself(p, "exe")) {
> > >          exe = exec_path;
> > >      }
> > > +
> > >      ret = is_execveat
> > >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> > >          : safe_execve(exe, argp, envp);
> > > @@ -8553,7 +8563,7 @@ execve_efault:
> > >      ret = -TARGET_EFAULT;
> > >
> > >  execve_end:
> > > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> > > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
> > >          if (get_user_ual(addr, gp) || !addr) {
> > >              break;
> > >          }
> > > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> > > index 5c7f173ceb..0719e65ff4 100644
> > > --- a/linux-user/user-internals.h
> > > +++ b/linux-user/user-internals.h
> > > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> > >  extern const char *qemu_uname_release;
> > >  extern unsigned long mmap_min_addr;
> > >
> > > +extern bool qemu_dup_for_children;
> > > +extern int qemu_argc;
> > > +extern char ** qemu_argv;
> > > +
> > >  typedef struct IOCTLEntry IOCTLEntry;
> > >
> > >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
> > > --
> > > 2.34.1
> > >
>
> ping
ping
Ilya Leoshkevich Oct. 2, 2024, 8:08 a.m. UTC | #3
On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> The new option '-qemu-children' makes it so that on `execve` the
> child
> process will be launch by the same `qemu` executable that is
> currently
> running along with its current commandline arguments.
> 
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.

Another reason to have this is that one may not have root permissions
to configure binfmt-misc.

There was a similar patch posted to the mailing list some years back,
which I used to cherry-pick when I needed this. I'm not sure what
happened to that discussion though.

> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>  linux-user/main.c           | 22 ++++++++++++++++++++++
>  linux-user/syscall.c        | 20 +++++++++++++++-----
>  linux-user/user-internals.h |  4 ++++
>  3 files changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..dfb303a1f2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>  uintptr_t guest_base;
>  bool have_guest_base;
>  
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char ** qemu_argv;

Style: ** belong to the variable name.
There are a couple other issues, please check the output of

git format-patch -1 --stdout | ./scripts/checkpatch.pl -

> +
>  /*
>   * Used to implement backwards-compatibility for the `-strace`, and
>   * QEMU_STRACE options. Without this, the QEMU_LOG can be
> overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>      perf_enable_jitdump();
>  }
>  
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>  
>  #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] =
> {
>       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>       "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false,
> handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with
> qemu "
> +                   "(as instantiated for the parent)"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>  
> @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
>  
>      optind = parse_args(argc, argv);
>  
> +    if (qemu_dup_for_children) {
> +        int i;

I get the following build error:

qemu/linux-user/main.c: In function ‘main’:
qemu/linux-user/main.c:746:13: error: declaration of ‘i’ shadows a
previous local [-Werror=shadow=compatible-local]
  746 |         int i;
      |             ^
qemu/linux-user/main.c:699:9: note: shadowed declaration is here
  699 |     int i;
      |         ^

I don't think this variable is needed at all.

> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);
> +        }
> +    }
> +
>      qemu_set_log_filename_flags(last_log_filename,
>                                  last_log_mask | (enable_strace *
> LOG_STRACE),
>                                  &error_fatal);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 9d5415674d..732ef89054 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env,
> int dirfd,
>                      abi_long pathname, abi_long guest_argp,
>                      abi_long guest_envp, int flags, bool
> is_execveat)
>  {
> -    int ret;
> +    int ret, argp_offset;
>      char **argp, **envp;
>      int argc, envc;
>      abi_ulong gp;
>      abi_ulong addr;
>      char **q;
>      void *p;
> +    bool through_qemu = !is_execveat && qemu_dup_for_children;

Wouldn't it be better to check for dirfd == AT_FDCWD?

>      argc = 0;
>  
> @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env,
> int dirfd,
>          envc++;
>      }
>  
> -    argp = g_new0(char *, argc + 1);
> +    argp_offset = through_qemu ? qemu_argc : 0;
> +    argp = g_new0(char *, argc + argp_offset + 1);
>      envp = g_new0(char *, envc + 1);
>  
> -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong),
> q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; gp; gp +=
> sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp)) {
>              goto execve_efault;
>          }
> @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int
> dirfd,
>      }
>  
>      const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    }
> +    else if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>      }
> +
>      ret = is_execveat
>          ? safe_execveat(dirfd, exe, argp, envp, flags)
>          : safe_execve(exe, argp, envp);
> @@ -8553,7 +8563,7 @@ execve_efault:
>      ret = -TARGET_EFAULT;
>  
>  execve_end:
> -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong),
> q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; *q; gp +=
> sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp) || !addr) {
>              break;
>          }
> diff --git a/linux-user/user-internals.h b/linux-user/user-
> internals.h
> index 5c7f173ceb..0719e65ff4 100644
> --- a/linux-user/user-internals.h
> +++ b/linux-user/user-internals.h
> @@ -30,6 +30,10 @@ void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
>  
> +extern bool qemu_dup_for_children;
> +extern int qemu_argc;
> +extern char ** qemu_argv;
> +
>  typedef struct IOCTLEntry IOCTLEntry;
>  
>  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t
> *buf_temp,
Noah Goldstein Oct. 2, 2024, 2:05 p.m. UTC | #4
On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > The new option '-qemu-children' makes it so that on `execve` the
> > child
> > process will be launch by the same `qemu` executable that is
> > currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
>
> Another reason to have this is that one may not have root permissions
> to configure binfmt-misc.
>
+1

> There was a similar patch posted to the mailing list some years back,
> which I used to cherry-pick when I needed this. I'm not sure what
> happened to that discussion though.

Yes(ish): https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/

>
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >  linux-user/main.c           | 22 ++++++++++++++++++++++
> >  linux-user/syscall.c        | 20 +++++++++++++++-----
> >  linux-user/user-internals.h |  4 ++++
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..dfb303a1f2 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >  uintptr_t guest_base;
> >  bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char ** qemu_argv;
>
> Style: ** belong to the variable name.
> There are a couple other issues, please check the output of
>
> git format-patch -1 --stdout | ./scripts/checkpatch.pl -
>

Will Do.

> > +
> >  /*
> >   * Used to implement backwards-compatibility for the `-strace`, and
> >   * QEMU_STRACE options. Without this, the QEMU_LOG can be
> > overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >      perf_enable_jitdump();
> >  }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >  #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] =
> > {
> >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >       "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false,
> > handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with
> > qemu "
> > +                   "(as instantiated for the parent)"},
> >      {NULL, NULL, false, NULL, NULL, NULL}
> >  };
> >
> > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> >
> >      optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        int i;
>
> I get the following build error:
>
> qemu/linux-user/main.c: In function ‘main’:
> qemu/linux-user/main.c:746:13: error: declaration of ‘i’ shadows a
> previous local [-Werror=shadow=compatible-local]
>   746 |         int i;
>       |             ^
> qemu/linux-user/main.c:699:9: note: shadowed declaration is here
>   699 |     int i;
>       |         ^
>
> I don't think this variable is needed at all.
>

Bah sorry.

> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
> > +        }
> > +    }
> > +
> >      qemu_set_log_filename_flags(last_log_filename,
> >                                  last_log_mask | (enable_strace *
> > LOG_STRACE),
> >                                  &error_fatal);
> > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > index 9d5415674d..732ef89054 100644
> > --- a/linux-user/syscall.c
> > +++ b/linux-user/syscall.c
> > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env,
> > int dirfd,
> >                      abi_long pathname, abi_long guest_argp,
> >                      abi_long guest_envp, int flags, bool
> > is_execveat)
> >  {
> > -    int ret;
> > +    int ret, argp_offset;
> >      char **argp, **envp;
> >      int argc, envc;
> >      abi_ulong gp;
> >      abi_ulong addr;
> >      char **q;
> >      void *p;
> > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
>
> Wouldn't it be better to check for dirfd == AT_FDCWD?

Yeah that works.
Ill fix in V2.
>
> >      argc = 0;
> >
> > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env,
> > int dirfd,
> >          envc++;
> >      }
> >
> > -    argp = g_new0(char *, argc + 1);
> > +    argp_offset = through_qemu ? qemu_argc : 0;
> > +    argp = g_new0(char *, argc + argp_offset + 1);
> >      envp = g_new0(char *, envc + 1);
> >
> > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong),
> > q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp +=
> > sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp)) {
> >              goto execve_efault;
> >          }
> > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int
> > dirfd,
> >      }
> >
> >      const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    }
> > +    else if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >      }
> > +
> >      ret = is_execveat
> >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> >          : safe_execve(exe, argp, envp);
> > @@ -8553,7 +8563,7 @@ execve_efault:
> >      ret = -TARGET_EFAULT;
> >
> >  execve_end:
> > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong),
> > q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp +=
> > sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp) || !addr) {
> >              break;
> >          }
> > diff --git a/linux-user/user-internals.h b/linux-user/user-
> > internals.h
> > index 5c7f173ceb..0719e65ff4 100644
> > --- a/linux-user/user-internals.h
> > +++ b/linux-user/user-internals.h
> > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> >  extern const char *qemu_uname_release;
> >  extern unsigned long mmap_min_addr;
> >
> > +extern bool qemu_dup_for_children;
> > +extern int qemu_argc;
> > +extern char ** qemu_argv;
> > +
> >  typedef struct IOCTLEntry IOCTLEntry;
> >
> >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t
> > *buf_temp,
>
Laurent Vivier Oct. 2, 2024, 2:08 p.m. UTC | #5
Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
>> The new option '-qemu-children' makes it so that on `execve` the
>> child
>> process will be launch by the same `qemu` executable that is
>> currently
>> running along with its current commandline arguments.
>>
>> The motivation for the change is to make it so that plugins running
>> through `qemu` can continue to run on children.  Why not just
>> `binfmt`?: Plugins can be desirable regardless of system/architecture
>> emulation, and can sometimes be useful for elf files that can run
>> natively. Enabling `binfmt` for all natively runnable elf files may
>> not be desirable.
> 
> Another reason to have this is that one may not have root permissions
> to configure binfmt-misc.

A little note on that: binfmt_misc is now part of the user namespace (since linux v6.7), so you can 
configure binfmt_misc as a non root user in a given namepace.

There is helper to use it with unshare from util-linux, you can do things like that:

   With 'F' flag, load the interpreter from the initial namespace:

     $ /bin/qemu-m68k-static --version
     qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
     Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
     $ unshare --map-root-user --fork --pid 
--load-interp=":qemu-m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu-m68k-static:OCF" 
--root=chroot/m68k/sid
     # QEMU_VERSION= ls
     qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
     Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
     # /qemu-m68k  --version
     qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
     Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers

   Without 'F' flag, from inside the namespace:

     $ unshare --map-root-user --fork --pid 
--load-interp=":qemu-m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-m68k:OC" 
--root=chroot/m68k/sid
     # QEMU_VERSION= ls
     qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
     Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers
     # /qemu-m68k  --version
     qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
     Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project developers

Thanks,
Laurent
Ilya Leoshkevich Oct. 2, 2024, 2:25 p.m. UTC | #6
On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > The new option '-qemu-children' makes it so that on `execve` the
> > > child
> > > process will be launch by the same `qemu` executable that is
> > > currently
> > > running along with its current commandline arguments.
> > > 
> > > The motivation for the change is to make it so that plugins
> > > running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of
> > > system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files
> > > may
> > > not be desirable.
> > 
> > Another reason to have this is that one may not have root
> > permissions
> > to configure binfmt-misc.
> 
> A little note on that: binfmt_misc is now part of the user namespace
> (since linux v6.7), so you can 
> configure binfmt_misc as a non root user in a given namepace.
> 
> There is helper to use it with unshare from util-linux, you can do
> things like that:
> 
>    With 'F' flag, load the interpreter from the initial namespace:
> 
>      $ /bin/qemu-m68k-static --version
>      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> developers
>      $ unshare --map-root-user --fork --pid 
> --load-interp=":qemu-
> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu
> -m68k-static:OCF" 
> --root=chroot/m68k/sid
>      # QEMU_VERSION= ls
>      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> developers
>      # /qemu-m68k  --version
>      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> developers
> 
>    Without 'F' flag, from inside the namespace:
> 
>      $ unshare --map-root-user --fork --pid 
> --load-interp=":qemu-
> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-
> m68k:OC" 
> --root=chroot/m68k/sid
>      # QEMU_VERSION= ls
>      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> developers
>      # /qemu-m68k  --version
>      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> developers
> 
> Thanks,
> Laurent
> 

Thanks for posting this, I wasn't aware of this feature and it looks
really useful. 

IIUC it also resolves the main problem this patch is dealing with:

  Enabling `binfmt` for all natively runnable elf files may
  not be desirable.
Noah Goldstein Oct. 2, 2024, 2:44 p.m. UTC | #7
On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > child
> > > > process will be launch by the same `qemu` executable that is
> > > > currently
> > > > running along with its current commandline arguments.
> > > >
> > > > The motivation for the change is to make it so that plugins
> > > > running
> > > > through `qemu` can continue to run on children.  Why not just
> > > > `binfmt`?: Plugins can be desirable regardless of
> > > > system/architecture
> > > > emulation, and can sometimes be useful for elf files that can run
> > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > may
> > > > not be desirable.
> > >
> > > Another reason to have this is that one may not have root
> > > permissions
> > > to configure binfmt-misc.
> >
> > A little note on that: binfmt_misc is now part of the user namespace
> > (since linux v6.7), so you can
> > configure binfmt_misc as a non root user in a given namepace.
> >
> > There is helper to use it with unshare from util-linux, you can do
> > things like that:
> >
> >    With 'F' flag, load the interpreter from the initial namespace:
> >
> >      $ /bin/qemu-m68k-static --version
> >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > developers
> >      $ unshare --map-root-user --fork --pid
> > --load-interp=":qemu-
> > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/qemu
> > -m68k-static:OCF"
> > --root=chroot/m68k/sid
> >      # QEMU_VERSION= ls
> >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > developers
> >      # /qemu-m68k  --version
> >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > developers
> >
> >    Without 'F' flag, from inside the namespace:
> >
> >      $ unshare --map-root-user --fork --pid
> > --load-interp=":qemu-
> > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
> > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\x00\
> > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu-
> > m68k:OC"
> > --root=chroot/m68k/sid
> >      # QEMU_VERSION= ls
> >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > developers
> >      # /qemu-m68k  --version
> >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > developers
> >
> > Thanks,
> > Laurent
> >
>
> Thanks for posting this, I wasn't aware of this feature and it looks
> really useful.
>
> IIUC it also resolves the main problem this patch is dealing with:

I might misunderstand, but I don't think it does in the sense
that it still might not be desirable to use the same qemu flags
for the entire class of executables.

I.e the original motivating case was wanting to attach
some plugins to a process and its children and AFAICT
binfmt still doesn't give that level of control.
>
>   Enabling `binfmt` for all natively runnable elf files may
>   not be desirable.
Ilya Leoshkevich Oct. 2, 2024, 2:53 p.m. UTC | #8
On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > The new option '-qemu-children' makes it so that on `execve`
> > > > > the
> > > > > child
> > > > > process will be launch by the same `qemu` executable that is
> > > > > currently
> > > > > running along with its current commandline arguments.
> > > > > 
> > > > > The motivation for the change is to make it so that plugins
> > > > > running
> > > > > through `qemu` can continue to run on children.  Why not just
> > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > system/architecture
> > > > > emulation, and can sometimes be useful for elf files that can
> > > > > run
> > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > files
> > > > > may
> > > > > not be desirable.
> > > > 
> > > > Another reason to have this is that one may not have root
> > > > permissions
> > > > to configure binfmt-misc.
> > > 
> > > A little note on that: binfmt_misc is now part of the user
> > > namespace
> > > (since linux v6.7), so you can
> > > configure binfmt_misc as a non root user in a given namepace.
> > > 
> > > There is helper to use it with unshare from util-linux, you can
> > > do
> > > things like that:
> > > 
> > >    With 'F' flag, load the interpreter from the initial
> > > namespace:
> > > 
> > >      $ /bin/qemu-m68k-static --version
> > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > developers
> > >      $ unshare --map-root-user --fork --pid
> > > --load-interp=":qemu-
> > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > 0\\x
> > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > x00\
> > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/
> > > qemu
> > > -m68k-static:OCF"
> > > --root=chroot/m68k/sid
> > >      # QEMU_VERSION= ls
> > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > developers
> > >      # /qemu-m68k  --version
> > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > developers
> > > 
> > >    Without 'F' flag, from inside the namespace:
> > > 
> > >      $ unshare --map-root-user --fork --pid
> > > --load-interp=":qemu-
> > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > 0\\x
> > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > x00\
> > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu
> > > -
> > > m68k:OC"
> > > --root=chroot/m68k/sid
> > >      # QEMU_VERSION= ls
> > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > developers
> > >      # /qemu-m68k  --version
> > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > developers
> > > 
> > > Thanks,
> > > Laurent
> > > 
> > 
> > Thanks for posting this, I wasn't aware of this feature and it
> > looks
> > really useful.
> > 
> > IIUC it also resolves the main problem this patch is dealing with:
> 
> I might misunderstand, but I don't think it does in the sense
> that it still might not be desirable to use the same qemu flags
> for the entire class of executables.
> 
> I.e the original motivating case was wanting to attach
> some plugins to a process and its children and AFAICT
> binfmt still doesn't give that level of control.

I think if you start a process in a user namespace, which has a
binfmt_misc handler for a certain class of binaries, then this handler
will affect only this process and its children, and not the rest of the
system.

> >   Enabling `binfmt` for all natively runnable elf files may
> >   not be desirable.
Noah Goldstein Oct. 2, 2024, 3:10 p.m. UTC | #9
On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > The new option '-qemu-children' makes it so that on `execve`
> > > > > > the
> > > > > > child
> > > > > > process will be launch by the same `qemu` executable that is
> > > > > > currently
> > > > > > running along with its current commandline arguments.
> > > > > >
> > > > > > The motivation for the change is to make it so that plugins
> > > > > > running
> > > > > > through `qemu` can continue to run on children.  Why not just
> > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > system/architecture
> > > > > > emulation, and can sometimes be useful for elf files that can
> > > > > > run
> > > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > > files
> > > > > > may
> > > > > > not be desirable.
> > > > >
> > > > > Another reason to have this is that one may not have root
> > > > > permissions
> > > > > to configure binfmt-misc.
> > > >
> > > > A little note on that: binfmt_misc is now part of the user
> > > > namespace
> > > > (since linux v6.7), so you can
> > > > configure binfmt_misc as a non root user in a given namepace.
> > > >
> > > > There is helper to use it with unshare from util-linux, you can
> > > > do
> > > > things like that:
> > > >
> > > >    With 'F' flag, load the interpreter from the initial
> > > > namespace:
> > > >
> > > >      $ /bin/qemu-m68k-static --version
> > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >      $ unshare --map-root-user --fork --pid
> > > > --load-interp=":qemu-
> > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > > 0\\x
> > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > > x00\
> > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/
> > > > qemu
> > > > -m68k-static:OCF"
> > > > --root=chroot/m68k/sid
> > > >      # QEMU_VERSION= ls
> > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >      # /qemu-m68k  --version
> > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >
> > > >    Without 'F' flag, from inside the namespace:
> > > >
> > > >      $ unshare --map-root-user --fork --pid
> > > > --load-interp=":qemu-
> > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
> > > > 0\\x
> > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
> > > > x00\
> > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu
> > > > -
> > > > m68k:OC"
> > > > --root=chroot/m68k/sid
> > > >      # QEMU_VERSION= ls
> > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >      # /qemu-m68k  --version
> > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
> > > > developers
> > > >
> > > > Thanks,
> > > > Laurent
> > > >
> > >
> > > Thanks for posting this, I wasn't aware of this feature and it
> > > looks
> > > really useful.
> > >
> > > IIUC it also resolves the main problem this patch is dealing with:
> >
> > I might misunderstand, but I don't think it does in the sense
> > that it still might not be desirable to use the same qemu flags
> > for the entire class of executables.
> >
> > I.e the original motivating case was wanting to attach
> > some plugins to a process and its children and AFAICT
> > binfmt still doesn't give that level of control.
>
> I think if you start a process in a user namespace, which has a
> binfmt_misc handler for a certain class of binaries, then this handler
> will affect only this process and its children, and not the rest of the
> system.

It won't also affect other binaries in the user namespace?

>
> > >   Enabling `binfmt` for all natively runnable elf files may
> > >   not be desirable.
>
Laurent Vivier Oct. 2, 2024, 3:59 p.m. UTC | #10
Le 02/10/2024 à 16:53, Ilya Leoshkevich a écrit :
> On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
>> On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich <iii@linux.ibm.com>
>> wrote:
>>>
>>> On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
>>>> Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
>>>>> On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
>>>>>> The new option '-qemu-children' makes it so that on `execve`
>>>>>> the
>>>>>> child
>>>>>> process will be launch by the same `qemu` executable that is
>>>>>> currently
>>>>>> running along with its current commandline arguments.
>>>>>>
>>>>>> The motivation for the change is to make it so that plugins
>>>>>> running
>>>>>> through `qemu` can continue to run on children.  Why not just
>>>>>> `binfmt`?: Plugins can be desirable regardless of
>>>>>> system/architecture
>>>>>> emulation, and can sometimes be useful for elf files that can
>>>>>> run
>>>>>> natively. Enabling `binfmt` for all natively runnable elf
>>>>>> files
>>>>>> may
>>>>>> not be desirable.
>>>>>
>>>>> Another reason to have this is that one may not have root
>>>>> permissions
>>>>> to configure binfmt-misc.
>>>>
>>>> A little note on that: binfmt_misc is now part of the user
>>>> namespace
>>>> (since linux v6.7), so you can
>>>> configure binfmt_misc as a non root user in a given namepace.
>>>>
>>>> There is helper to use it with unshare from util-linux, you can
>>>> do
>>>> things like that:
>>>>
>>>>     With 'F' flag, load the interpreter from the initial
>>>> namespace:
>>>>
>>>>       $ /bin/qemu-m68k-static --version
>>>>       qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>>>>       Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>       $ unshare --map-root-user --fork --pid
>>>> --load-interp=":qemu-
>>>> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
>>>> 0\\x
>>>> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
>>>> x00\
>>>> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/bin/
>>>> qemu
>>>> -m68k-static:OCF"
>>>> --root=chroot/m68k/sid
>>>>       # QEMU_VERSION= ls
>>>>       qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
>>>>       Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>       # /qemu-m68k  --version
>>>>       qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>>>>       Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>
>>>>     Without 'F' flag, from inside the namespace:
>>>>
>>>>       $ unshare --map-root-user --fork --pid
>>>> --load-interp=":qemu-
>>>> m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x0
>>>> 0\\x
>>>> 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\
>>>> x00\
>>>> \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/qemu
>>>> -
>>>> m68k:OC"
>>>> --root=chroot/m68k/sid
>>>>       # QEMU_VERSION= ls
>>>>       qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>>>>       Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>       # /qemu-m68k  --version
>>>>       qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
>>>>       Copyright (c) 2003-2022 Fabrice Bellard and the QEMU Project
>>>> developers
>>>>
>>>> Thanks,
>>>> Laurent
>>>>
>>>
>>> Thanks for posting this, I wasn't aware of this feature and it
>>> looks
>>> really useful.
>>>
>>> IIUC it also resolves the main problem this patch is dealing with:
>>
>> I might misunderstand, but I don't think it does in the sense
>> that it still might not be desirable to use the same qemu flags
>> for the entire class of executables.
>>
>> I.e the original motivating case was wanting to attach
>> some plugins to a process and its children and AFAICT
>> binfmt still doesn't give that level of control.
> 
> I think if you start a process in a user namespace, which has a
> binfmt_misc handler for a certain class of binaries, then this handler
> will affect only this process and its children, and not the rest of the
> system.
>

Yes, the binfmt_misc configuration is only available in the given namespace.

Thanks,
Laurent
Ilya Leoshkevich Oct. 2, 2024, 4:14 p.m. UTC | #11
On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > <iii@linux.ibm.com>
> > > wrote:
> > > > 
> > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > `execve`
> > > > > > > the
> > > > > > > child
> > > > > > > process will be launch by the same `qemu` executable that
> > > > > > > is
> > > > > > > currently
> > > > > > > running along with its current commandline arguments.
> > > > > > > 
> > > > > > > The motivation for the change is to make it so that
> > > > > > > plugins
> > > > > > > running
> > > > > > > through `qemu` can continue to run on children.  Why not
> > > > > > > just
> > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > system/architecture
> > > > > > > emulation, and can sometimes be useful for elf files that
> > > > > > > can
> > > > > > > run
> > > > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > > > files
> > > > > > > may
> > > > > > > not be desirable.
> > > > > > 
> > > > > > Another reason to have this is that one may not have root
> > > > > > permissions
> > > > > > to configure binfmt-misc.
> > > > > 
> > > > > A little note on that: binfmt_misc is now part of the user
> > > > > namespace
> > > > > (since linux v6.7), so you can
> > > > > configure binfmt_misc as a non root user in a given namepace.
> > > > > 
> > > > > There is helper to use it with unshare from util-linux, you
> > > > > can
> > > > > do
> > > > > things like that:
> > > > > 
> > > > >    With 'F' flag, load the interpreter from the initial
> > > > > namespace:
> > > > > 
> > > > >      $ /bin/qemu-m68k-static --version
> > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > >      $ unshare --map-root-user --fork --pid
> > > > > --load-interp=":qemu-
> > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > \\x0
> > > > > 0\\x
> > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > fe\\
> > > > > x00\
> > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > bin/
> > > > > qemu
> > > > > -m68k-static:OCF"
> > > > > --root=chroot/m68k/sid
> > > > >      # QEMU_VERSION= ls
> > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > >      # /qemu-m68k  --version
> > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > > 
> > > > >    Without 'F' flag, from inside the namespace:
> > > > > 
> > > > >      $ unshare --map-root-user --fork --pid
> > > > > --load-interp=":qemu-
> > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > \\x0
> > > > > 0\\x
> > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > fe\\
> > > > > x00\
> > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > qemu
> > > > > -
> > > > > m68k:OC"
> > > > > --root=chroot/m68k/sid
> > > > >      # QEMU_VERSION= ls
> > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > >      # /qemu-m68k  --version
> > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > Project
> > > > > developers
> > > > > 
> > > > > Thanks,
> > > > > Laurent
> > > > > 
> > > > 
> > > > Thanks for posting this, I wasn't aware of this feature and it
> > > > looks
> > > > really useful.
> > > > 
> > > > IIUC it also resolves the main problem this patch is dealing
> > > > with:
> > > 
> > > I might misunderstand, but I don't think it does in the sense
> > > that it still might not be desirable to use the same qemu flags
> > > for the entire class of executables.
> > > 
> > > I.e the original motivating case was wanting to attach
> > > some plugins to a process and its children and AFAICT
> > > binfmt still doesn't give that level of control.
> > 
> > I think if you start a process in a user namespace, which has a
> > binfmt_misc handler for a certain class of binaries, then this
> > handler
> > will affect only this process and its children, and not the rest of
> > the
> > system.
> 
> It won't also affect other binaries in the user namespace?

It would, but you should be able to create a user namespace just
for your program. It should also be possible to nest user namespaces.

> > > >   Enabling `binfmt` for all natively runnable elf files may
> > > >   not be desirable.
Noah Goldstein Oct. 2, 2024, 4:24 p.m. UTC | #12
On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > > <iii@linux.ibm.com>
> > > > wrote:
> > > > >
> > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > > `execve`
> > > > > > > > the
> > > > > > > > child
> > > > > > > > process will be launch by the same `qemu` executable that
> > > > > > > > is
> > > > > > > > currently
> > > > > > > > running along with its current commandline arguments.
> > > > > > > >
> > > > > > > > The motivation for the change is to make it so that
> > > > > > > > plugins
> > > > > > > > running
> > > > > > > > through `qemu` can continue to run on children.  Why not
> > > > > > > > just
> > > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > > system/architecture
> > > > > > > > emulation, and can sometimes be useful for elf files that
> > > > > > > > can
> > > > > > > > run
> > > > > > > > natively. Enabling `binfmt` for all natively runnable elf
> > > > > > > > files
> > > > > > > > may
> > > > > > > > not be desirable.
> > > > > > >
> > > > > > > Another reason to have this is that one may not have root
> > > > > > > permissions
> > > > > > > to configure binfmt-misc.
> > > > > >
> > > > > > A little note on that: binfmt_misc is now part of the user
> > > > > > namespace
> > > > > > (since linux v6.7), so you can
> > > > > > configure binfmt_misc as a non root user in a given namepace.
> > > > > >
> > > > > > There is helper to use it with unshare from util-linux, you
> > > > > > can
> > > > > > do
> > > > > > things like that:
> > > > > >
> > > > > >    With 'F' flag, load the interpreter from the initial
> > > > > > namespace:
> > > > > >
> > > > > >      $ /bin/qemu-m68k-static --version
> > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > --load-interp=":qemu-
> > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > > \\x0
> > > > > > 0\\x
> > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > > fe\\
> > > > > > x00\
> > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > > bin/
> > > > > > qemu
> > > > > > -m68k-static:OCF"
> > > > > > --root=chroot/m68k/sid
> > > > > >      # QEMU_VERSION= ls
> > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >      # /qemu-m68k  --version
> > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >
> > > > > >    Without 'F' flag, from inside the namespace:
> > > > > >
> > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > --load-interp=":qemu-
> > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\\x00
> > > > > > \\x0
> > > > > > 0\\x
> > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xff\\x
> > > > > > fe\\
> > > > > > x00\
> > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff:/
> > > > > > qemu
> > > > > > -
> > > > > > m68k:OC"
> > > > > > --root=chroot/m68k/sid
> > > > > >      # QEMU_VERSION= ls
> > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >      # /qemu-m68k  --version
> > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > Project
> > > > > > developers
> > > > > >
> > > > > > Thanks,
> > > > > > Laurent
> > > > > >
> > > > >
> > > > > Thanks for posting this, I wasn't aware of this feature and it
> > > > > looks
> > > > > really useful.
> > > > >
> > > > > IIUC it also resolves the main problem this patch is dealing
> > > > > with:
> > > >
> > > > I might misunderstand, but I don't think it does in the sense
> > > > that it still might not be desirable to use the same qemu flags
> > > > for the entire class of executables.
> > > >
> > > > I.e the original motivating case was wanting to attach
> > > > some plugins to a process and its children and AFAICT
> > > > binfmt still doesn't give that level of control.
> > >
> > > I think if you start a process in a user namespace, which has a
> > > binfmt_misc handler for a certain class of binaries, then this
> > > handler
> > > will affect only this process and its children, and not the rest of
> > > the
> > > system.
> >
> > It won't also affect other binaries in the user namespace?
>
> It would, but you should be able to create a user namespace just
> for your program. It should also be possible to nest user namespaces.

Okay fair enough. Still pro this patch as an easier means
but guess it loses any necessity.

To be clear, are you rejecting?
>
> > > > >   Enabling `binfmt` for all natively runnable elf files may
> > > > >   not be desirable.
>
Ilya Leoshkevich Oct. 2, 2024, 4:35 p.m. UTC | #13
On Wed, 2024-10-02 at 11:24 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich
> > > <iii@linux.ibm.com>
> > > wrote:
> > > > 
> > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > > > <iii@linux.ibm.com>
> > > > > wrote:
> > > > > > 
> > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein
> > > > > > > > wrote:
> > > > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > > > `execve`
> > > > > > > > > the
> > > > > > > > > child
> > > > > > > > > process will be launch by the same `qemu` executable
> > > > > > > > > that
> > > > > > > > > is
> > > > > > > > > currently
> > > > > > > > > running along with its current commandline arguments.
> > > > > > > > > 
> > > > > > > > > The motivation for the change is to make it so that
> > > > > > > > > plugins
> > > > > > > > > running
> > > > > > > > > through `qemu` can continue to run on children.  Why
> > > > > > > > > not
> > > > > > > > > just
> > > > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > > > system/architecture
> > > > > > > > > emulation, and can sometimes be useful for elf files
> > > > > > > > > that
> > > > > > > > > can
> > > > > > > > > run
> > > > > > > > > natively. Enabling `binfmt` for all natively runnable
> > > > > > > > > elf
> > > > > > > > > files
> > > > > > > > > may
> > > > > > > > > not be desirable.
> > > > > > > > 
> > > > > > > > Another reason to have this is that one may not have
> > > > > > > > root
> > > > > > > > permissions
> > > > > > > > to configure binfmt-misc.
> > > > > > > 
> > > > > > > A little note on that: binfmt_misc is now part of the
> > > > > > > user
> > > > > > > namespace
> > > > > > > (since linux v6.7), so you can
> > > > > > > configure binfmt_misc as a non root user in a given
> > > > > > > namepace.
> > > > > > > 
> > > > > > > There is helper to use it with unshare from util-linux,
> > > > > > > you
> > > > > > > can
> > > > > > > do
> > > > > > > things like that:
> > > > > > > 
> > > > > > >    With 'F' flag, load the interpreter from the initial
> > > > > > > namespace:
> > > > > > > 
> > > > > > >      $ /bin/qemu-m68k-static --version
> > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > --load-interp=":qemu-
> > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > \x00
> > > > > > > \\x0
> > > > > > > 0\\x
> > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > f\\x
> > > > > > > fe\\
> > > > > > > x00\
> > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > ff:/
> > > > > > > bin/
> > > > > > > qemu
> > > > > > > -m68k-static:OCF"
> > > > > > > --root=chroot/m68k/sid
> > > > > > >      # QEMU_VERSION= ls
> > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > >      # /qemu-m68k  --version
> > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > > 
> > > > > > >    Without 'F' flag, from inside the namespace:
> > > > > > > 
> > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > --load-interp=":qemu-
> > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > \x00
> > > > > > > \\x0
> > > > > > > 0\\x
> > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > f\\x
> > > > > > > fe\\
> > > > > > > x00\
> > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > ff:/
> > > > > > > qemu
> > > > > > > -
> > > > > > > m68k:OC"
> > > > > > > --root=chroot/m68k/sid
> > > > > > >      # QEMU_VERSION= ls
> > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > >      # /qemu-m68k  --version
> > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > Project
> > > > > > > developers
> > > > > > > 
> > > > > > > Thanks,
> > > > > > > Laurent
> > > > > > > 
> > > > > > 
> > > > > > Thanks for posting this, I wasn't aware of this feature and
> > > > > > it
> > > > > > looks
> > > > > > really useful.
> > > > > > 
> > > > > > IIUC it also resolves the main problem this patch is
> > > > > > dealing
> > > > > > with:
> > > > > 
> > > > > I might misunderstand, but I don't think it does in the sense
> > > > > that it still might not be desirable to use the same qemu
> > > > > flags
> > > > > for the entire class of executables.
> > > > > 
> > > > > I.e the original motivating case was wanting to attach
> > > > > some plugins to a process and its children and AFAICT
> > > > > binfmt still doesn't give that level of control.
> > > > 
> > > > I think if you start a process in a user namespace, which has a
> > > > binfmt_misc handler for a certain class of binaries, then this
> > > > handler
> > > > will affect only this process and its children, and not the
> > > > rest of
> > > > the
> > > > system.
> > > 
> > > It won't also affect other binaries in the user namespace?
> > 
> > It would, but you should be able to create a user namespace just
> > for your program. It should also be possible to nest user
> > namespaces.
> 
> Okay fair enough. Still pro this patch as an easier means
> but guess it loses any necessity.
> 
> To be clear, are you rejecting?

No, personally I still find it useful, because the systems with old
kernels will be around for quite some time.

> > > > > >   Enabling `binfmt` for all natively runnable elf files may
> > > > > >   not be desirable.
Noah Goldstein Oct. 2, 2024, 4:36 p.m. UTC | #14
On Wed, Oct 2, 2024 at 11:35 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 11:24 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 11:14 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Wed, 2024-10-02 at 10:10 -0500, Noah Goldstein wrote:
> > > > On Wed, Oct 2, 2024 at 9:53 AM Ilya Leoshkevich
> > > > <iii@linux.ibm.com>
> > > > wrote:
> > > > >
> > > > > On Wed, 2024-10-02 at 09:44 -0500, Noah Goldstein wrote:
> > > > > > On Wed, Oct 2, 2024 at 9:38 AM Ilya Leoshkevich
> > > > > > <iii@linux.ibm.com>
> > > > > > wrote:
> > > > > > >
> > > > > > > On Wed, 2024-10-02 at 16:08 +0200, Laurent Vivier wrote:
> > > > > > > > Le 02/10/2024 à 10:08, Ilya Leoshkevich a écrit :
> > > > > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein
> > > > > > > > > wrote:
> > > > > > > > > > The new option '-qemu-children' makes it so that on
> > > > > > > > > > `execve`
> > > > > > > > > > the
> > > > > > > > > > child
> > > > > > > > > > process will be launch by the same `qemu` executable
> > > > > > > > > > that
> > > > > > > > > > is
> > > > > > > > > > currently
> > > > > > > > > > running along with its current commandline arguments.
> > > > > > > > > >
> > > > > > > > > > The motivation for the change is to make it so that
> > > > > > > > > > plugins
> > > > > > > > > > running
> > > > > > > > > > through `qemu` can continue to run on children.  Why
> > > > > > > > > > not
> > > > > > > > > > just
> > > > > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > > > > system/architecture
> > > > > > > > > > emulation, and can sometimes be useful for elf files
> > > > > > > > > > that
> > > > > > > > > > can
> > > > > > > > > > run
> > > > > > > > > > natively. Enabling `binfmt` for all natively runnable
> > > > > > > > > > elf
> > > > > > > > > > files
> > > > > > > > > > may
> > > > > > > > > > not be desirable.
> > > > > > > > >
> > > > > > > > > Another reason to have this is that one may not have
> > > > > > > > > root
> > > > > > > > > permissions
> > > > > > > > > to configure binfmt-misc.
> > > > > > > >
> > > > > > > > A little note on that: binfmt_misc is now part of the
> > > > > > > > user
> > > > > > > > namespace
> > > > > > > > (since linux v6.7), so you can
> > > > > > > > configure binfmt_misc as a non root user in a given
> > > > > > > > namepace.
> > > > > > > >
> > > > > > > > There is helper to use it with unshare from util-linux,
> > > > > > > > you
> > > > > > > > can
> > > > > > > > do
> > > > > > > > things like that:
> > > > > > > >
> > > > > > > >    With 'F' flag, load the interpreter from the initial
> > > > > > > > namespace:
> > > > > > > >
> > > > > > > >      $ /bin/qemu-m68k-static --version
> > > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > > --load-interp=":qemu-
> > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > > \x00
> > > > > > > > \\x0
> > > > > > > > 0\\x
> > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > > f\\x
> > > > > > > > fe\\
> > > > > > > > x00\
> > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > > ff:/
> > > > > > > > bin/
> > > > > > > > qemu
> > > > > > > > -m68k-static:OCF"
> > > > > > > > --root=chroot/m68k/sid
> > > > > > > >      # QEMU_VERSION= ls
> > > > > > > >      qemu-m68k version 8.2.2 (qemu-8.2.2-1.fc40)
> > > > > > > >      Copyright (c) 2003-2023 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >      # /qemu-m68k  --version
> > > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >
> > > > > > > >    Without 'F' flag, from inside the namespace:
> > > > > > > >
> > > > > > > >      $ unshare --map-root-user --fork --pid
> > > > > > > > --load-interp=":qemu-
> > > > > > > > m68k:M::\\x7fELF\\x01\\x02\\x01\\x00\\x00\\x00\\x00\\x00\
> > > > > > > > \x00
> > > > > > > > \\x0
> > > > > > > > 0\\x
> > > > > > > > 00\\x00\\x00\\x02\\x00\\x04:\\xff\\xff\\xff\\xff\\xff\\xf
> > > > > > > > f\\x
> > > > > > > > fe\\
> > > > > > > > x00\
> > > > > > > > \xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\x
> > > > > > > > ff:/
> > > > > > > > qemu
> > > > > > > > -
> > > > > > > > m68k:OC"
> > > > > > > > --root=chroot/m68k/sid
> > > > > > > >      # QEMU_VERSION= ls
> > > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >      # /qemu-m68k  --version
> > > > > > > >      qemu-m68k version 8.0.50 (v8.0.0-340-gb1cff5e2da95)
> > > > > > > >      Copyright (c) 2003-2022 Fabrice Bellard and the QEMU
> > > > > > > > Project
> > > > > > > > developers
> > > > > > > >
> > > > > > > > Thanks,
> > > > > > > > Laurent
> > > > > > > >
> > > > > > >
> > > > > > > Thanks for posting this, I wasn't aware of this feature and
> > > > > > > it
> > > > > > > looks
> > > > > > > really useful.
> > > > > > >
> > > > > > > IIUC it also resolves the main problem this patch is
> > > > > > > dealing
> > > > > > > with:
> > > > > >
> > > > > > I might misunderstand, but I don't think it does in the sense
> > > > > > that it still might not be desirable to use the same qemu
> > > > > > flags
> > > > > > for the entire class of executables.
> > > > > >
> > > > > > I.e the original motivating case was wanting to attach
> > > > > > some plugins to a process and its children and AFAICT
> > > > > > binfmt still doesn't give that level of control.
> > > > >
> > > > > I think if you start a process in a user namespace, which has a
> > > > > binfmt_misc handler for a certain class of binaries, then this
> > > > > handler
> > > > > will affect only this process and its children, and not the
> > > > > rest of
> > > > > the
> > > > > system.
> > > >
> > > > It won't also affect other binaries in the user namespace?
> > >
> > > It would, but you should be able to create a user namespace just
> > > for your program. It should also be possible to nest user
> > > namespaces.
> >
> > Okay fair enough. Still pro this patch as an easier means
> > but guess it loses any necessity.
> >
> > To be clear, are you rejecting?
>
> No, personally I still find it useful, because the systems with old
> kernels will be around for quite some time.

:)

>
> > > > > > >   Enabling `binfmt` for all natively runnable elf files may
> > > > > > >   not be desirable.
>
Ilya Leoshkevich Oct. 2, 2024, 4:39 p.m. UTC | #15
On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > The new option '-qemu-children' makes it so that on `execve` the
> > > child
> > > process will be launch by the same `qemu` executable that is
> > > currently
> > > running along with its current commandline arguments.
> > > 
> > > The motivation for the change is to make it so that plugins
> > > running
> > > through `qemu` can continue to run on children.  Why not just
> > > `binfmt`?: Plugins can be desirable regardless of
> > > system/architecture
> > > emulation, and can sometimes be useful for elf files that can run
> > > natively. Enabling `binfmt` for all natively runnable elf files
> > > may
> > > not be desirable.
> > 
> > Another reason to have this is that one may not have root
> > permissions
> > to configure binfmt-misc.
> > 
> +1
> 
> > There was a similar patch posted to the mailing list some years
> > back,
> > which I used to cherry-pick when I needed this. I'm not sure what
> > happened to that discussion though.
> 
> Yes(ish):
> https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/

Thanks for finding this! Don't we need the shebang handling here as
well?

Laurent, do you per chance know why was it not accepted back
then?Unfortunately I cannot find any discussion associated with v3 or
v4
[1]. There were some concerns regarding v1 [2], but from what I can see
they all were addressed.

[1]
https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
[2]
https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Noah Goldstein Oct. 2, 2024, 4:42 p.m. UTC | #16
On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > child
> > > > process will be launch by the same `qemu` executable that is
> > > > currently
> > > > running along with its current commandline arguments.
> > > >
> > > > The motivation for the change is to make it so that plugins
> > > > running
> > > > through `qemu` can continue to run on children.  Why not just
> > > > `binfmt`?: Plugins can be desirable regardless of
> > > > system/architecture
> > > > emulation, and can sometimes be useful for elf files that can run
> > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > may
> > > > not be desirable.
> > >
> > > Another reason to have this is that one may not have root
> > > permissions
> > > to configure binfmt-misc.
> > >
> > +1
> >
> > > There was a similar patch posted to the mailing list some years
> > > back,
> > > which I used to cherry-pick when I needed this. I'm not sure what
> > > happened to that discussion though.
> >
> > Yes(ish):
> > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
>
> Thanks for finding this! Don't we need the shebang handling here as
> well?
>
I don't think so. In this case we aren't making it so execve can point to
some arbitrary impl, just that we propagate the current running qemu
env.

> Laurent, do you per chance know why was it not accepted back
> then?Unfortunately I cannot find any discussion associated with v3 or
> v4
> [1]. There were some concerns regarding v1 [2], but from what I can see
> they all were addressed.
>
> [1]
> https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> [2]
> https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Noah Goldstein Oct. 11, 2024, 6:14 p.m. UTC | #17
On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> >
> > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > > wrote:
> > > >
> > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > > child
> > > > > process will be launch by the same `qemu` executable that is
> > > > > currently
> > > > > running along with its current commandline arguments.
> > > > >
> > > > > The motivation for the change is to make it so that plugins
> > > > > running
> > > > > through `qemu` can continue to run on children.  Why not just
> > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > system/architecture
> > > > > emulation, and can sometimes be useful for elf files that can run
> > > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > > may
> > > > > not be desirable.
> > > >
> > > > Another reason to have this is that one may not have root
> > > > permissions
> > > > to configure binfmt-misc.
> > > >
> > > +1
> > >
> > > > There was a similar patch posted to the mailing list some years
> > > > back,
> > > > which I used to cherry-pick when I needed this. I'm not sure what
> > > > happened to that discussion though.
> > >
> > > Yes(ish):
> > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
> >
> > Thanks for finding this! Don't we need the shebang handling here as
> > well?
> >
> I don't think so. In this case we aren't making it so execve can point to
> some arbitrary impl, just that we propagate the current running qemu
> env.
>

ping
> > Laurent, do you per chance know why was it not accepted back
> > then?Unfortunately I cannot find any discussion associated with v3 or
> > v4
> > [1]. There were some concerns regarding v1 [2], but from what I can see
> > they all were addressed.
> >
> > [1]
> > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> > [2]
> > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Noah Goldstein Oct. 22, 2024, 10:06 p.m. UTC | #18
On Fri, Oct 11, 2024 at 1:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> > >
> > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > > > wrote:
> > > > >
> > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > > > child
> > > > > > process will be launch by the same `qemu` executable that is
> > > > > > currently
> > > > > > running along with its current commandline arguments.
> > > > > >
> > > > > > The motivation for the change is to make it so that plugins
> > > > > > running
> > > > > > through `qemu` can continue to run on children.  Why not just
> > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > system/architecture
> > > > > > emulation, and can sometimes be useful for elf files that can run
> > > > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > > > may
> > > > > > not be desirable.
> > > > >
> > > > > Another reason to have this is that one may not have root
> > > > > permissions
> > > > > to configure binfmt-misc.
> > > > >
> > > > +1
> > > >
> > > > > There was a similar patch posted to the mailing list some years
> > > > > back,
> > > > > which I used to cherry-pick when I needed this. I'm not sure what
> > > > > happened to that discussion though.
> > > >
> > > > Yes(ish):
> > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
> > >
> > > Thanks for finding this! Don't we need the shebang handling here as
> > > well?
> > >
> > I don't think so. In this case we aren't making it so execve can point to
> > some arbitrary impl, just that we propagate the current running qemu
> > env.
> >
>
> ping
ping2
> > > Laurent, do you per chance know why was it not accepted back
> > > then?Unfortunately I cannot find any discussion associated with v3 or
> > > v4
> > > [1]. There were some concerns regarding v1 [2], but from what I can see
> > > they all were addressed.
> > >
> > > [1]
> > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> > > [2]
> > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Noah Goldstein Oct. 29, 2024, 2:51 p.m. UTC | #19
On Tue, Oct 22, 2024 at 5:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Oct 11, 2024 at 1:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Wed, Oct 2, 2024 at 11:42 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Wed, Oct 2, 2024 at 11:39 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
> > > >
> > > > On Wed, 2024-10-02 at 09:05 -0500, Noah Goldstein wrote:
> > > > > On Wed, Oct 2, 2024 at 3:08 AM Ilya Leoshkevich <iii@linux.ibm.com>
> > > > > wrote:
> > > > > >
> > > > > > On Fri, 2024-08-30 at 15:36 -0700, Noah Goldstein wrote:
> > > > > > > The new option '-qemu-children' makes it so that on `execve` the
> > > > > > > child
> > > > > > > process will be launch by the same `qemu` executable that is
> > > > > > > currently
> > > > > > > running along with its current commandline arguments.
> > > > > > >
> > > > > > > The motivation for the change is to make it so that plugins
> > > > > > > running
> > > > > > > through `qemu` can continue to run on children.  Why not just
> > > > > > > `binfmt`?: Plugins can be desirable regardless of
> > > > > > > system/architecture
> > > > > > > emulation, and can sometimes be useful for elf files that can run
> > > > > > > natively. Enabling `binfmt` for all natively runnable elf files
> > > > > > > may
> > > > > > > not be desirable.
> > > > > >
> > > > > > Another reason to have this is that one may not have root
> > > > > > permissions
> > > > > > to configure binfmt-misc.
> > > > > >
> > > > > +1
> > > > >
> > > > > > There was a similar patch posted to the mailing list some years
> > > > > > back,
> > > > > > which I used to cherry-pick when I needed this. I'm not sure what
> > > > > > happened to that discussion though.
> > > > >
> > > > > Yes(ish):
> > > > > https://patchwork.ozlabs.org/project/qemu-devel/patch/1455515507-26877-1-git-send-email-petrosagg@resin.io/
> > > >
> > > > Thanks for finding this! Don't we need the shebang handling here as
> > > > well?
> > > >
> > > I don't think so. In this case we aren't making it so execve can point to
> > > some arbitrary impl, just that we propagate the current running qemu
> > > env.
> > >
> >
> > ping
> ping2
pint3
> > > > Laurent, do you per chance know why was it not accepted back
> > > > then?Unfortunately I cannot find any discussion associated with v3 or
> > > > v4
> > > > [1]. There were some concerns regarding v1 [2], but from what I can see
> > > > they all were addressed.
> > > >
> > > > [1]
> > > > https://patchew.org/QEMU/20200730160106.16613-1-rj.bcjesus@gmail.com/
> > > > [2]
> > > > https://patchwork.kernel.org/project/qemu-devel/patch/1453091602-21843-1-git-send-email-petrosagg@gmail.com/
Alex Bennée Oct. 29, 2024, 3:23 p.m. UTC | #20
Noah Goldstein <goldstein.w.n@gmail.com> writes:

> The new option '-qemu-children' makes it so that on `execve` the child
> process will be launch by the same `qemu` executable that is currently
> running along with its current commandline arguments.
>
> The motivation for the change is to make it so that plugins running
> through `qemu` can continue to run on children.  Why not just
> `binfmt`?: Plugins can be desirable regardless of system/architecture
> emulation, and can sometimes be useful for elf files that can run
> natively. Enabling `binfmt` for all natively runnable elf files may
> not be desirable.

Broadly I don't see anything wrong with the patch. However my principle
concern is how test it. Currently nothing in check-tcg ensures this
mechanism works and stresses the argument copying.

>
> Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> ---
>  linux-user/main.c           | 22 ++++++++++++++++++++++
>  linux-user/syscall.c        | 20 +++++++++++++++-----
>  linux-user/user-internals.h |  4 ++++
>  3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 8143a0d4b0..dfb303a1f2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
>  uintptr_t guest_base;
>  bool have_guest_base;
>  
> +bool qemu_dup_for_children;
> +int qemu_argc;
> +char ** qemu_argv;
> +
>  /*
>   * Used to implement backwards-compatibility for the `-strace`, and
>   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
>      perf_enable_jitdump();
>  }
>  
> +static void handle_arg_qemu_children(const char *arg)
> +{
> +    qemu_dup_for_children = true;
> +}
> +
>  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
>  
>  #ifdef CONFIG_PLUGIN
> @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
>       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
>      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
>       "",           "Generate a jit-${pid}.dump file for perf"},
> +    {"qemu-children",
> +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> +     "",           "Run child processes (created with execve) with qemu "
> +                   "(as instantiated for the parent)"},
>      {NULL, NULL, false, NULL, NULL, NULL}
>  };
>  
> @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
>  
>      optind = parse_args(argc, argv);
>  
> +    if (qemu_dup_for_children) {
> +        int i;
> +        qemu_argc = optind;
> +        qemu_argv = g_new0(char *, qemu_argc);
> +        for (i = 0; i < optind; ++i) {
> +            qemu_argv[i] = strdup(argv[i]);
> +        }
> +    }
> +
>      qemu_set_log_filename_flags(last_log_filename,
>                                  last_log_mask | (enable_strace * LOG_STRACE),
>                                  &error_fatal);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 9d5415674d..732ef89054 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>                      abi_long pathname, abi_long guest_argp,
>                      abi_long guest_envp, int flags, bool is_execveat)
>  {
> -    int ret;
> +    int ret, argp_offset;
>      char **argp, **envp;
>      int argc, envc;
>      abi_ulong gp;
>      abi_ulong addr;
>      char **q;
>      void *p;
> +    bool through_qemu = !is_execveat && qemu_dup_for_children;
>  
>      argc = 0;
>  
> @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>          envc++;
>      }
>  
> -    argp = g_new0(char *, argc + 1);
> +    argp_offset = through_qemu ? qemu_argc : 0;
> +    argp = g_new0(char *, argc + argp_offset + 1);
>      envp = g_new0(char *, envc + 1);
>  
> -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp)) {
>              goto execve_efault;
>          }
> @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
>      }
>  
>      const char *exe = p;
> -    if (is_proc_myself(p, "exe")) {
> +    if (through_qemu) {
> +        int i;
> +        for (i = 0; i < argp_offset; ++i) {
> +            argp[i] = qemu_argv[i];
> +        }
> +        exe = qemu_argv[0];
> +    }
> +    else if (is_proc_myself(p, "exe")) {
>          exe = exec_path;
>      }
> +
>      ret = is_execveat
>          ? safe_execveat(dirfd, exe, argp, envp, flags)
>          : safe_execve(exe, argp, envp);
> @@ -8553,7 +8563,7 @@ execve_efault:
>      ret = -TARGET_EFAULT;
>  
>  execve_end:
> -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
>          if (get_user_ual(addr, gp) || !addr) {
>              break;
>          }
> diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> index 5c7f173ceb..0719e65ff4 100644
> --- a/linux-user/user-internals.h
> +++ b/linux-user/user-internals.h
> @@ -30,6 +30,10 @@ void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
>  
> +extern bool qemu_dup_for_children;
> +extern int qemu_argc;
> +extern char ** qemu_argv;
> +
>  typedef struct IOCTLEntry IOCTLEntry;
>  
>  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
Noah Goldstein Oct. 29, 2024, 3:27 p.m. UTC | #21
On Tue, Oct 29, 2024 at 10:23 AM Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Noah Goldstein <goldstein.w.n@gmail.com> writes:
>
> > The new option '-qemu-children' makes it so that on `execve` the child
> > process will be launch by the same `qemu` executable that is currently
> > running along with its current commandline arguments.
> >
> > The motivation for the change is to make it so that plugins running
> > through `qemu` can continue to run on children.  Why not just
> > `binfmt`?: Plugins can be desirable regardless of system/architecture
> > emulation, and can sometimes be useful for elf files that can run
> > natively. Enabling `binfmt` for all natively runnable elf files may
> > not be desirable.
>
> Broadly I don't see anything wrong with the patch. However my principle
> concern is how test it. Currently nothing in check-tcg ensures this
> mechanism works and stresses the argument copying.

Fair enough. I will add some tests.
>
> >
> > Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
> > ---
> >  linux-user/main.c           | 22 ++++++++++++++++++++++
> >  linux-user/syscall.c        | 20 +++++++++++++++-----
> >  linux-user/user-internals.h |  4 ++++
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/linux-user/main.c b/linux-user/main.c
> > index 8143a0d4b0..dfb303a1f2 100644
> > --- a/linux-user/main.c
> > +++ b/linux-user/main.c
> > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
> >  uintptr_t guest_base;
> >  bool have_guest_base;
> >
> > +bool qemu_dup_for_children;
> > +int qemu_argc;
> > +char ** qemu_argv;
> > +
> >  /*
> >   * Used to implement backwards-compatibility for the `-strace`, and
> >   * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
> > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
> >      perf_enable_jitdump();
> >  }
> >
> > +static void handle_arg_qemu_children(const char *arg)
> > +{
> > +    qemu_dup_for_children = true;
> > +}
> > +
> >  static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
> >
> >  #ifdef CONFIG_PLUGIN
> > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
> >       "",           "Generate a /tmp/perf-${pid}.map file for perf"},
> >      {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
> >       "",           "Generate a jit-${pid}.dump file for perf"},
> > +    {"qemu-children",
> > +                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
> > +     "",           "Run child processes (created with execve) with qemu "
> > +                   "(as instantiated for the parent)"},
> >      {NULL, NULL, false, NULL, NULL, NULL}
> >  };
> >
> > @@ -729,6 +742,15 @@ int main(int argc, char **argv, char **envp)
> >
> >      optind = parse_args(argc, argv);
> >
> > +    if (qemu_dup_for_children) {
> > +        int i;
> > +        qemu_argc = optind;
> > +        qemu_argv = g_new0(char *, qemu_argc);
> > +        for (i = 0; i < optind; ++i) {
> > +            qemu_argv[i] = strdup(argv[i]);
> > +        }
> > +    }
> > +
> >      qemu_set_log_filename_flags(last_log_filename,
> >                                  last_log_mask | (enable_strace * LOG_STRACE),
> >                                  &error_fatal);
> > diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> > index 9d5415674d..732ef89054 100644
> > --- a/linux-user/syscall.c
> > +++ b/linux-user/syscall.c
> > @@ -8459,13 +8459,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >                      abi_long pathname, abi_long guest_argp,
> >                      abi_long guest_envp, int flags, bool is_execveat)
> >  {
> > -    int ret;
> > +    int ret, argp_offset;
> >      char **argp, **envp;
> >      int argc, envc;
> >      abi_ulong gp;
> >      abi_ulong addr;
> >      char **q;
> >      void *p;
> > +    bool through_qemu = !is_execveat && qemu_dup_for_children;
> >
> >      argc = 0;
> >
> > @@ -8489,10 +8490,11 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >          envc++;
> >      }
> >
> > -    argp = g_new0(char *, argc + 1);
> > +    argp_offset = through_qemu ? qemu_argc : 0;
> > +    argp = g_new0(char *, argc + argp_offset + 1);
> >      envp = g_new0(char *, envc + 1);
> >
> > -    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp)) {
> >              goto execve_efault;
> >          }
> > @@ -8537,9 +8539,17 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
> >      }
> >
> >      const char *exe = p;
> > -    if (is_proc_myself(p, "exe")) {
> > +    if (through_qemu) {
> > +        int i;
> > +        for (i = 0; i < argp_offset; ++i) {
> > +            argp[i] = qemu_argv[i];
> > +        }
> > +        exe = qemu_argv[0];
> > +    }
> > +    else if (is_proc_myself(p, "exe")) {
> >          exe = exec_path;
> >      }
> > +
> >      ret = is_execveat
> >          ? safe_execveat(dirfd, exe, argp, envp, flags)
> >          : safe_execve(exe, argp, envp);
> > @@ -8553,7 +8563,7 @@ execve_efault:
> >      ret = -TARGET_EFAULT;
> >
> >  execve_end:
> > -    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
> > +    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
> >          if (get_user_ual(addr, gp) || !addr) {
> >              break;
> >          }
> > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
> > index 5c7f173ceb..0719e65ff4 100644
> > --- a/linux-user/user-internals.h
> > +++ b/linux-user/user-internals.h
> > @@ -30,6 +30,10 @@ void stop_all_tasks(void);
> >  extern const char *qemu_uname_release;
> >  extern unsigned long mmap_min_addr;
> >
> > +extern bool qemu_dup_for_children;
> > +extern int qemu_argc;
> > +extern char ** qemu_argv;
> > +
> >  typedef struct IOCTLEntry IOCTLEntry;
> >
> >  typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
>
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
diff mbox series

Patch

diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..dfb303a1f2 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@  unsigned long mmap_min_addr;
 uintptr_t guest_base;
 bool have_guest_base;
 
+bool qemu_dup_for_children;
+int qemu_argc;
+char ** qemu_argv;
+
 /*
  * Used to implement backwards-compatibility for the `-strace`, and
  * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@  static void handle_arg_jitdump(const char *arg)
     perf_enable_jitdump();
 }
 
+static void handle_arg_qemu_children(const char *arg)
+{
+    qemu_dup_for_children = true;
+}
+
 static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
 
 #ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@  static const struct qemu_argument arg_table[] = {
      "",           "Generate a /tmp/perf-${pid}.map file for perf"},
     {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
      "",           "Generate a jit-${pid}.dump file for perf"},
+    {"qemu-children",
+                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
+     "",           "Run child processes (created with execve) with qemu "
+                   "(as instantiated for the parent)"},
     {NULL, NULL, false, NULL, NULL, NULL}
 };
 
@@ -729,6 +742,15 @@  int main(int argc, char **argv, char **envp)
 
     optind = parse_args(argc, argv);
 
+    if (qemu_dup_for_children) {
+        int i;
+        qemu_argc = optind;
+        qemu_argv = g_new0(char *, qemu_argc);
+        for (i = 0; i < optind; ++i) {
+            qemu_argv[i] = strdup(argv[i]);
+        }
+    }
+
     qemu_set_log_filename_flags(last_log_filename,
                                 last_log_mask | (enable_strace * LOG_STRACE),
                                 &error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9d5415674d..732ef89054 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8459,13 +8459,14 @@  static int do_execv(CPUArchState *cpu_env, int dirfd,
                     abi_long pathname, abi_long guest_argp,
                     abi_long guest_envp, int flags, bool is_execveat)
 {
-    int ret;
+    int ret, argp_offset;
     char **argp, **envp;
     int argc, envc;
     abi_ulong gp;
     abi_ulong addr;
     char **q;
     void *p;
+    bool through_qemu = !is_execveat && qemu_dup_for_children;
 
     argc = 0;
 
@@ -8489,10 +8490,11 @@  static int do_execv(CPUArchState *cpu_env, int dirfd,
         envc++;
     }
 
-    argp = g_new0(char *, argc + 1);
+    argp_offset = through_qemu ? qemu_argc : 0;
+    argp = g_new0(char *, argc + argp_offset + 1);
     envp = g_new0(char *, envc + 1);
 
-    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset; gp; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp)) {
             goto execve_efault;
         }
@@ -8537,9 +8539,17 @@  static int do_execv(CPUArchState *cpu_env, int dirfd,
     }
 
     const char *exe = p;
-    if (is_proc_myself(p, "exe")) {
+    if (through_qemu) {
+        int i;
+        for (i = 0; i < argp_offset; ++i) {
+            argp[i] = qemu_argv[i];
+        }
+        exe = qemu_argv[0];
+    }
+    else if (is_proc_myself(p, "exe")) {
         exe = exec_path;
     }
+
     ret = is_execveat
         ? safe_execveat(dirfd, exe, argp, envp, flags)
         : safe_execve(exe, argp, envp);
@@ -8553,7 +8563,7 @@  execve_efault:
     ret = -TARGET_EFAULT;
 
 execve_end:
-    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset; *q; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp) || !addr) {
             break;
         }
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 5c7f173ceb..0719e65ff4 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@  void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char ** qemu_argv;
+
 typedef struct IOCTLEntry IOCTLEntry;
 
 typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,