The new option `-qemu-children` makes it so that on `execve` the child
process will be launched by the same `qemu` executable that is currently
running, along with its current command-line arguments.

The motivation for the change is to make it so that plugins running
through `qemu` can continue to run on children.  Why not just use
`binfmt`?  Plugins can be desirable regardless of system/architecture
emulation, and can sometimes be useful for ELF files that can run
natively.  Enabling `binfmt` for all natively runnable ELF files may
not be desirable.

Signed-off-by: Noah Goldstein <goldstein....@gmail.com>
---
 linux-user/main.c                             | 21 ++++++
 linux-user/syscall.c                          | 21 ++++--
 linux-user/user-internals.h                   |  4 ++
 tests/tcg/multiarch/Makefile.target           |  8 +++
 .../linux/linux-execve-qemu-children.c        | 68 +++++++++++++++++++
 5 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c

diff --git a/linux-user/main.c b/linux-user/main.c
index 8143a0d4b0..5e3d41dc2b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -81,6 +81,10 @@ unsigned long mmap_min_addr;
 uintptr_t guest_base;
 bool have_guest_base;
 
+bool qemu_dup_for_children;
+int qemu_argc;
+char **qemu_argv;
+
 /*
  * Used to implement backwards-compatibility for the `-strace`, and
  * QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
@@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg)
     perf_enable_jitdump();
 }
 
+static void handle_arg_qemu_children(const char *arg)
+{
+    qemu_dup_for_children = true; /* arg unused; do_execv() checks this flag */
+}
+
 static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
 
 #ifdef CONFIG_PLUGIN
@@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = {
      "",           "Generate a /tmp/perf-${pid}.map file for perf"},
     {"jitdump",    "QEMU_JITDUMP",     false, handle_arg_jitdump,
      "",           "Generate a jit-${pid}.dump file for perf"},
+    {"qemu-children",
+                   "QEMU_CHILDREN",    false, handle_arg_qemu_children,
+     "",           "Run child processes (created with execve) with qemu "
+                   "(as instantiated for the parent)"},
     {NULL, NULL, false, NULL, NULL, NULL}
 };
 
@@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp)
 
     optind = parse_args(argc, argv);
 
+    if (qemu_dup_for_children) {
+        qemu_argc = optind;
+        qemu_argv = g_new0(char *, qemu_argc);
+        for (i = 0; i < optind; ++i) {
+            qemu_argv[i] = strdup(argv[i]);
+        }
+    }
+
     qemu_set_log_filename_flags(last_log_filename,
                                 last_log_mask | (enable_strace * LOG_STRACE),
                                 &error_fatal);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 59b2080b98..96b105e9ce 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8550,13 +8550,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
                     abi_long pathname, abi_long guest_argp,
                     abi_long guest_envp, int flags, bool is_execveat)
 {
-    int ret;
+    int ret, argp_offset;
     char **argp, **envp;
     int argc, envc;
     abi_ulong gp;
     abi_ulong addr;
     char **q;
     void *p;
+    bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children;
 
     argc = 0;
 
@@ -8580,10 +8581,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
         envc++;
     }
 
-    argp = g_new0(char *, argc + 1);
+    argp_offset = through_qemu ? qemu_argc : 0;
+    argp = g_new0(char *, argc + argp_offset + 1);
     envp = g_new0(char *, envc + 1);
 
-    for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset;
+         gp; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp)) {
             goto execve_efault;
         }
@@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
     }
 
     const char *exe = p;
-    if (is_proc_myself(p, "exe")) {
+    if (through_qemu) {
+        int i;
+        for (i = 0; i < argp_offset; ++i) {
+            argp[i] = qemu_argv[i];
+        }
+        exe = qemu_argv[0];
+    } else if (is_proc_myself(p, "exe")) {
         exe = exec_path;
     }
+
     ret = is_execveat
         ? safe_execveat(dirfd, exe, argp, envp, flags)
         : safe_execve(exe, argp, envp);
@@ -8644,7 +8654,8 @@ execve_efault:
     ret = -TARGET_EFAULT;
 
 execve_end:
-    for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) {
+    for (gp = guest_argp, q = argp + argp_offset;
+         *q; gp += sizeof(abi_ulong), q++) {
         if (get_user_ual(addr, gp) || !addr) {
             break;
         }
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 46ffc093f4..ed3ed666a0 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -30,6 +30,10 @@ void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
+extern bool qemu_dup_for_children;
+extern int qemu_argc;
+extern char **qemu_argv;
+
 typedef struct IOCTLEntry IOCTLEntry;
 
 typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 78b83d5575..0e220953e7 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -30,6 +30,14 @@ run-float_%: float_%
        $(call conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref)
 
 
+run-linux-execve-qemu-children: linux-execve-qemu-children
+       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
+       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
+
+run-plugin-linux-execve-qemu-children-with-%: linux-execve-qemu-children
+       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0)
+       $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip)
+
 testthread: LDFLAGS+=-lpthread
 
 threadcount: LDFLAGS+=-lpthread
diff --git a/tests/tcg/multiarch/linux/linux-execve-qemu-children.c b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
new file mode 100644
index 0000000000..60d6537666
--- /dev/null
+++ b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c
@@ -0,0 +1,68 @@
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define MAX_COMM_SIZE (4096)
+
+/*
+ * Check that comm starts with basename(argv[1]) (only when argc == 3), then
+ * re-exec argv[0] with the digit in argv[2] bumped; stop once it is '9'.
+ */
+int main(int argc, char **argv, char **envp)
+{
+    char next_arg[2];
+    ssize_t off = 0;
+    assert(argc == 3 || argc == 4);
+    int fd = open("/proc/self/comm", O_RDONLY);
+    assert(fd >= 0); /* 0 is a valid descriptor */
+    char *buf = calloc(MAX_COMM_SIZE + 1, 1);
+    assert(buf != NULL);
+    for (;;) {
+        ssize_t res = read(fd, buf + off, MAX_COMM_SIZE - off);
+        if (res < 0 && (errno == EAGAIN || errno == EINTR)) {
+            continue; /* transient: retry without moving off backwards */
+        }
+        if (res < 0) {
+            perror("Failed to read comm");
+            return -1;
+        }
+        if (res == 0) {
+            break;
+        }
+        off += res;
+        if (off >= MAX_COMM_SIZE) {
+            fprintf(stderr, "/proc/self/comm too large for test\n");
+            return -1;
+        }
+    }
+    close(fd); /* do not leak one descriptor per exec into the children */
+    assert(off && buf[off] == '\0' && buf[off - 1] == '\n');
+    buf[off - 1] = '\0';
+    /* comm holds at most TASK_COMM_LEN - 1 (15) bytes; compare only those. */
+    const char *expec_comm = basename(argv[1]);
+    size_t cmp_len = strnlen(expec_comm, 15);
+    if (argc == 3 && strncmp(buf, expec_comm, cmp_len)) {
+        fprintf(stderr,
+                "Didn't propagate qemu settings\nComm:  '%s'\nExpec: '%s'\n",
+                buf, expec_comm);
+        return -1;
+    }
+    free(buf);
+    next_arg[0] = argv[2][0];
+    next_arg[1] = '\0';
+    if (next_arg[0] == '9') {
+        return 0;
+    }
+    next_arg[0] += 1;
+    char *next_args[] = { argv[0], argv[1], next_arg, NULL };
+    int eres = execve(argv[0], next_args, envp);
+    /* execve() returns only on failure. */
+    fprintf(stderr, "Unable to execve: %d/%d -> %s\n", eres, errno,
+            strerror(errno));
+    return -1;
+}
-- 
2.43.0


Reply via email to