On Wed, Oct 30, 2024 at 9:10 AM Noah Goldstein <goldstein....@gmail.com> wrote: > > The new option '-qemu-children' makes it so that on `execve` the child > process will be launched by the same `qemu` executable that is currently > running along with its current commandline arguments. > > The motivation for the change is to make it so that plugins running > through `qemu` can continue to run on children. Why not just > `binfmt`?: Plugins can be desirable regardless of system/architecture > emulation, and can sometimes be useful for elf files that can run > natively. Enabling `binfmt` for all natively runnable elf files may > not be desirable. > > Signed-off-by: Noah Goldstein <goldstein....@gmail.com> > --- > linux-user/main.c | 21 ++++++ > linux-user/syscall.c | 21 ++++-- > linux-user/user-internals.h | 4 ++ > tests/tcg/multiarch/Makefile.target | 8 +++ > .../linux/linux-execve-qemu-children.c | 68 +++++++++++++++++++ > 5 files changed, 117 insertions(+), 5 deletions(-) > create mode 100644 tests/tcg/multiarch/linux/linux-execve-qemu-children.c > > diff --git a/linux-user/main.c b/linux-user/main.c > index 8143a0d4b0..5e3d41dc2b 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -81,6 +81,10 @@ unsigned long mmap_min_addr; > uintptr_t guest_base; > bool have_guest_base; > > +bool qemu_dup_for_children; > +int qemu_argc; > +char **qemu_argv; > + > /* > * Used to implement backwards-compatibility for the `-strace`, and > * QEMU_STRACE options. 
Without this, the QEMU_LOG can be overwritten by > @@ -451,6 +455,11 @@ static void handle_arg_jitdump(const char *arg) > perf_enable_jitdump(); > } > > +static void handle_arg_qemu_children(const char *arg) > +{ > + qemu_dup_for_children = true; > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -526,6 +535,10 @@ static const struct qemu_argument arg_table[] = { > "", "Generate a /tmp/perf-${pid}.map file for perf"}, > {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > "", "Generate a jit-${pid}.dump file for perf"}, > + {"qemu-children", > + "QEMU_CHILDREN", false, handle_arg_qemu_children, > + "", "Run child processes (created with execve) with qemu " > + "(as instantiated for the parent)"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > @@ -729,6 +742,14 @@ int main(int argc, char **argv, char **envp) > > optind = parse_args(argc, argv); > > + if (qemu_dup_for_children) { > + qemu_argc = optind; > + qemu_argv = g_new0(char *, qemu_argc); > + for (i = 0; i < optind; ++i) { > + qemu_argv[i] = strdup(argv[i]); > + } > + } > + > qemu_set_log_filename_flags(last_log_filename, > last_log_mask | (enable_strace * LOG_STRACE), > &error_fatal); > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index 59b2080b98..96b105e9ce 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -8550,13 +8550,14 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > abi_long pathname, abi_long guest_argp, > abi_long guest_envp, int flags, bool is_execveat) > { > - int ret; > + int ret, argp_offset; > char **argp, **envp; > int argc, envc; > abi_ulong gp; > abi_ulong addr; > char **q; > void *p; > + bool through_qemu = dirfd == AT_FDCWD && qemu_dup_for_children; > > argc = 0; > > @@ -8580,10 +8581,12 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > envc++; > } > > - argp = g_new0(char *, argc + 1); > + argp_offset = through_qemu ? 
qemu_argc : 0; > + argp = g_new0(char *, argc + argp_offset + 1); > envp = g_new0(char *, envc + 1); > > - for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; > + gp; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp)) { > goto execve_efault; > } > @@ -8628,9 +8631,16 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, > } > > const char *exe = p; > - if (is_proc_myself(p, "exe")) { > + if (through_qemu) { > + int i; > + for (i = 0; i < argp_offset; ++i) { > + argp[i] = qemu_argv[i]; > + } > + exe = qemu_argv[0]; > + } else if (is_proc_myself(p, "exe")) { > exe = exec_path; > } > + > ret = is_execveat > ? safe_execveat(dirfd, exe, argp, envp, flags) > : safe_execve(exe, argp, envp); > @@ -8644,7 +8654,8 @@ execve_efault: > ret = -TARGET_EFAULT; > > execve_end: > - for (gp = guest_argp, q = argp; *q; gp += sizeof(abi_ulong), q++) { > + for (gp = guest_argp, q = argp + argp_offset; > + *q; gp += sizeof(abi_ulong), q++) { > if (get_user_ual(addr, gp) || !addr) { > break; > } > diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h > index 46ffc093f4..ed3ed666a0 100644 > --- a/linux-user/user-internals.h > +++ b/linux-user/user-internals.h > @@ -30,6 +30,10 @@ void stop_all_tasks(void); > extern const char *qemu_uname_release; > extern unsigned long mmap_min_addr; > > +extern bool qemu_dup_for_children; > +extern int qemu_argc; > +extern char **qemu_argv; > + > typedef struct IOCTLEntry IOCTLEntry; > > typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, > diff --git a/tests/tcg/multiarch/Makefile.target > b/tests/tcg/multiarch/Makefile.target > index 78b83d5575..0e220953e7 100644 > --- a/tests/tcg/multiarch/Makefile.target > +++ b/tests/tcg/multiarch/Makefile.target > @@ -30,6 +30,14 @@ run-float_%: float_% > $(call > conditional-diff-out,$<,$(SRC_PATH)/tests/tcg/$(TARGET_NAME)/$<.ref) > > > +run-linux-execve-qemu-children: linux-execve-qemu-children > + 
$(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0) > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip) > + > +run-plugin-linux-execve-qemu-children-with-%: linux-execve-qemu-children > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) -qemu-children $< $(QEMU) 0) > + $(call run-test,$<, $(QEMU) $(QEMU_OPTS) $< linux-execve 0 skip) > + > testthread: LDFLAGS+=-lpthread > > threadcount: LDFLAGS+=-lpthread > diff --git a/tests/tcg/multiarch/linux/linux-execve-qemu-children.c > b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c > new file mode 100644 > index 0000000000..60d6537666 > --- /dev/null > +++ b/tests/tcg/multiarch/linux/linux-execve-qemu-children.c > @@ -0,0 +1,68 @@ > +#include <assert.h> > +#include <errno.h> > +#include <fcntl.h> > +#include <libgen.h> > +#include <malloc.h> > +#include <stdio.h> > +#include <string.h> > +#include <unistd.h> > + > +#define MAX_COMM_SIZE (4096) > + > +int > +main(int argc, char ** argv, char ** envp) { > + int fd; > + char next_arg[2]; > + char * buf; > + ssize_t off; > + const char * expec_comm; > + assert(argc == 3 || argc == 4); > + fd = open("/proc/self/comm", O_RDONLY); > + assert(fd > 0); > + > + buf = calloc(MAX_COMM_SIZE + 1, 1); > + assert(buf != NULL); > + > + off = 0; > + for (;;) { > + ssize_t res = read(fd, buf + off, 1); > + if (res < 0 && errno != EAGAIN) { > + perror("Failed to read comm"); > + return -1; > + } > + if (res == 0) { > + break; > + } > + > + off += res; > + > + if (off >= MAX_COMM_SIZE) { > + fprintf(stderr, "/proc/self/comm too large for test\n"); > + return -1; > + } > + } > + assert(off && buf[off] == '\0' && buf[off - 1] == '\n'); > + buf[off - 1] = '\0'; > + expec_comm = basename(argv[1]); > + if (argc == 3 && strncmp(buf, expec_comm, strlen(expec_comm))) { > + fprintf(stderr, > + "Didn't propagate qemu settings\nComm: '%s'\nExpec: '%s'\n", > + buf, expec_comm); > + return -1; > + } > + free(buf); > + next_arg[0] = argv[2][0]; > + next_arg[1] = '\0'; > + 
if (next_arg[0] == '9') { > + return 0; > + } > + next_arg[0] += 1; > + char * next_args[] = { argv[0], argv[1], next_arg, NULL }; > + int eres = execve(argv[0], &next_args[0], envp); > + if (eres != 0) { > + fprintf(stderr, "Unable to execve: %d/%d -> %s\n", eres, errno, > + strerror(errno)); > + return -1; > + } > + return 0; > +} > -- > 2.43.0 >
Added a test that covers both the old behavior (no propagation of qemu) and the new behavior (propagation of qemu + cmdline). Tested on AArch64 + Linux with: `make check-tcg`