Hi,

On Tue, 2025-01-14 at 20:30 +0900, Hajime Tazaki wrote:
> This patchset is another spin of the nommu mode addition to UML.  It hasn't
> changed much since the last version (v5), but contains cleanups.  It would
> be nice to hear your opinions on it.
> 
> There are still several limitations/issues which we already found;
> here is the list of those issues.
> 
> - memory mapped by loadable modules is not distinguished from
>   userspace memory.

Maybe I am missing it, but I do not yet see proper FP register
handling. This will be needed for task/thread switches and also signal
emission/sigreturn. I am attaching the test program that I used to
verify the correct behaviour when dealing with the recent changes to FP
register handling in UML.

Benjamin

> 
> -- Hajime
> 
> v6:
> - rebase to the latest uml/next tree
> - more clean up on mmu/nommu for signal handling [10/13]
> - rename functions of mcontext routines [06,10/13]
> - added Acked-by tag for binfmt_elf_fdpic [02/13]
> 
> v5:
> - clean up stack manipulation code [05,06,07,10/13]
> -
> https://lore.kernel.org/linux-um/cover.1733998168.git.thehaj...@gmail.com/
> 
> v4:
> - add arch/um/nommu, arch/x86/um/nommu to contain !MMU specific codes
> - remove zpoline patch
> - drop binfmt_elf_fdpic patch
> - reduce ifndef CONFIG_MMU if possible
> - split to elf header cleanup patch [01/13]
> - fix kernel test robot warnings [06/13]
> - fix coding styles [07/13]
> - move task_top_of_stack definition [05/13]
> -
> https://lore.kernel.org/linux-um/cover.1733652929.git.thehaj...@gmail.com/
> 
> v3:
> -
> https://lore.kernel.org/linux-um/cover.1733199769.git.thehaj...@gmail.com/
> - add seccomp-based syscall hook in addition to zpoline [06/13]
> - remove RFC, add a line to MAINTAINERS file
> - fix kernel test robot warnings [02/13,08/13,10/13]
> - add base-commit tag to cover letter
> - pull the latest uml/next
> - clean up SIGSEGV handling [10/13]
> - detect fsgsbase availability with elf aux vector [08/13]
> - simplify vdso code with macros [09/13]
> 
> RFC v2:
> -
> https://lore.kernel.org/linux-um/cover.1731290567.git.thehaj...@gmail.com/
> - base branch is now uml/linux.git instead of torvalds/linux.git.
> - reorganize the patch series to clean up
> - fixed various coding styles issues
> - clean up exec code path [07/13]
> - fixed the crash/SIGSEGV case on userspace programs [10/13]
> - add seccomp filter to limit syscall caller address [06/13]
> - detect fsgsbase availability with sigsetjmp/siglongjmp [08/13]
> - removes unrelated changes
> - removes unneeded ifndef CONFIG_MMU
> - convert UML_CONFIG_MMU to CONFIG_MMU as using uml/linux.git
> - proposed a patch for the maple-tree issue (resolving a limitation in
>   RFC v1)
>  
> https://lore.kernel.org/linux-mm/20241108222834.3625217-1-thehaj...@gmail.com/
> 
> RFC:
> -
> https://lore.kernel.org/linux-um/cover.1729770373.git.thehaj...@gmail.com/
> 
> Hajime Tazaki (13):
>   x86/um: clean up elf specific definitions
>   x86/um: nommu: elf loader for fdpic
>   um: decouple MMU specific code from the common part
>   um: nommu: memory handling
>   x86/um: nommu: syscall handling
>   um: nommu: seccomp syscalls hook
>   x86/um: nommu: process/thread handling
>   um: nommu: configure fs register on host syscall invocation
>   x86/um/vdso: nommu: vdso memory update
>   x86/um: nommu: signal handling
>   um: change machine name for uname output
>   um: nommu: add documentation of nommu UML
>   um: nommu: plug nommu code into build system
> 
>  Documentation/virt/uml/nommu-uml.rst    | 177 ++++++++++++++++++++++
>  MAINTAINERS                             |   1 +
>  arch/um/Kconfig                         |  14 +-
>  arch/um/Makefile                        |  10 ++
>  arch/um/configs/x86_64_nommu_defconfig  |  64 ++++++++
>  arch/um/include/asm/Kbuild              |   1 +
>  arch/um/include/asm/futex.h             |   4 +
>  arch/um/include/asm/mmu.h               |   8 +
>  arch/um/include/asm/mmu_context.h       |   2 +
>  arch/um/include/asm/ptrace-generic.h    |   6 +
>  arch/um/include/asm/uaccess.h           |   7 +-
>  arch/um/include/shared/kern_util.h      |  12 ++
>  arch/um/include/shared/os.h             |  16 ++
>  arch/um/kernel/Makefile                 |   5 +-
>  arch/um/kernel/mem-pgtable.c            |  55 +++++++
>  arch/um/kernel/mem.c                    |  39 +----
>  arch/um/kernel/process.c                |  25 ++++
>  arch/um/kernel/skas/process.c           |  27 ----
>  arch/um/kernel/um_arch.c                |   3 +
>  arch/um/nommu/Makefile                  |   3 +
>  arch/um/nommu/os-Linux/Makefile         |   7 +
>  arch/um/nommu/os-Linux/signal.c         |  28 ++++
>  arch/um/nommu/trap.c                    | 188 ++++++++++++++++++++++++
>  arch/um/os-Linux/Makefile               |   8 +-
>  arch/um/os-Linux/internal.h             |   5 +
>  arch/um/os-Linux/mem.c                  |   4 +
>  arch/um/os-Linux/process.c              | 149 ++++++++++++++++++-
>  arch/um/os-Linux/seccomp.c              |  87 +++++++++++
>  arch/um/os-Linux/signal.c               |  31 +++-
>  arch/um/os-Linux/skas/process.c         | 132 -----------------
>  arch/um/os-Linux/start_up.c             |  20 +++
>  arch/um/os-Linux/util.c                 |   3 +-
>  arch/x86/um/Makefile                    |   7 +-
>  arch/x86/um/asm/elf.h                   |   8 +-
>  arch/x86/um/asm/module.h                |  24 ---
>  arch/x86/um/nommu/Makefile              |   8 +
>  arch/x86/um/nommu/do_syscall_64.c       |  74 ++++++++++
>  arch/x86/um/nommu/entry_64.S            | 113 ++++++++++++++
>  arch/x86/um/nommu/os-Linux/Makefile     |   6 +
>  arch/x86/um/nommu/os-Linux/mcontext.c   |  24 +++
>  arch/x86/um/nommu/syscalls.h            |  16 ++
>  arch/x86/um/nommu/syscalls_64.c         | 115 +++++++++++++++
>  arch/x86/um/shared/sysdep/mcontext.h    |   4 +
>  arch/x86/um/shared/sysdep/syscalls_64.h |   6 +
>  arch/x86/um/signal.c                    |   7 +
>  arch/x86/um/vdso/vma.c                  |  17 ++-
>  fs/Kconfig.binfmt                       |   2 +-
>  47 files changed, 1328 insertions(+), 244 deletions(-)
>  create mode 100644 Documentation/virt/uml/nommu-uml.rst
>  create mode 100644 arch/um/configs/x86_64_nommu_defconfig
>  create mode 100644 arch/um/kernel/mem-pgtable.c
>  create mode 100644 arch/um/nommu/Makefile
>  create mode 100644 arch/um/nommu/os-Linux/Makefile
>  create mode 100644 arch/um/nommu/os-Linux/signal.c
>  create mode 100644 arch/um/nommu/trap.c
>  create mode 100644 arch/um/os-Linux/seccomp.c
>  delete mode 100644 arch/x86/um/asm/module.h
>  create mode 100644 arch/x86/um/nommu/Makefile
>  create mode 100644 arch/x86/um/nommu/do_syscall_64.c
>  create mode 100644 arch/x86/um/nommu/entry_64.S
>  create mode 100644 arch/x86/um/nommu/os-Linux/Makefile
>  create mode 100644 arch/x86/um/nommu/os-Linux/mcontext.c
>  create mode 100644 arch/x86/um/nommu/syscalls.h
>  create mode 100644 arch/x86/um/nommu/syscalls_64.c
> 
> 
> base-commit: 2d2b61ae38bd91217ea7cc5bc700a2b9e75b3937

/*
 * Build (libraries go after the source file so the linker can resolve pow()):
 * gcc test-signal-restore.c -o test-signal-restore-amd64 -lm
 * gcc -m32 -march=i686 test-signal-restore.c -o test-signal-restore-i386 -lm
 */

/* Is there a better way to *not* include bits/sigcontext.h? */
#include <features.h>
#undef __USE_MISC
#include <asm/sigcontext.h>

#include <elf.h>
#include <math.h>
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <errno.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <asm/unistd.h>

/*
 * Amount added to the exponent field of st(0) by the signal handler or by
 * the tracer; test_fp() expects its value to be scaled by 2^ST0_EXP_ADD.
 */
#define ST0_EXP_ADD 10

/*
 * Buffer used for the ptrace register transfers. main() maps 8192 bytes here
 * and then unmaps the second 4096 bytes, leaving a single usable page.
 */
void *scratch_page;

/*
 * SIGUSR1 handler: print the size fields found in the FP state of the signal
 * frame, then add ST0_EXP_ADD to the exponent of st(0) in the saved context.
 * test_fp() expects to observe that change in its st(0) operand once the
 * signal has been handled.
 *
 * @sig:  signal number (unused)
 * @info: signal information (unused)
 * @p:    pointer to the ucontext_t of the interrupted context
 */
void sighandler(int sig, siginfo_t *info, void *p)
{
	ucontext_t *uc = p;
	/* Look at glibc's FP state through the kernel's struct _fpstate. */
	struct _fpstate *fpstate = (struct _fpstate *)uc->uc_mcontext.__fpregs;

	(void)sig;
	(void)info;

	/* Both size fields are unsigned 32-bit values, so print them with %u. */
	printf("sighandler: extended_size: %u, xstate_size: %u\n",
	       fpstate->sw_reserved.extended_size,
	       fpstate->sw_reserved.xstate_size);

	uc->uc_mcontext.__fpregs->_st[0].__exponent += ST0_EXP_ADD;
}

/*
 * Send SIGUSR1 to ourselves while "num" is held in st(0) and verify that
 * the value comes back with its exponent raised by ST0_EXP_ADD, i.e. that
 * whoever handled the signal (the handler or a tracer) managed to modify the
 * FP register.
 *
 * Returns 0 if st(0) was modified as expected, 1 if it was not or if the
 * kill syscall itself failed.
 */
int test_fp()
{
	double num = 0.5;
	long ret;

	printf("pre-signal: %g\n", num);
	/*
	 * This does kill(getpid(), SIGUSR1); with "num" being passed in AND
	 * out of the floating point stack. We can therefore modify num by
	 * changing st[0] when handling the signal.
	 *
	 * The "memory" clobber tells the compiler that memory may have changed
	 * behind its back, since a signal handler can run during the syscall.
	 */
#ifdef __i386__
	asm volatile (
		"int $0x80;"
		: "=t" (num), "=a" (ret)
		: "0" (num), "1" (__NR_kill), "b" (getpid()), "c" (SIGUSR1)
		: "memory");
#else
	asm volatile (
		"syscall;"
		: "=t" (num), "=a" (ret)
		: "0" (num), "1" (__NR_kill), "D" (getpid()), "S" (SIGUSR1)
		: "r11", "rcx", "memory");
#endif
	printf("post-signal: %g\n", num);

	/* A raw syscall reports failure as a negative errno value. */
	if (ret < 0) {
		printf("kill syscall failed: %ld\n", ret);
		return 1;
	}

	/* 0.5 * 2^ST0_EXP_ADD == 2^(ST0_EXP_ADD - 1), exactly representable. */
	if (num != pow(2, ST0_EXP_ADD - 1)) {
		printf("floating point register was not manipulated\n");
		return 1;
	}

	return 0;
}

/*
 * Selects which ptrace interface test_fp_ptrace() uses to read and write
 * the child's floating point state. Values are assigned implicitly,
 * starting at 0.
 */
enum source {
	S_FPREGS,		/* PTRACE_GETFPREGS / PTRACE_SETFPREGS */
	S_FPXREGS,		/* PTRACE_GETFPXREGS / PTRACE_SETFPXREGS */
	S_GETREGS_FPREGS,	/* PTRACE_{GET,SET}REGSET with NT_PRFPREG */
	S_GETREGS_XFPREGS,	/* PTRACE_{GET,SET}REGSET with NT_PRXFPREG */
	S_GETREGS_XSTATE,	/* PTRACE_{GET,SET}REGSET with NT_X86_XSTATE */
};

/*
 * Wait for a state change of @pid, retrying when interrupted by a signal.
 * Returns 0 with *@status filled in, or -1 if waitpid() failed.
 */
static int wait_for_child(pid_t pid, int *status)
{
	int ret;

	do {
		ret = waitpid(pid, status, 0);
	} while (ret < 0 && errno == EINTR);

	return ret < 0 ? -1 : 0;
}

/* Kill a still-existing traced child and reap it so no process is left behind. */
static void abort_child(pid_t pid)
{
	int ret;

	kill(pid, SIGKILL);
	do {
		ret = waitpid(pid, NULL, 0);
	} while (ret < 0 && errno == EINTR);
}

/*
 * Run test_fp() in a traced child. When the child stops because of its
 * SIGUSR1, add ST0_EXP_ADD to the exponent of st(0) from the tracer, using
 * the ptrace interface selected by @source, then resume the child without
 * delivering the signal.
 *
 * Returns the child's exit status (0 on success, 1 if test_fp() failed),
 * 0 if the register fetch fails with EINVAL (reported as "not supported"),
 * and 127 on any other failure, including S_FPXREGS / S_GETREGS_XFPREGS
 * on x86_64.
 */
int test_fp_ptrace(enum source source)
{
	int pid, status, ret, err;

	/* Avoid the child re-emitting output still buffered by the parent. */
	fflush(stdout);

	pid = fork();
	if (pid < 0)
		return 127;

	if (pid == 0) {
		/* child */
		/*
		 * Without tracing, the parent's waitpid() would never see the
		 * SIGSTOP below, so give up right away.
		 */
		if (ptrace(PTRACE_TRACEME, 0, 0, 0))
			exit(127);
		kill(getpid(), SIGSTOP);

		if (test_fp())
			exit(1);

		exit(0);
	}

	/*
	 * Wait for child to stop itself. If it is not stopped it has already
	 * terminated and been reaped, so there is nothing to kill.
	 */
	if (wait_for_child(pid, &status) || !WIFSTOPPED(status))
		return 127;

	/* Continue until SIGUSR1 to self */
	if (ptrace(PTRACE_CONT, pid, NULL, 0)) {
		abort_child(pid);
		return 127;
	}
	if (wait_for_child(pid, &status) || !WIFSTOPPED(status))
		return 127;

	if (source == S_FPXREGS || source == S_GETREGS_XFPREGS) {
#ifdef __i386__
		struct user_fpxregs_struct *fpstate;
		struct iovec iov = {
			.iov_len = sizeof(*fpstate),
		};

		/*
		 * Place the buffer so that it ends exactly at the end of the
		 * page (presumably so an overrun hits the page main() unmapped).
		 */
		fpstate = scratch_page + 4096 - iov.iov_len;
		iov.iov_base = fpstate;

		if (source == S_GETREGS_XFPREGS)
			ret = ptrace(PTRACE_GETREGSET, pid, NT_PRXFPREG, &iov);
		else
			ret = ptrace(PTRACE_GETFPXREGS, pid, NULL, fpstate);

		if (ret) {
			/* Save errno before the cleanup calls can change it. */
			err = errno;
			abort_child(pid);
			if (err == EINVAL) {
				printf("Getting FPX regs not supported\n");
				return 0;
			}
			printf("Error getting FPX regs: %d\n", err);
			return 127;
		}
		((struct _fpxreg*)&fpstate->st_space[0])->exponent += ST0_EXP_ADD;

		if (source == S_GETREGS_XFPREGS)
			ret = ptrace(PTRACE_SETREGSET, pid, NT_PRXFPREG, &iov);
		else
			ret = ptrace(PTRACE_SETFPXREGS, pid, NULL, fpstate);
		if (ret) {
			err = errno;
			abort_child(pid);
			printf("Failed to set FPX regs: %d\n", err);
			return 127;
		}

#else
		printf("No FPXREGS on x86_64\n");
		abort_child(pid);
		return 127;
#endif
	} else if (source == S_FPREGS || source == S_GETREGS_FPREGS) {
		struct _fpstate *fpstate;
		struct iovec iov = {
			.iov_len = sizeof(*fpstate),
		};

		/* Unlike the other cases, this buffer starts at the page start. */
		fpstate = scratch_page;
		iov.iov_base = fpstate;

		if (source == S_GETREGS_FPREGS)
			ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
		else
			ret = ptrace(PTRACE_GETFPREGS, pid, NULL, fpstate);

		if (ret) {
			err = errno;
			abort_child(pid);
			if (err == EINVAL) {
				printf("Getting FP regs not supported\n");
				return 0;
			}
			printf("Error getting FP regs: %d\n", err);
			return 127;
		}
#ifdef __i386__
		((struct _fpreg*) &fpstate->_st[0])->exponent += ST0_EXP_ADD;
#else
		((struct _fpxreg*) &fpstate->st_space[0])->exponent += ST0_EXP_ADD;
#endif

		if (source == S_GETREGS_FPREGS)
			ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
		else
			ret = ptrace(PTRACE_SETFPREGS, pid, NULL, fpstate);

		if (ret) {
			err = errno;
			abort_child(pid);
			printf("Failed to set FP regs: %d\n", err);
			return 127;
		}
	} else if (source == S_GETREGS_XSTATE) {
#ifdef __i386__
		struct user_fpxregs_struct *fpstate;
#else
		struct user_fpregs_struct *fpstate;
#endif
		struct iovec iov = {
			.iov_len = 4096,
		};

		/* First call with a full page; iov_len is then used as the size. */
		fpstate = scratch_page + 4096 - iov.iov_len;
		iov.iov_base = fpstate;

		ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
		if (ret) {
			err = errno;
			abort_child(pid);
			if (err == EINVAL) {
				printf("Getting XSTATE not supported\n");
				return 0;
			}
			printf("Error getting XSTATE size: %d\n", err);
			return 127;
		}

		printf("host xstate size: %zu\n", iov.iov_len);

		/* Second time with the exact length (to test the kernel) */
		fpstate = scratch_page + 4096 - iov.iov_len;
		iov.iov_base = fpstate;

		ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
		if (ret) {
			err = errno;
			abort_child(pid);
			printf("Error getting XSTATE (with correct size): %d\n", err);
			return 127;
		}

		/* Both register structs expose the x87 registers as st_space. */
		((struct _fpxreg *)&fpstate->st_space[0])->exponent += ST0_EXP_ADD;

		ret = ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov);
		if (ret) {
			err = errno;
			abort_child(pid);
			printf("Failed to set XSTATE: %d\n", err);
			return 127;
		}

	} else {
		/* Unknown source: do not leave the stopped child behind. */
		abort_child(pid);
		return 127;
	}

	/* Run until completion (without handling the signal) */
	if (ptrace(PTRACE_CONT, pid, NULL, 0)) {
		abort_child(pid);
		return 127;
	}
	if (wait_for_child(pid, &status))
		return 127;

	if (WIFSTOPPED(status)) {
		/* Stopped again instead of exiting; clean up and fail. */
		abort_child(pid);
		return 127;
	}

	if (!WIFEXITED(status))
		return 127;

	return WEXITSTATUS(status);
}

int main()
{
	struct sigaction sa = {
		.sa_flags = SA_SIGINFO,
		.sa_handler = (void (*)(int))sighandler,
	};
	int ret;

	scratch_page = mmap(NULL, 8192, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	munmap(scratch_page + 4096, 4096);

	sigaction(SIGUSR1, &sa, NULL);

	if (test_fp())
		return 1;

	sa.sa_handler = SIG_DFL;
	sigaction(SIGUSR1, &sa, NULL);

	printf("\nmodify using ptrace PTRACE_SETFPREGS instead of sighandler:\n");
	ret = test_fp_ptrace(S_FPREGS);
	if (ret)
		return ret;

#ifdef __i386__
	printf("\nmodify using ptrace PTRACE_SETFPXREGS instead of sighandler:\n");
	ret = test_fp_ptrace(S_FPXREGS);
	if (ret)
		return ret;
#endif


	printf("\nmodify using ptrace PTRACE_SETREGSET, via NT_PRFPREG instead of sighandler:\n");
	ret = test_fp_ptrace(S_GETREGS_FPREGS);
	if (ret)
		return ret;

#ifdef __i386__
	printf("\nmodify using ptrace PTRACE_SETREGSET, via NT_XFPREGS instead of sighandler:\n");
	ret = test_fp_ptrace(S_GETREGS_XFPREGS);
	if (ret)
		return ret;
#endif

	printf("\nmodify using ptrace PTRACE_SETREGSET, via NT_X86_XSTATE instead of sighandler:\n");
	ret = test_fp_ptrace(S_GETREGS_XSTATE);
	if (ret)
		return ret;

	return 0;
}

Reply via email to