[ Upstream commit d55c571e4333fac71826e8db3b9753fadfbead6a ]

This script

        #!/usr/bin/bash

        echo 0 > /proc/sys/kernel/randomize_va_space

        echo 'void main(void) {}' > TEST.c

        # -fcf-protection to ensure that the 1st endbr32 insn can't be emulated
        gcc -m32 -fcf-protection=branch TEST.c -o test

        bpftrace -e 'uprobe:./test:main {}' -c ./test

"hangs", the probed ./test task enters an endless loop.

The problem is that with randomize_va_space == 0
get_unmapped_area(TASK_SIZE - PAGE_SIZE) called by xol_add_vma() can not
just return the "addr == TASK_SIZE - PAGE_SIZE" hint, this addr is used
by the stack vma.

arch_get_unmapped_area_topdown() doesn't take TIF_ADDR32 into account and
in_32bit_syscall() is false, this leads to info.high_limit > TASK_SIZE.
vm_unmapped_area() happily returns the high address > TASK_SIZE and then
get_unmapped_area() returns -ENOMEM after the "if (addr > TASK_SIZE - len)"
check.

handle_swbp() doesn't report this failure (probably it should) and silently
restarts the probed insn. Endless loop.

I think that the right fix should change the x86 get_unmapped_area() paths
to rely on TIF_ADDR32 rather than in_32bit_syscall(). Note also that if
CONFIG_X86_X32_ABI=y, in_x32_syscall() falsely returns true in this case
because ->orig_ax = -1.

But we need a simple fix for -stable, so this patch just sets TS_COMPAT if
the probed task is 32-bit to make in_ia32_syscall() true.

Fixes: 1b028f784e8c ("x86/mm: Introduce mmap_compat_base() for 32-bit mmap()")
Reported-by: Paulo Andrade <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/all/[email protected]/
Cc: [email protected]
Link: https://patch.msgid.link/[email protected]
---
 arch/x86/kernel/uprobes.c | 24 ++++++++++++++++++++++++
 include/linux/uprobes.h   |  1 +
 kernel/events/uprobes.c   | 10 +++++++---
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c07f6daaa22..6b431589305b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1097,3 +1097,27 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
        else
                return regs->sp <= ret->stack;
 }
+
+#ifdef CONFIG_IA32_EMULATION
+unsigned long arch_uprobe_get_xol_area(void)
+{
+       struct thread_info *ti = current_thread_info();
+       unsigned long vaddr;
+
+       /*
+        * HACK: we are not in a syscall, but x86 get_unmapped_area() paths
+        * ignore TIF_ADDR32 and rely on in_32bit_syscall() to calculate
+        * vm_unmapped_area_info.high_limit.
+        *
+        * The #ifdef above doesn't cover the CONFIG_X86_X32_ABI=y case,
+        * but in this case in_32bit_syscall() -> in_x32_syscall() always
+        * (falsely) returns true because ->orig_ax == -1.
+        */
+       if (test_thread_flag(TIF_ADDR32))
+               ti->status |= TS_COMPAT;
+       vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
+       ti->status &= ~TS_COMPAT;
+
+       return vaddr;
+}
+#endif
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f46e0ca0169c..3461199c4ec0 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -138,6 +138,7 @@ extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c
 extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
                                         void *src, unsigned long len);
+extern unsigned long arch_uprobe_get_xol_area(void);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4e6ada6a11c7..3bd85f043881 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1437,6 +1437,12 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
                set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
 }
 
+unsigned long __weak arch_uprobe_get_xol_area(void)
+{
+       /* Try to map as high as possible, this is only a hint. */
+       return get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
+}
+
 /* Slot allocation for XOL */
 static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
@@ -1452,9 +1458,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
        }
 
        if (!area->vaddr) {
-               /* Try to map as high as possible, this is only a hint. */
-               area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
-                                               PAGE_SIZE, 0, 0);
+               area->vaddr = arch_uprobe_get_xol_area();
                if (IS_ERR_VALUE(area->vaddr)) {
                        ret = area->vaddr;
                        goto fail;
-- 
2.52.0



Reply via email to