Hi Michael, I tested v1 of this. I ran the test from the bug with a range of stack sizes, in a loop, for several hours and didn't see any crashes/signal delivery failures.
I retested v2 for a few minutes just to be sure, and I ran stress-ng's stack, stackmmap and bad-altstack stressors to make sure no obvious kernel bugs were exposed. Nothing crashed. All tests done on a P8 LE guest under KVM. On that basis: Tested-by: Daniel Axtens <d...@axtens.net> The more I look at this the less qualified I feel to Review it, but certainly it looks better than my ugly hack from late last year. Kind regards, Daniel > We have powerpc specific logic in our page fault handling to decide if > an access to an unmapped address below the stack pointer should expand > the stack VMA. > > The logic aims to prevent userspace from doing bad accesses below the > stack pointer. However as long as the stack is < 1MB in size, we allow > all accesses without further checks. Adding some debug I see that I > can do a full kernel build and LTP run, and not a single process has > used more than 1MB of stack. So for the majority of processes the > logic never even fires. > > We also recently found a nasty bug in this code which could cause > userspace programs to be killed during signal delivery. It went > unnoticed presumably because most processes use < 1MB of stack. > > The generic mm code has also grown support for stack guard pages since > this code was originally written, so the most heinous case of the > stack expanding into other mappings is now handled for us. > > Finally although some other arches have special logic in this path, > from what I can tell none of x86, arm64, arm and s390 impose any extra > checks other than those in expand_stack(). > > So drop our complicated logic and like other architectures just let > the stack expand as long as its within the rlimit. > > Signed-off-by: Michael Ellerman <m...@ellerman.id.au> > --- > arch/powerpc/mm/fault.c | 109 ++-------------------------------------- > 1 file changed, 5 insertions(+), 104 deletions(-) > > v2: no change just rebased. > > diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c > index 3ebb1792e636..925a7231abb3 100644 > --- a/arch/powerpc/mm/fault.c > +++ b/arch/powerpc/mm/fault.c > @@ -42,39 +42,7 @@ > #include <asm/kup.h> > #include <asm/inst.h> > > -/* > - * Check whether the instruction inst is a store using > - * an update addressing form which will update r1. > - */ > -static bool store_updates_sp(struct ppc_inst inst) > -{ > - /* check for 1 in the rA field */ > - if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1) > - return false; > - /* check major opcode */ > - switch (ppc_inst_primary_opcode(inst)) { > - case OP_STWU: > - case OP_STBU: > - case OP_STHU: > - case OP_STFSU: > - case OP_STFDU: > - return true; > - case OP_STD: /* std or stdu */ > - return (ppc_inst_val(inst) & 3) == 1; > - case OP_31: > - /* check minor opcode */ > - switch ((ppc_inst_val(inst) >> 1) & 0x3ff) { > - case OP_31_XOP_STDUX: > - case OP_31_XOP_STWUX: > - case OP_31_XOP_STBUX: > - case OP_31_XOP_STHUX: > - case OP_31_XOP_STFSUX: > - case OP_31_XOP_STFDUX: > - return true; > - } > - } > - return false; > -} > + > /* > * do_page_fault error handling helpers > */ > @@ -267,57 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, > unsigned long error_code, > return false; > } > > -// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE > -#define SIGFRAME_MAX_SIZE (4096 + 128) > - > -static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, > - struct vm_area_struct *vma, unsigned int flags, > - bool *must_retry) > -{ > - /* > - * N.B. The POWER/Open ABI allows programs to access up to > - * 288 bytes below the stack pointer. > - * The kernel signal delivery code writes a bit over 4KB > - * below the stack pointer (r1) before decrementing it. > - * The exec code can write slightly over 640kB to the stack > - * before setting the user r1. Thus we allow the stack to > - * expand to 1MB without further checks. > - */ > - if (address + 0x100000 < vma->vm_end) { > - struct ppc_inst __user *nip = (struct ppc_inst __user > *)regs->nip; > - /* get user regs even if this fault is in kernel mode */ > - struct pt_regs *uregs = current->thread.regs; > - if (uregs == NULL) > - return true; > - > - /* > - * A user-mode access to an address a long way below > - * the stack pointer is only valid if the instruction > - * is one which would update the stack pointer to the > - * address accessed if the instruction completed, > - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb > - * (or the byte, halfword, float or double forms). > - * > - * If we don't check this then any write to the area > - * between the last mapped region and the stack will > - * expand the stack rather than segfaulting. > - */ > - if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1]) > - return false; > - > - if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && > - access_ok(nip, sizeof(*nip))) { > - struct ppc_inst inst; > - > - if (!probe_user_read_inst(&inst, nip)) > - return !store_updates_sp(inst); > - *must_retry = true; > - } > - return true; > - } > - return false; > -} > - > #ifdef CONFIG_PPC_MEM_KEYS > static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, > struct vm_area_struct *vma) > @@ -483,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned > long address, > int is_user = user_mode(regs); > int is_write = page_fault_is_write(error_code); > vm_fault_t fault, major = 0; > - bool must_retry = false; > bool kprobe_fault = kprobe_page_fault(regs, 11); > > if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) > @@ -572,30 +488,15 @@ static int __do_page_fault(struct pt_regs *regs, > unsigned long address, > vma = find_vma(mm, address); > if (unlikely(!vma)) > return bad_area(regs, address); > - if (likely(vma->vm_start <= address)) > - goto good_area; > - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) > - return bad_area(regs, address); > > - /* The stack is being expanded, check if it's valid */ > - if (unlikely(bad_stack_expansion(regs, address, vma, flags, > - &must_retry))) { > - if (!must_retry) > + if (unlikely(vma->vm_start > address)) { > + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) > return bad_area(regs, address); > > - mmap_read_unlock(mm); > - if (fault_in_pages_readable((const char __user *)regs->nip, > - sizeof(unsigned int))) > - return bad_area_nosemaphore(regs, address); > - goto retry; > + if (unlikely(expand_stack(vma, address))) > + return bad_area(regs, address); > } > > - /* Try to expand it */ > - if (unlikely(expand_stack(vma, address))) > - return bad_area(regs, address); > - > -good_area: > - > #ifdef CONFIG_PPC_MEM_KEYS > if (unlikely(access_pkey_error(is_write, is_exec, > (error_code & DSISR_KEYFAULT), vma))) > -- > 2.25.1