This patch, by Jake Moilanen with some further hacking from me, adds a
real execute permission bit to the linux PTEs on PPC64, and connects
that into the kernel infrastructure for implementing non-executable
stacks and heaps.  This means that on any PPC64 cpu since the POWER4
(i.e. POWER4, PPC970, PPC970FX, POWER4+, POWER5) you will get a
segfault if you try to execute instructions from a region that doesn't
have PROT_EXEC permission.  The patch also marks the pages of the
linear mapping that aren't part of the kernel text as non-executable.

Andrew and Linus, could you try this on your G5s?  I have tried it
here on a Debian system and a SLES9 system and everything runs fine,
but I haven't been able to try it on YDL, FC or RHEL4.

With this patch we default to executable stack and read-implies-exec
behaviour when there is no PT_GNU_STACK program header entry, or when
there is one and it indicates the stack is executable.  For 32-bit
processes, the heap is always executable, because the PLT contains
instructions and it ends up in the bss segment.

Signed-off-by: Jake Moilanen <[EMAIL PROTECTED]>
Signed-off-by: Paul Mackerras <[EMAIL PROTECTED]>

diff -urN linux-2.5/arch/ppc64/kernel/head.S g5-ppc64/arch/ppc64/kernel/head.S
--- linux-2.5/arch/ppc64/kernel/head.S  2005-03-07 08:21:53.000000000 +1100
+++ g5-ppc64/arch/ppc64/kernel/head.S   2005-03-15 17:14:44.000000000 +1100
@@ -950,11 +950,12 @@
         * accessing a userspace segment (even from the kernel). We assume
         * kernel addresses always have the high bit set.
         */
-       rlwinm  r4,r4,32-23,29,29       /* DSISR_STORE -> _PAGE_RW */
+       rlwinm  r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
        rotldi  r0,r3,15                /* Move high bit into MSR_PR posn */
        orc     r0,r12,r0               /* MSR_PR | ~high_bit */
        rlwimi  r4,r0,32-13,30,30       /* becomes _PAGE_USER access bit */
        ori     r4,r4,1                 /* add _PAGE_PRESENT */
+       rlwimi  r4,r5,22+2,31-2,31-2    /* Set _PAGE_EXEC if trap is 0x400 */
 
        /*
         * On iSeries, we soft-disable interrupts here, then
diff -urN linux-2.5/arch/ppc64/kernel/iSeries_htab.c 
g5-ppc64/arch/ppc64/kernel/iSeries_htab.c
--- linux-2.5/arch/ppc64/kernel/iSeries_htab.c  2004-09-24 15:23:06.000000000 
+1000
+++ g5-ppc64/arch/ppc64/kernel/iSeries_htab.c   2005-03-15 17:15:36.000000000 
+1100
@@ -144,6 +144,10 @@
 
        HvCallHpt_get(&hpte, slot);
        if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+               /*
+                * Hypervisor expects bits as NPPP, which is
+                * different from how they are mapped in our PP.
+                */
                HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
                iSeries_hunlock(slot);
                return 0;
diff -urN linux-2.5/arch/ppc64/kernel/iSeries_setup.c 
g5-ppc64/arch/ppc64/kernel/iSeries_setup.c
--- linux-2.5/arch/ppc64/kernel/iSeries_setup.c 2005-03-07 08:21:53.000000000 
+1100
+++ g5-ppc64/arch/ppc64/kernel/iSeries_setup.c  2005-03-15 16:55:05.000000000 
+1100
@@ -633,6 +633,10 @@
                unsigned long vpn = va >> PAGE_SHIFT;
                unsigned long slot = HvCallHpt_findValid(&hpte, vpn);
 
+               /* Make non-kernel text non-executable */
+               if (!in_kernel_text(ea))
+                       mode_rw |= HW_NO_EXEC;
+
                if (hpte.dw0.dw0.v) {
                        /* HPTE exists, so just bolt it */
                        HvCallHpt_setSwBits(slot, 0x10, 0);
diff -urN linux-2.5/arch/ppc64/kernel/module.c 
g5-ppc64/arch/ppc64/kernel/module.c
--- linux-2.5/arch/ppc64/kernel/module.c        2004-05-11 07:53:04.000000000 
+1000
+++ g5-ppc64/arch/ppc64/kernel/module.c 2005-03-15 16:55:05.000000000 +1100
@@ -102,7 +102,8 @@
 {
        if (size == 0)
                return NULL;
-       return vmalloc(size);
+
+       return vmalloc_exec(size);
 }
 
 /* Free memory returned from module_alloc */
diff -urN linux-2.5/arch/ppc64/kernel/pSeries_lpar.c 
g5-ppc64/arch/ppc64/kernel/pSeries_lpar.c
--- linux-2.5/arch/ppc64/kernel/pSeries_lpar.c  2005-03-07 08:21:53.000000000 
+1100
+++ g5-ppc64/arch/ppc64/kernel/pSeries_lpar.c   2005-03-15 16:55:02.000000000 
+1100
@@ -470,7 +470,7 @@
        slot = pSeries_lpar_hpte_find(vpn);
        BUG_ON(slot == -1);
 
-       flags = newpp & 3;
+       flags = newpp & 7;
        lpar_rc = plpar_pte_protect(flags, slot, 0);
 
        BUG_ON(lpar_rc != H_Success);
diff -urN linux-2.5/arch/ppc64/mm/fault.c g5-ppc64/arch/ppc64/mm/fault.c
--- linux-2.5/arch/ppc64/mm/fault.c     2005-01-04 17:25:05.000000000 +1100
+++ g5-ppc64/arch/ppc64/mm/fault.c      2005-03-15 17:13:05.000000000 +1100
@@ -91,8 +91,9 @@
        struct mm_struct *mm = current->mm;
        siginfo_t info;
        unsigned long code = SEGV_MAPERR;
-       unsigned long is_write = error_code & 0x02000000;
+       unsigned long is_write = error_code & DSISR_ISSTORE;
        unsigned long trap = TRAP(regs);
+       unsigned long is_exec = trap == 0x400;
 
        BUG_ON((trap == 0x380) || (trap == 0x480));
 
@@ -109,7 +110,7 @@
        if (!user_mode(regs) && (address >= TASK_SIZE))
                return SIGSEGV;
 
-       if (error_code & 0x00400000) {
+       if (error_code & DSISR_DABRMATCH) {
                if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
                                        11, SIGSEGV) == NOTIFY_STOP)
                        return 0;
@@ -199,16 +200,19 @@
 good_area:
        code = SEGV_ACCERR;
 
+       if (is_exec) {
+               /* protection fault */
+               if (error_code & DSISR_PROTFAULT)
+                       goto bad_area;
+               if (!(vma->vm_flags & VM_EXEC))
+                       goto bad_area;
        /* a write */
-       if (is_write) {
+       } else if (is_write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        /* a read */
        } else {
-               /* protection fault */
-               if (error_code & 0x08000000)
-                       goto bad_area;
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+               if (!(vma->vm_flags & VM_READ))
                        goto bad_area;
        }
 
@@ -251,6 +255,12 @@
                return 0;
        }
 
+       if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
+           && printk_ratelimit())
+               printk(KERN_CRIT "kernel tried to execute NX-protected"
+                      " page (%lx) - exploit attempt? (uid: %d)\n",
+                      address, current->uid);
+
        return SIGSEGV;
 
 /*
diff -urN linux-2.5/arch/ppc64/mm/hash_low.S g5-ppc64/arch/ppc64/mm/hash_low.S
--- linux-2.5/arch/ppc64/mm/hash_low.S  2005-01-06 13:13:08.000000000 +1100
+++ g5-ppc64/arch/ppc64/mm/hash_low.S   2005-03-15 16:55:02.000000000 +1100
@@ -89,7 +89,7 @@
        /* Prepare new PTE value (turn access RW into DIRTY, then
         * add BUSY,HASHPTE and ACCESSED)
         */
-       rlwinm  r30,r4,5,24,24  /* _PAGE_RW -> _PAGE_DIRTY */
+       rlwinm  r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
        or      r30,r30,r31
        ori     r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
        /* Write the linux PTE atomically (setting busy) */
@@ -112,11 +112,11 @@
        rldicl  r5,r5,0,25              /* vsid & 0x0000007fffffffff */
        rldicl  r0,r3,64-12,48          /* (ea >> 12) & 0xffff */
        xor     r28,r5,r0
-       
-       /* Convert linux PTE bits into HW equivalents
-        */
-       andi.   r3,r30,0x1fa            /* Get basic set of flags */
-       rlwinm  r0,r30,32-2+1,30,30     /* _PAGE_RW -> _PAGE_USER (r0) */
+
+       /* Convert linux PTE bits into HW equivalents */
+       andi.   r3,r30,0x1fe            /* Get basic set of flags */
+       xori    r3,r3,HW_NO_EXEC        /* _PAGE_EXEC -> NOEXEC */
+       rlwinm  r0,r30,32-9+1,30,30     /* _PAGE_RW -> _PAGE_USER (r0) */
        rlwinm  r4,r30,32-7+1,30,30     /* _PAGE_DIRTY -> _PAGE_USER (r4) */
        and     r0,r0,r4                /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 
*/
        andc    r0,r30,r0               /* r0 = pte & ~r0 */
diff -urN linux-2.5/arch/ppc64/mm/hash_utils.c 
g5-ppc64/arch/ppc64/mm/hash_utils.c
--- linux-2.5/arch/ppc64/mm/hash_utils.c        2005-03-07 08:21:53.000000000 
+1100
+++ g5-ppc64/arch/ppc64/mm/hash_utils.c 2005-03-15 17:20:35.000000000 +1100
@@ -51,6 +51,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cputable.h>
 #include <asm/abs_addr.h>
+#include <asm/sections.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -95,6 +96,7 @@
 {
        unsigned long addr;
        unsigned int step;
+       unsigned long tmp_mode;
 
        if (large)
                step = 16*MB;
@@ -112,6 +114,13 @@
                else
                        vpn = va >> PAGE_SHIFT;
 
+
+               tmp_mode = mode;
+               
+               /* Make non-kernel text non-executable */
+               if (!in_kernel_text(addr))
+                       tmp_mode = mode | HW_NO_EXEC;
+
                hash = hpt_hash(vpn, large);
 
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -120,12 +129,12 @@
                if (systemcfg->platform & PLATFORM_LPAR)
                        ret = pSeries_lpar_hpte_insert(hpteg, va,
                                virt_to_abs(addr) >> PAGE_SHIFT,
-                               0, mode, 1, large);
+                               0, tmp_mode, 1, large);
                else
 #endif /* CONFIG_PPC_PSERIES */
                        ret = native_hpte_insert(hpteg, va,
                                virt_to_abs(addr) >> PAGE_SHIFT,
-                               0, mode, 1, large);
+                               0, tmp_mode, 1, large);
 
                if (ret == -1) {
                        ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -238,8 +247,6 @@
 {
        struct page *page;
 
-#define PPC64_HWNOEXEC (1 << 2)
-
        if (!pfn_valid(pte_pfn(pte)))
                return pp;
 
@@ -251,7 +258,7 @@
                        __flush_dcache_icache(page_address(page));
                        set_bit(PG_arch_1, &page->flags);
                } else
-                       pp |= PPC64_HWNOEXEC;
+                       pp |= HW_NO_EXEC;
        }
        return pp;
 }
diff -urN linux-2.5/arch/ppc64/mm/hugetlbpage.c 
g5-ppc64/arch/ppc64/mm/hugetlbpage.c
--- linux-2.5/arch/ppc64/mm/hugetlbpage.c       2005-03-07 17:58:40.000000000 
+1100
+++ g5-ppc64/arch/ppc64/mm/hugetlbpage.c        2005-03-15 17:27:33.000000000 
+1100
@@ -782,7 +782,6 @@
 {
        pte_t *ptep;
        unsigned long va, vpn;
-       int is_write;
        pte_t old_pte, new_pte;
        unsigned long hpteflags, prpn;
        long slot;
@@ -809,8 +808,7 @@
         * Check the user's access rights to the page.  If access should be
         * prevented then send the problem up to do_page_fault.
         */
-       is_write = access & _PAGE_RW;
-       if (unlikely(is_write && !(pte_val(*ptep) & _PAGE_RW)))
+       if (unlikely(access & ~pte_val(*ptep)))
                goto out;
        /*
         * At this point, we have a pte (old_pte) which can be used to build
@@ -829,6 +827,8 @@
        new_pte = old_pte;
 
        hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+       /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
+       hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
 
        /* Check if pte already has an hpte (case 2) */
        if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
diff -urN linux-2.5/include/asm-ppc64/elf.h g5-ppc64/include/asm-ppc64/elf.h
--- linux-2.5/include/asm-ppc64/elf.h   2005-03-07 08:21:53.000000000 +1100
+++ g5-ppc64/include/asm-ppc64/elf.h    2005-03-17 19:34:20.000000000 +1100
@@ -226,6 +226,13 @@
        else if (current->personality != PER_LINUX32)           \
                set_personality(PER_LINUX);                     \
 } while (0)
+
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ */
+#define elf_read_implies_exec(ex, exec_stk)    (exec_stk != EXSTACK_DISABLE_X)
+
 #endif
 
 /*
diff -urN linux-2.5/include/asm-ppc64/page.h g5-ppc64/include/asm-ppc64/page.h
--- linux-2.5/include/asm-ppc64/page.h  2005-03-07 08:21:54.000000000 +1100
+++ g5-ppc64/include/asm-ppc64/page.h   2005-03-17 19:31:09.000000000 +1100
@@ -235,7 +235,26 @@
 
 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
-#define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | VM_EXEC | \
+/*
+ * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
+ * and needs to be executable.  This means the whole heap ends
+ * up being executable.
+ */
+#define VM_DATA_DEFAULT_FLAGS32        (VM_READ | VM_WRITE | VM_EXEC | \
+                                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64        (VM_READ | VM_WRITE | \
+                                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+       (test_thread_flag(TIF_32BIT) ? \
+        VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+/*
+ * This is the default if a program doesn't have a PT_GNU_STACK
+ * program header entry.
+ */
+#define VM_STACK_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
                                 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #endif /* __KERNEL__ */
diff -urN linux-2.5/include/asm-ppc64/pgtable.h 
g5-ppc64/include/asm-ppc64/pgtable.h
--- linux-2.5/include/asm-ppc64/pgtable.h       2005-03-07 17:58:40.000000000 
+1100
+++ g5-ppc64/include/asm-ppc64/pgtable.h        2005-03-17 09:04:49.000000000 
+1100
@@ -82,14 +82,14 @@
 #define _PAGE_PRESENT  0x0001 /* software: pte contains a translation */
 #define _PAGE_USER     0x0002 /* matches one of the PP bits */
 #define _PAGE_FILE     0x0002 /* (!present only) software: pte holds file 
offset */
-#define _PAGE_RW       0x0004 /* software: user write access allowed */
+#define _PAGE_EXEC     0x0004 /* No execute on POWER4 and newer (we invert) */
 #define _PAGE_GUARDED  0x0008
 #define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
 #define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
 #define _PAGE_WRITETHRU        0x0040 /* W: cache write-through */
 #define _PAGE_DIRTY    0x0080 /* C: page changed */
 #define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_EXEC     0x0200 /* software: i-cache coherence required */
+#define _PAGE_RW       0x0200 /* software: user write access allowed */
 #define _PAGE_HASHPTE  0x0400 /* software: pte has an associated HPTE */
 #define _PAGE_BUSY     0x0800 /* software: PTE & hash are busy */ 
 #define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
@@ -118,29 +118,38 @@
 #define PAGE_KERNEL    __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
 #define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
                               _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)
 
 /*
- * The PowerPC can only do execute protection on a segment (256MB) basis,
- * not on a page basis.  So we consider execute permission the same as read.
+ * This bit in a hardware PTE indicates that the page is *not* executable.
+ */
+#define HW_NO_EXEC     _PAGE_EXEC
+
+/*
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
  * Also, write permissions imply read permissions.
  * This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
  */
 #define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_X
+#define __P001 PAGE_READONLY
 #define __P010 PAGE_COPY
-#define __P011 PAGE_COPY_X
-#define __P100 PAGE_READONLY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
 #define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY
+#define __P110 PAGE_COPY_X
 #define __P111 PAGE_COPY_X
 
 #define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_X
+#define __S001 PAGE_READONLY
 #define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED_X
-#define __S100 PAGE_READONLY
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
 #define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED
+#define __S110 PAGE_SHARED_X
 #define __S111 PAGE_SHARED_X
 
 #ifndef __ASSEMBLY__
@@ -438,7 +447,7 @@
 static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
 {
        unsigned long bits = pte_val(entry) &
-               (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
+               (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
        unsigned long old, tmp;
 
        __asm__ __volatile__(
diff -urN linux-2.5/include/asm-ppc64/processor.h 
g5-ppc64/include/asm-ppc64/processor.h
--- linux-2.5/include/asm-ppc64/processor.h     2005-03-07 08:21:54.000000000 
+1100
+++ g5-ppc64/include/asm-ppc64/processor.h      2005-03-15 17:08:21.000000000 
+1100
@@ -173,6 +173,11 @@
 #define        SPRN_DEC        0x016   /* Decrement Register */
 #define        SPRN_DMISS      0x3D0   /* Data TLB Miss Register */
 #define        SPRN_DSISR      0x012   /* Data Storage Interrupt Status 
Register */
+#define   DSISR_NOHPTE         0x40000000      /* no translation found */
+#define   DSISR_PROTFAULT      0x08000000      /* protection fault */
+#define   DSISR_ISSTORE                0x02000000      /* access was a store */
+#define   DSISR_DABRMATCH      0x00400000      /* hit data breakpoint */
+#define   DSISR_NOSEGMENT      0x00200000      /* STAB/SLB miss */
 #define        SPRN_EAR        0x11A   /* External Address Register */
 #define        SPRN_ESR        0x3D4   /* Exception Syndrome Register */
 #define          ESR_IMCP      0x80000000      /* Instr. Machine Check - 
Protection */
diff -urN linux-2.5/include/asm-ppc64/sections.h 
g5-ppc64/include/asm-ppc64/sections.h
--- linux-2.5/include/asm-ppc64/sections.h      2004-02-12 18:17:28.000000000 
+1100
+++ g5-ppc64/include/asm-ppc64/sections.h       2005-03-15 16:55:05.000000000 
+1100
@@ -17,4 +17,13 @@
 #define __openfirmware
 #define __openfirmwaredata
 
+
+static inline int in_kernel_text(unsigned long addr)
+{
+       if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
+               return 1;
+
+       return 0;
+}
+
 #endif
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to