On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote: > _PAGE_PRIVILEGED means the page can be accessed only by kernel. This is done > to keep pte bits similar to PowerISA 3.0 radix PTE format. User > pages are now makred by clearing _PAGE_PRIVILEGED bit. > > Previously we allowed kernel to have a privileged page > in the lower address range(USER_REGION). With this patch such access > is denied. > > We also prevent a kernel access to a non-privileged page in > higher address range (ie, REGION_ID != 0). Both the above access > scenario should never happen.
A few comments below. I didn't find any issues, just some potential cleanups. Mikey > Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/book3s/64/hash.h | 34 > ++++++++++++++-------------- > arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++- > arch/powerpc/mm/hash64_4k.c | 2 +- > arch/powerpc/mm/hash64_64k.c | 4 ++-- > arch/powerpc/mm/hash_utils_64.c | 17 ++++++++------ > arch/powerpc/mm/hugepage-hash64.c | 2 +- > arch/powerpc/mm/hugetlbpage-hash64.c | 3 ++- > arch/powerpc/mm/hugetlbpage.c | 2 +- > arch/powerpc/mm/pgtable.c | 15 ++++++++++-- > arch/powerpc/mm/pgtable_64.c | 15 +++++++++--- > arch/powerpc/platforms/cell/spufs/fault.c | 2 +- > drivers/misc/cxl/fault.c | 5 ++-- > 12 files changed, 80 insertions(+), 39 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h > b/arch/powerpc/include/asm/book3s/64/hash.h > index f092d83fa623..fbefbaa92736 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash.h > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > @@ -20,7 +20,7 @@ > #define _PAGE_READ 0x00004 /* read access allowed */ > #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) > #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) > -#define _PAGE_USER 0x00008 /* page may be accessed by userspace */ > +#define _PAGE_PRIVILEGED 0x00008 /* page can only be access by kernel */ /* page can only be accessed by kernel */ Or just /* kernel access only */ > #define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) > page */ > /* M (memory coherence) is always set in the HPTE, so we don't need it here > */ > #define _PAGE_COHERENT 0x0 > @@ -114,10 +114,13 @@ > #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN > #endif /* CONFIG_PPC_MM_SLICES */ > > -/* No separate kernel read-only */ > -#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access > blocked by key */ > +/* > + * No separate kernel read-only, user access blocked by key > + */ > +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | > _PAGE_DIRTY) > #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW > -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) > +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ > + _PAGE_RW | _PAGE_EXEC) > > /* Strong Access Ordering */ > #define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | > _PAGE_COHERENT) > @@ -149,7 +152,7 @@ > */ > #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | > _PAGE_NO_CACHE | \ > _PAGE_WRITETHRU | _PAGE_4K_PFN | \ > - _PAGE_USER | _PAGE_ACCESSED | _PAGE_READ |\ > + _PAGE_PRIVILEGED | _PAGE_ACCESSED | _PAGE_READ |\ > _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ > _PAGE_SOFT_DIRTY) > /* > @@ -171,16 +174,13 @@ > * > * Note due to the way vm flags are laid out, the bits are XWR > */ > -#define PAGE_NONE __pgprot(_PAGE_BASE) > -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) > -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ > - _PAGE_EXEC) > -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ) > -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \ > - _PAGE_EXEC) > -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ) > -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \ > - _PAGE_EXEC) > +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) > +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) > +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC) > +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ) > +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) > +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) > +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) Eyeballing these, they seemed to have been converted ok > > #define __P000 PAGE_NONE > #define __P001 PAGE_READONLY > @@ -421,8 +421,8 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte) > */ > static inline int pte_protnone(pte_t pte) > { > - return (pte_val(pte) & > - (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; > + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) == > + (_PAGE_PRESENT | _PAGE_PRIVILEGED); > } > #endif /* CONFIG_NUMA_BALANCING */ > > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h > b/arch/powerpc/include/asm/book3s/64/pgtable.h > index 4ac6221802ad..97d06de8dbf6 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd); > > static inline bool pte_user(pte_t pte) > { > - return (pte_val(pte) & _PAGE_USER); > + return !(pte_val(pte) & _PAGE_PRIVILEGED); This function might be usable in some places you have in the patch. > } > > #ifdef CONFIG_MEM_SOFT_DIRTY > @@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) > } > #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ > > +static inline bool check_pte_access(unsigned long access, unsigned long ptev) > +{ > + /* > + * This check for _PAGE_RWX and _PAG_PRESENT bits > + */ > + if (access & ~ptev) Is this really doing what the comment says? Also small typo _PAG_ => _PAGE_ > + return false; > + /* > + * This check for access to privilege space > + */ > + if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED)) > + return false; > + > + return true; > +} > + > void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); > void pgtable_cache_init(void); > > diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c > index 7ebac279d38e..42ba12c184e1 100644 > --- a/arch/powerpc/mm/hash64_4k.c > +++ b/arch/powerpc/mm/hash64_4k.c > @@ -38,7 +38,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, > unsigned long vsid, > if (unlikely(old_pte & _PAGE_BUSY)) > return 0; > /* If PTE permissions don't match, take page fault */ Is this comment still relevant? Same below. > - if (unlikely(access & ~old_pte)) > + if (unlikely(!check_pte_access(access, old_pte))) > return 1; > /* > * Try to lock the PTE, add ACCESSED and DIRTY if it was > diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c > index 83ac9f658733..f33b410d6c8a 100644 > --- a/arch/powerpc/mm/hash64_64k.c > +++ b/arch/powerpc/mm/hash64_64k.c > @@ -70,7 +70,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, > unsigned long vsid, > if (unlikely(old_pte & _PAGE_BUSY)) > return 0; > /* If PTE permissions don't match, take page fault */ > - if (unlikely(access & ~old_pte)) > + if (unlikely(!check_pte_access(access, old_pte))) > return 1; > /* > * Try to lock the PTE, add ACCESSED and DIRTY if it was > @@ -241,7 +241,7 @@ int __hash_page_64K(unsigned long ea, unsigned long > access, > if (unlikely(old_pte & _PAGE_BUSY)) > return 0; > /* If PTE permissions don't match, take page fault */ > - if (unlikely(access & ~old_pte)) > + if (unlikely(!check_pte_access(access, old_pte))) > return 1; > /* > * Check if PTE has the cache-inhibit bit set > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c > index ec37f4b0a8ff..630603f74056 100644 > --- a/arch/powerpc/mm/hash_utils_64.c > +++ b/arch/powerpc/mm/hash_utils_64.c > @@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long > pteflags) > * User area is mapped with PP=0x2 for read/write > * or PP=0x3 for read-only (including writeable but clean pages). > */ > - if (pteflags & _PAGE_USER) { > + if (!(pteflags & _PAGE_PRIVILEGED)) { Could use pte_user() here? Maybe other places too. > if (pteflags & _PAGE_RWX) > rflags |= 0x2; > if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) > @@ -1086,7 +1086,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, > /* Pre-check access permissions (will be re-checked atomically > * in __hash_page_XX but this pre-check is a fast path > */ > - if (access & ~pte_val(*ptep)) { > + if (!check_pte_access(access, pte_val(*ptep))) { > DBG_LOW(" no access !\n"); > rc = 1; > goto bail; > @@ -1224,12 +1224,15 @@ int __hash_page(unsigned long ea, unsigned long msr, > unsigned long trap, > if (dsisr & DSISR_ISSTORE) > access |= _PAGE_WRITE; > /* > - * We need to set the _PAGE_USER bit if MSR_PR is set or if we are > - * accessing a userspace segment (even from the kernel). We assume > - * kernel addresses always have the high bit set. > + * We set _PAGE_PRIVILEGED only when > + * kernel mode access kernel space. > + * > + * _PAGE_PRIVILEGED is NOT set > + * 1) when kernel mode access user space > + * 2) user space access kernel space. > */ > - if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) > - access |= _PAGE_USER; > + if (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID)) > + access |= _PAGE_PRIVILEGED; This is a bit ugly: (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID)) You could set _PAGE_PRIVILEGED and then clear it using the same if statement as before. Might be easier to read. ie access |= _PAGE_PRIVILEGED; if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) access &= ~(_PAGE_PRIVILEGED); > > if (trap == 0x400) > access |= _PAGE_EXEC; > diff --git a/arch/powerpc/mm/hugepage-hash64.c > b/arch/powerpc/mm/hugepage-hash64.c > index 39342638a498..182f1d3fe73c 100644 > --- a/arch/powerpc/mm/hugepage-hash64.c > +++ b/arch/powerpc/mm/hugepage-hash64.c > @@ -41,7 +41,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, > unsigned long vsid, > if (unlikely(old_pmd & _PAGE_BUSY)) > return 0; > /* If PMD permissions don't match, take page fault */ As above, is this comment still correct? The comment says "permissions don't match" but the function call is "check_pte_access()". Seems like a different concept. > - if (unlikely(access & ~old_pmd)) > + if (unlikely(!check_pte_access(access, old_pmd))) > return 1; > /* > * Try to lock the PTE, add ACCESSED and DIRTY if it was > diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c > b/arch/powerpc/mm/hugetlbpage-hash64.c > index e6e54a04bd32..96765510a49c 100644 > --- a/arch/powerpc/mm/hugetlbpage-hash64.c > +++ b/arch/powerpc/mm/hugetlbpage-hash64.c > @@ -51,8 +51,9 @@ int __hash_page_huge(unsigned long ea, unsigned long > access, unsigned long vsid, > if (unlikely(old_pte & _PAGE_BUSY)) > return 0; > /* If PTE permissions don't match, take page fault */ > - if (unlikely(access & ~old_pte)) > + if (unlikely(!check_pte_access(access, old_pte))) Same comment again.... > return 1; > + > /* Try to lock the PTE, add ACCESSED and DIRTY if it was > * a write access */ > new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; > diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c > index 6e52e722d3f2..7201e9c624d5 100644 > --- a/arch/powerpc/mm/hugetlbpage.c > +++ b/arch/powerpc/mm/hugetlbpage.c > @@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned > long addr, > end = pte_end; > > pte = READ_ONCE(*ptep); > - mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; > + mask = _PAGE_PRESENT | _PAGE_READ; > if (write) > mask |= _PAGE_WRITE; > > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > index 98b5c03e344d..7b492283d502 100644 > --- a/arch/powerpc/mm/pgtable.c > +++ b/arch/powerpc/mm/pgtable.c > @@ -43,9 +43,20 @@ static inline int is_exec_fault(void) > */ > static inline int pte_looks_normal(pte_t pte) > { > + > +#if defined(CONFIG_PPC_BOOK3S_64) > + if ((pte_val(pte) & > + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == > + _PAGE_PRESENT) { > + if (!(pte_val(pte) & _PAGE_PRIVILEGED)) > + return 1; > + } > + return 0; > +#else > return (pte_val(pte) & > - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == > - (_PAGE_PRESENT | _PAGE_USER); > + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) > == > + (_PAGE_PRESENT | _PAGE_USER); > +#endif > } > > static struct page *maybe_pte_to_page(pte_t pte) > diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c > index 00d8d985bba3..441905f7bba4 100644 > --- a/arch/powerpc/mm/pgtable_64.c > +++ b/arch/powerpc/mm/pgtable_64.c > @@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned > long size, > if (flags & _PAGE_WRITE) > flags |= _PAGE_DIRTY; > > - /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ > - flags &= ~(_PAGE_USER | _PAGE_EXEC); > + /* we don't want to let _PAGE_EXEC leak out */ > + flags &= ~_PAGE_EXEC; > + /* > + * Force kernel mapping. > + */ > +#if defined(CONFIG_PPC_BOOK3S_64) > + flags |= _PAGE_PRIVILEGED; > +#else > + flags &= ~_PAGE_USER; > +#endif > + > > #ifdef _PAGE_BAP_SR > /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format > @@ -669,7 +678,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma, > * the translation is still valid, because we will withdraw > * pgtable_t after this. > */ > - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); > + pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED); > } > > > diff --git a/arch/powerpc/platforms/cell/spufs/fault.c > b/arch/powerpc/platforms/cell/spufs/fault.c > index c3a3bf1745b7..e29e4d5afa2d 100644 > --- a/arch/powerpc/platforms/cell/spufs/fault.c > +++ b/arch/powerpc/platforms/cell/spufs/fault.c You need to CC the spufs maintainer for this one. > @@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx) > /* we must not hold the lock when entering copro_handle_mm_fault */ > spu_release(ctx); > > - access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER); > + access = (_PAGE_PRESENT | _PAGE_READ); > access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL; > local_irq_save(flags); > ret = hash_page(ea, access, 0x300, dsisr); > diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c > index a3d5e1e16c21..23e37cd41e64 100644 > --- a/drivers/misc/cxl/fault.c > +++ b/drivers/misc/cxl/fault.c You need to CC both cxl driver maintainers for this one. > @@ -152,8 +152,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, > access = _PAGE_PRESENT | _PAGE_READ; > if (dsisr & CXL_PSL_DSISR_An_S) > access |= _PAGE_WRITE; > - if ((!ctx->kernel) || ~(dar & (1ULL << 63))) > - access |= _PAGE_USER; > + > + if (ctx->kernel && (dar & (1ULL << 63))) > + access |= _PAGE_PRIVILEGED; > > if (dsisr & DSISR_NOHPTE) > inv_flags |= HPTE_NOHPTE_UPDATE; _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev