On 27/03/16 19:23, Aneesh Kumar K.V wrote:
> PowerISA 3.0 introduces two pte bits with the below meaning w.r.t Radix
>  00 -> Normal Memory
>  01 -> Strong Access Order
>  10 -> Non idempotent I/O (Cache inhibited and guarded)
>  11 -> Tolerant I/O (Cache inhibited)
>
> We drop the existing WIMG bits in the linux page table in favour of the
> above constants. We lose _PAGE_WRITETHRU with this conversion. We only use
> writethru via pgprot_cached_wthru(), which is used by fbdev/controlfb.c
> (the Apple control display) and also PPC32.
>
> With respect to _PAGE_COHERENCE, we have been marking hptes
> always coherent for some time now; htab_convert_pte_flags always added
> HPTE_R_M.
>
> NOTE: KVM changes need closer review.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/hash.h | 72 ++++++++++++++-----------------
>  arch/powerpc/include/asm/kvm_book3s_64.h  | 27 +++++------
>  arch/powerpc/kvm/book3s_64_mmu_hv.c       | 11 +++--
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 12 +++---
>  arch/powerpc/mm/hash64_64k.c              |  2 +-
>  arch/powerpc/mm/hash_utils_64.c           | 18 ++++----
>  arch/powerpc/mm/pgtable.c                 |  8 ++--
>  arch/powerpc/mm/pgtable_64.c              |  4 --
>  arch/powerpc/platforms/pseries/lpar.c     |  4 --
>  9 files changed, 67 insertions(+), 91 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index 2a80981f1b0b..fd2d0ebfc49c 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -20,12 +20,10 @@
>  #define _PAGE_READ		0x00004	/* read access allowed */
>  #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
>  #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
> -#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */

This change is redundant...

> -#define _PAGE_GUARDED		0x00010 /* G: guarded (side-effect) page */
> -/* M (memory coherence) is always set in the HPTE, so we don't need it here */
> -#define _PAGE_COHERENT		0x0
> -#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
> -#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
> +#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */

...we add it right back here.

> +#define _PAGE_SAO		0x00010 /* Strong access order */
> +#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */

I think the comment is not very useful; it just restates the define.

> +#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
>  #define _PAGE_DIRTY		0x00080 /* C: page changed */
>  #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
>  #define _PAGE_SPECIAL		0x00400 /* software: special page */
> @@ -43,7 +41,12 @@
>  #define _PAGE_HASHPTE		(1ul << 61)	/* PTE has associated HPTE */
>  #define _PAGE_PTE		(1ul << 62)	/* distinguishes PTEs from pointers */
>  #define _PAGE_PRESENT		(1ul << 63)	/* pte contains a translation */
> -
> +/*
> + * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
> + * Instead of fixing all of them, add an alternate define which
> + * maps CI pte mapping.
> + */
> +#define _PAGE_NO_CACHE		_PAGE_TOLERANT
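For anyone else following along, this is how I read the new two-bit
encoding against the ISA table in the commit message. A hypothetical
decoder, not something the patch needs (_PAGE_CACHE_CTL is the 0x30
mask the patch adds further down in this file):

	/* Sketch only: decode the new cache-control field of a pte. */
	static inline const char *pte_cache_mode(pte_t pte)
	{
		switch (pte_val(pte) & _PAGE_CACHE_CTL) {
		case 0:				/* 00 */
			return "normal memory";
		case _PAGE_SAO:			/* 01 */
			return "strong access order";
		case _PAGE_NON_IDEMPOTENT:	/* 10 */
			return "non-idempotent I/O (CI + guarded)";
		default:			/* 11 == _PAGE_TOLERANT */
			return "tolerant I/O (CI)";
		}
	}
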
>  /*
>   * We need to differentiate between explicit huge page and THP huge
>   * page, since THP huge page also need to track real subpage details
> @@ -122,9 +125,6 @@
>  #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | \
>  				 _PAGE_RW | _PAGE_EXEC)
>
> -/* Strong Access Ordering */
> -#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
> -
>  /* No page size encoding in the linux PTE */
>  #define _PAGE_PSIZE		0
>
> @@ -150,10 +150,9 @@
>  /*
>   * Mask of bits returned by pte_pgprot()
>   */
> -#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
> -			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
> -			 _PAGE_PRIVILEGED | _PAGE_ACCESSED | _PAGE_READ |\
> -			 _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
> +#define PAGE_PROT_BITS	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
> +			 _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
> +			 _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
>  			 _PAGE_SOFT_DIRTY)
>  /*
>   * We define 2 sets of base prot bits, one for basic pages (ie,
> @@ -162,7 +161,7 @@
>   * the processor might need it for DMA coherency.
>   */
>  #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
> -#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
> +#define _PAGE_BASE	(_PAGE_BASE_NC)
>
>  /* Permission masks used to generate the __P and __S table,
>   *
> @@ -203,9 +202,9 @@
>  /* Permission masks used for kernel mappings */
>  #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
>  #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
> -				 _PAGE_NO_CACHE)
> +				 _PAGE_TOLERANT)

I don't think this change is required, given that _PAGE_NO_CACHE is now
_PAGE_TOLERANT.

>  #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
> -				 _PAGE_NO_CACHE | _PAGE_GUARDED)
> +				 _PAGE_NON_IDEMPOTENT)
>  #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
>  #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
>  #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
> @@ -512,45 +511,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
>  	*ptep = pte;
>  }
>
> -/*
> - * Macro to mark a page protection value as "uncacheable".
> - */
> -
> -#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
> -			 _PAGE_WRITETHRU)
> +#define _PAGE_CACHE_CTL	(_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
>
>  #define pgprot_noncached pgprot_noncached
>  static inline pgprot_t pgprot_noncached(pgprot_t prot)
>  {
>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_NO_CACHE | _PAGE_GUARDED);
> +			_PAGE_NON_IDEMPOTENT);
>  }
>
>  #define pgprot_noncached_wc pgprot_noncached_wc
>  static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
>  {
>  	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_NO_CACHE);
> +			_PAGE_TOLERANT);

Same as before.

>  }
>
>  #define pgprot_cached pgprot_cached
>  static inline pgprot_t pgprot_cached(pgprot_t prot)
>  {
> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_COHERENT);
> -}
> -
> -#define pgprot_cached_wthru pgprot_cached_wthru
> -static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
> -{
> -	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
> -			_PAGE_COHERENT | _PAGE_WRITETHRU);
> -}
> -
> -#define pgprot_cached_noncoherent pgprot_cached_noncoherent
> -static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
> -{
> -	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
> +	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
>  }
>
>  #define pgprot_writecombine pgprot_writecombine
> @@ -558,6 +538,18 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot)
>  {
>  	return pgprot_noncached_wc(prot);
>  }
> +/*
> + * check a pte mapping have cache inhibited property
> + */
> +static inline bool pte_ci(pte_t pte)
> +{
> +	unsigned long pte_v = pte_val(pte);
> +
> +	if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
> +	    ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
> +		return true;
> +	return false;
> +}
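On pte_ci(): since both cache-inhibited encodings have the 0x20 bit set
and the other two values don't, I believe this reduces to a single test.
Sketch below, same result as the patch version as far as I can see,
though it trades on the encoding rather than spelling it out, so the
explicit comparisons may well be clearer:

	/* Sketch only: _PAGE_NON_IDEMPOTENT (0x20) is set in exactly
	 * the two CI encodings (0x20 and 0x30).
	 */
	static inline bool pte_ci(pte_t pte)
	{
		return !!(pte_val(pte) & _PAGE_NON_IDEMPOTENT);
	}
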
>
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index f9a7a89a3e4f..ebdaf576cf26 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -278,19 +278,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
>  	return ptel;
>  }
>
> -static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
> +static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
>  {
> -	unsigned int wimg = ptel & HPTE_R_WIMG;
> +	unsigned int wimg = hptel & HPTE_R_WIMG;
>
>  	/* Handle SAO */
>  	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
>  	    cpu_has_feature(CPU_FTR_ARCH_206))
>  		wimg = HPTE_R_M;
>
> -	if (!io_type)
> +	if (!is_ci)
>  		return wimg == HPTE_R_M;
> -
> -	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
> +	/*
> +	 * if host is mapped cache inhibited, make sure hptel also have
> +	 * cache inhibited.
> +	 */

This comment really applies to the !!(wimg & HPTE_R_I) check below, not
to the HPTE_R_W test.

> +	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
> +		return false;

This says the page cannot be cache inhibited and writethrough?
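Spelling that out (my reading of the new check):

	is_ci	guest wimg	result
	false	M		ok (wimg == HPTE_R_M)
	false	anything else	rejected
	true	I or I|G	ok (wimg & HPTE_R_I)
	true	anything with W	rejected by the HPTE_R_W test

Since _PAGE_WRITETHRU is gone, the host side can never be mapped
writethrough any more, so refusing W from the guest looks consistent
with the rest of the patch; the FIXME is really asking whether any
existing guest sets W|I and expects it to keep working.
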
> +	return !!(wimg & HPTE_R_I);
>  }
>
>  /*
> @@ -333,18 +338,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
>  	return new_pte;
>  }
>
> -
> -/* Return HPTE cache control bits corresponding to Linux pte bits */
> -static inline unsigned long hpte_cache_bits(unsigned long pte_val)
> -{
> -#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
> -	return pte_val & (HPTE_R_W | HPTE_R_I);
> -#else
> -	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
> -		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
> -#endif
> -}
> -
>  static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
>  {
>  	if (key)
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index c7b78d8336b2..05f09ae82587 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	struct revmap_entry *rev;
>  	struct page *page, *pages[1];
>  	long index, ret, npages;
> -	unsigned long is_io;
> +	bool is_ci;
>  	unsigned int writing, write_ok;
>  	struct vm_area_struct *vma;
>  	unsigned long rcbits;
> @@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	smp_rmb();
>
>  	ret = -EFAULT;
> -	is_io = 0;
> +	is_ci = false;
>  	pfn = 0;
>  	page = NULL;
>  	pte_size = PAGE_SIZE;
> @@ -521,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  			pfn = vma->vm_pgoff +
>  				((hva - vma->vm_start) >> PAGE_SHIFT);
>  			pte_size = psize;
> -			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
> +			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
>  			write_ok = vma->vm_flags & VM_WRITE;
>  		}
>  		up_read(&current->mm->mmap_sem);
> @@ -558,10 +558,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  		goto out_put;
>
>  	/* Check WIMG vs. the actual page we're accessing */
> -	if (!hpte_cache_flags_ok(r, is_io)) {
> -		if (is_io)
> +	if (!hpte_cache_flags_ok(r, is_ci)) {
> +		if (is_ci)
>  			goto out_put;
> -
>  		/*
>  		 * Allow guest to map emulated device memory as
>  		 * uncacheable, but actually make it cacheable.
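Minor readability thought on the hunk above: the __pte(pgprot_val(...))
round-trip works, but a small pgprot-level wrapper might read better at
the call sites. The helper name is invented; untested sketch:

	/* Hypothetical convenience wrapper, not in this patch. */
	static inline bool pgprot_ci(pgprot_t prot)
	{
		return pte_ci(__pte(pgprot_val(prot)));
	}

with which the fault path would read "is_ci = pgprot_ci(vma->vm_page_prot);".
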
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 4cb8db05f3e5..99b4e9d5dd23 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -175,7 +175,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  	unsigned long g_ptel;
>  	struct kvm_memory_slot *memslot;
>  	unsigned hpage_shift;
> -	unsigned long is_io;
> +	bool is_ci;
>  	unsigned long *rmap;
>  	pte_t *ptep;
>  	unsigned int writing;
> @@ -199,7 +199,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  	gfn = gpa >> PAGE_SHIFT;
>  	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
>  	pa = 0;
> -	is_io = ~0ul;
> +	is_ci = false;
>  	rmap = NULL;
>  	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
>  		/* Emulated MMIO - mark this with key=31 */
> @@ -250,7 +250,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  		if (writing && !pte_write(pte))
>  			/* make the actual HPTE be read-only */
>  			ptel = hpte_make_readonly(ptel);
> -		is_io = hpte_cache_bits(pte_val(pte));
> +		is_ci = pte_ci(pte);
>  		pa = pte_pfn(pte) << PAGE_SHIFT;
>  		pa |= hva & (host_pte_size - 1);
>  		pa |= gpa & ~PAGE_MASK;
> @@ -267,9 +267,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
>  	else
>  		pteh |= HPTE_V_ABSENT;
>
> -	/* Check WIMG */
> -	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
> -		if (is_io)
> +	/*If we had host pte mapping then Check WIMG */
> +	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
> +		if (is_ci)
>  			return H_PARAMETER;
>  		/*
>  		 * Allow guest to map emulated device memory as
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index f33b410d6c8a..419562b0e9c8 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -248,7 +248,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
>  	 * If so, bail out and refault as a 4k page
>  	 */
>  	if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
> -	    unlikely(old_pte & _PAGE_NO_CACHE))
> +	    unlikely(pte_ci(pte)))
>  		return 0;
>  	/*
>  	 * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 59d4600bacd5..e924690a5a0e 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -192,12 +192,13 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
>  	/*
>  	 * Add in WIG bits
>  	 */
> -	if (pteflags & _PAGE_WRITETHRU)
> -		rflags |= HPTE_R_W;
> -	if (pteflags & _PAGE_NO_CACHE)
> +
> +	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
>  		rflags |= HPTE_R_I;
> -	if (pteflags & _PAGE_GUARDED)
> -		rflags |= HPTE_R_G;
> +	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
> +		rflags |= (HPTE_R_I | HPTE_R_G);
> +	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
> +		rflags |= (HPTE_R_I | HPTE_R_W);
>
>  	return rflags;
>  }
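Nit on htab_convert_pte_flags(): the three _PAGE_CACHE_CTL values are
mutually exclusive, so a switch (or else-if chain) would make that
explicit and save the repeated masking. Sketch, intended to behave
exactly like the patch version:

	switch (pteflags & _PAGE_CACHE_CTL) {
	case _PAGE_TOLERANT:
		rflags |= HPTE_R_I;
		break;
	case _PAGE_NON_IDEMPOTENT:
		rflags |= (HPTE_R_I | HPTE_R_G);
		break;
	case _PAGE_SAO:
		rflags |= (HPTE_R_I | HPTE_R_W);
		break;
	}
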
> @@ -1138,8 +1139,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
>  	/* If this PTE is non-cacheable and we have restrictions on
>  	 * using non cacheable large pages, then we switch to 4k
>  	 */
> -	if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
> -	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
> +	if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
>  		if (user_region) {
>  			demote_segment_4k(mm, ea);
>  			psize = MMU_PAGE_4K;
> @@ -1293,13 +1293,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
>
>  	WARN_ON(hugepage_shift);
>  #ifdef CONFIG_PPC_64K_PAGES
> -	/* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
> +	/* If either _PAGE_4K_PFN or cache inhibited is set (and we are on
>  	 * a 64K kernel), then we don't preload, hash_page() will take
>  	 * care of it once we actually try to access the page.
>  	 * That way we don't have to duplicate all of the logic for segment
>  	 * page size demotion here
>  	 */
> -	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
> +	if ((pte_val(*ptep) & _PAGE_4K_PFN) || pte_ci(*ptep))
>  		goto out_exit;
>  #endif /* CONFIG_PPC_64K_PAGES */
>
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index a34884beaa47..115a0a19d5a2 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -38,16 +38,16 @@ static inline int is_exec_fault(void)
>
>  /* We only try to do i/d cache coherency on stuff that looks like
>   * reasonably "normal" PTEs. We currently require a PTE to be present
> - * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
> + * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
>   * on userspace PTEs
>   */
>  static inline int pte_looks_normal(pte_t pte)
>  {
>
>  #if defined(CONFIG_PPC_BOOK3S_64)
> -	if ((pte_val(pte) &
> -	     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
> -	    _PAGE_PRESENT) {
> +	if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
> +		if (pte_ci(pte))
> +			return 0;
>  		if (pte_user(pte))
>  			return 1;
>  	}
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 6f1b7064f822..db924c54f370 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -167,10 +167,6 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
>  	if ((flags & _PAGE_PRESENT) == 0)
>  		flags |= pgprot_val(PAGE_KERNEL);
>
> -	/* Non-cacheable page cannot be coherent */
> -	if (flags & _PAGE_NO_CACHE)
> -		flags &= ~_PAGE_COHERENT;
> -
>  	/* We don't support the 4K PFN hack with ioremap */
>  	if (flags & _PAGE_4K_PFN)
>  		return NULL;
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 2415a0d31f8f..0d4608990702 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
>  	/* Exact = 0 */
>  	flags = 0;
>
> -	/* Make pHyp happy */
> -	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
> -		hpte_r &= ~HPTE_R_M;
> -
>  	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
>  		flags |= H_COALESCE_CAND;
>
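One more nit, on the pgtable.c hunk above: pte_looks_normal() could keep
the single-expression shape if you prefer. Sketch for the BOOK3S_64
branch only, same result as the patch if I'm reading the surrounding
code right:

	return ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) ==
		_PAGE_PRESENT) && !pte_ci(pte) && pte_user(pte);
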
Balbir