Re: [Xen-devel] [PATCH v2 01/34] mm/gup: add make_dirty arg to put_user_pages_dirty_lock()

2019-08-06 Thread Ira Weiny
On Sun, Aug 04, 2019 at 03:48:42PM -0700, john.hubb...@gmail.com wrote:
> From: John Hubbard 
> 
> Provide a more capable variation of put_user_pages_dirty_lock(),
> and delete put_user_pages_dirty(). This is based on the
> following:
> 
> 1. Lots of call sites become simpler if a bool is passed
> into put_user_page*(), instead of making the call site
> choose which put_user_page*() variant to call.
> 
> 2. Christoph Hellwig's observation that set_page_dirty_lock()
> is usually correct, and set_page_dirty() is usually a
> bug, or at least questionable, within a put_user_page*()
> calling chain.
> 
> This leads to the following API choices:
> 
> * put_user_pages_dirty_lock(page, npages, make_dirty)
> 
> * There is no put_user_pages_dirty(). You have to
>   hand code that, in the rare case that it's
>   required.
> 
> Reviewed-by: Christoph Hellwig 
> Cc: Matthew Wilcox 
> Cc: Jan Kara 
> Cc: Ira Weiny 
> Cc: Jason Gunthorpe 
> Signed-off-by: John Hubbard 
> ---
>  drivers/infiniband/core/umem.c |   5 +-
>  drivers/infiniband/hw/hfi1/user_pages.c|   5 +-
>  drivers/infiniband/hw/qib/qib_user_pages.c |  13 +--
>  drivers/infiniband/hw/usnic/usnic_uiom.c   |   5 +-
>  drivers/infiniband/sw/siw/siw_mem.c|  19 +---
>  include/linux/mm.h |   5 +-
>  mm/gup.c   | 115 +
>  7 files changed, 61 insertions(+), 106 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 08da840ed7ee..965cf9dea71a 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, 
> struct ib_umem *umem, int d
>  
>   for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
>   page = sg_page_iter_page(&sg_iter);
> - if (umem->writable && dirty)
> - put_user_pages_dirty_lock(&page, 1);
> - else
> - put_user_page(page);
> + put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
>   }
>  
>   sg_free_table(&umem->sg_head);
> diff --git a/drivers/infiniband/hw/hfi1/user_pages.c 
> b/drivers/infiniband/hw/hfi1/user_pages.c
> index b89a9b9aef7a..469acb961fbd 100644
> --- a/drivers/infiniband/hw/hfi1/user_pages.c
> +++ b/drivers/infiniband/hw/hfi1/user_pages.c
> @@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, 
> unsigned long vaddr, size_t np
>  void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
>size_t npages, bool dirty)
>  {
> - if (dirty)
> - put_user_pages_dirty_lock(p, npages);
> - else
> - put_user_pages(p, npages);
> + put_user_pages_dirty_lock(p, npages, dirty);
>  
>   if (mm) { /* during close after signal, mm can be NULL */
>   atomic64_sub(npages, &mm->pinned_vm);
> diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c 
> b/drivers/infiniband/hw/qib/qib_user_pages.c
> index bfbfbb7e0ff4..26c1fb8d45cc 100644
> --- a/drivers/infiniband/hw/qib/qib_user_pages.c
> +++ b/drivers/infiniband/hw/qib/qib_user_pages.c
> @@ -37,15 +37,6 @@
>  
>  #include "qib.h"
>  
> -static void __qib_release_user_pages(struct page **p, size_t num_pages,
> -  int dirty)
> -{
> - if (dirty)
> - put_user_pages_dirty_lock(p, num_pages);
> - else
> - put_user_pages(p, num_pages);
> -}
> -
>  /**
>   * qib_map_page - a safety wrapper around pci_map_page()
>   *
> @@ -124,7 +115,7 @@ int qib_get_user_pages(unsigned long start_page, size_t 
> num_pages,
>  
>   return 0;
>  bail_release:
> - __qib_release_user_pages(p, got, 0);
> + put_user_pages_dirty_lock(p, got, false);
>  bail:
>   atomic64_sub(num_pages, &current->mm->pinned_vm);
>   return ret;
> @@ -132,7 +123,7 @@ int qib_get_user_pages(unsigned long start_page, size_t 
> num_pages,
>  
>  void qib_release_user_pages(struct page **p, size_t num_pages)
>  {
> - __qib_release_user_pages(p, num_pages, 1);
> + put_user_pages_dirty_lock(p, num_pages, true);
>  
>   /* during close after signal, mm can be NULL */
>   if (current->mm)
> diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c 
> b/drivers/infiniband/hw/usnic/usnic_uiom.c
> index 0b0237d41613..62e6ffa9ad78 100644
> --- a/drivers/infiniband/hw/usnic/usnic_uiom.c
> +++ b/drivers/infiniband/hw/usnic/us
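
For reference, the shape of the conversion this patch makes at each call site,
sketched as an illustrative release helper (the three-argument
put_user_pages_dirty_lock() is the API described above; the surrounding
function and names are made up for illustration):

/* Before: each call site chooses the put_user_page*() variant itself. */
static void example_release_old(struct page **pages, unsigned long npages,
				bool dirty)
{
	if (dirty)
		put_user_pages_dirty_lock(pages, npages); /* old 2-arg form */
	else
		put_user_pages(pages, npages);
}

/* After: one call; the bool selects set_page_dirty_lock() internally. */
static void example_release_new(struct page **pages, unsigned long npages,
				bool dirty)
{
	put_user_pages_dirty_lock(pages, npages, dirty);
}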

Re: [Xen-devel] [PATCH 00/34] put_user_pages(): miscellaneous call sites

2019-08-07 Thread Ira Weiny
On Wed, Aug 07, 2019 at 10:46:49AM +0200, Michal Hocko wrote:
> On Wed 07-08-19 10:37:26, Jan Kara wrote:
> > On Fri 02-08-19 12:14:09, John Hubbard wrote:
> > > On 8/2/19 7:52 AM, Jan Kara wrote:
> > > > On Fri 02-08-19 07:24:43, Matthew Wilcox wrote:
> > > > > On Fri, Aug 02, 2019 at 02:41:46PM +0200, Jan Kara wrote:
> > > > > > On Fri 02-08-19 11:12:44, Michal Hocko wrote:
> > > > > > > On Thu 01-08-19 19:19:31, john.hubb...@gmail.com wrote:
> > > > > > > [...]
> > > > > > > > 2) Convert all of the call sites for get_user_pages*(), to
> > > > > > > > invoke put_user_page*(), instead of put_page(). This involves 
> > > > > > > > dozens of
> > > > > > > > call sites, and will take some time.
> > > > > > > 
> > > > > > > How do we make sure this is the case and it will remain the case 
> > > > > > > in the
> > > > > > > future? There must be some automagic to enforce/check that. It is 
> > > > > > > simply
> > > > > > > not manageable to do it every now and then because then 3) will 
> > > > > > > simply
> > > > > > > be never safe.
> > > > > > > 
> > > > > > > Have you considered coccinelle or some other scripted way to do the
> > > > > > > transition? I have no idea how to deal with future changes that 
> > > > > > > would
> > > > > > > break the balance though.
> > > 
> > > Hi Michal,
> > > 
> > > Yes, I've thought about it, and coccinelle falls a bit short (it's not 
> > > smart
> > > enough to know which put_page()'s to convert). However, there is a debug
> > > option planned: a yet-to-be-posted commit [1] uses struct page extensions
> > > (obviously protected by CONFIG_DEBUG_GET_USER_PAGES_REFERENCES) to add
> > > a redundant counter. That allows:
> > > 
> > > void __put_page(struct page *page)
> > > {
> > >   ...
> > >   /* Someone called put_page() instead of put_user_page() */
> > >   WARN_ON_ONCE(atomic_read(&page_ext->pin_count) > 0);
> > > 
> > > > > > 
> > > > > > Yeah, that's why I've been suggesting at LSF/MM that we may need to 
> > > > > > create
> > > > > > a gup wrapper - say vaddr_pin_pages() - and track which sites 
> > > > > > dropping
> > > > > > references got converted by using this wrapper instead of gup. The
> > > > > > counterpart would then be more logically named as unpin_page() or 
> > > > > > whatever
> > > > > > instead of put_user_page().  Sure this is not completely foolproof 
> > > > > > (you can
> > > > > > create new callsite using vaddr_pin_pages() and then just drop refs 
> > > > > > using
> > > > > > put_page()) but I suppose it would be a high enough barrier for 
> > > > > > missed
> > > > > > conversions... Thoughts?
> > > 
> > > The debug option above is still a bit simplistic in its implementation
> > > (and maybe not taking full advantage of the data it has), but I think
> > > it's preferable, because it monitors the "core" and WARNs.
> > > 
> > > Instead of the wrapper, I'm thinking: documentation and the passage of
> > > time, plus the debug option (perhaps enhanced--probably once I post it
> > > someone will notice opportunities), yes?
> > 
> > So I think your debug option and my suggested renaming serve a bit
> > different purposes (and thus both make sense). If you do the renaming, you
> > can just grep to see unconverted sites. Also when someone merges new GUP
> > user (unaware of the new rules) while you switch GUP to use pins instead of
> > ordinary references, you'll get compilation error in case of renaming
> > instead of hard to debug refcount leak without the renaming. And such
> > conflict is almost bound to happen given the size of GUP patch set... Also
> > the renaming serves against the "coding inertia" - i.e., GUP is around for
> > ages so people just use it without checking any documentation or comments.
> > After switching how GUP works, what used to be correct isn't anymore so
> > renaming the function serves as a warning that something has really
> > changed.
> 
> Fully agreed!

Ok.  Prior to this I've been basing all my work for the RDMA/FS DAX stuff on
John's put_user_pages()...  (Including when I proposed failing truncate with a
lease in June [1])

However, based on the suggestions in that thread it became clear that a new
interface would need to be added to pass the "RDMA file" information into GUP
to associate file pins with the correct processes...
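
(As a rough sketch of the shape such an interface could take, combining Jan's
vaddr_pin_pages()/unpin naming from above with an owner argument for the
"RDMA file": these are purely hypothetical signatures; nothing like this
exists yet.)

/* Hypothetical sketch only: pin pages on behalf of an owning file so the
 * pin can be traced back to its RDMA context, independent of the task
 * that created it.
 */
long vaddr_pin_pages(struct file *f_owner, unsigned long vaddr,
		     unsigned long npages, unsigned int gup_flags,
		     struct page **pages);

void vaddr_unpin_pages(struct file *f_owner, struct page **pages,
		       unsigned long npages, bool make_dirty);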

I have many drawings on my whiteboard with "a whole lot of lines" on them to
make sure that if a process opens a file, mmaps it, pins it with RDMA, _closes_
it, and unmaps it, the resulting file pin can still be traced back to the RDMA
context and all the processes which may have access to it, no matter where the
original context may have come from.  I believe I have accomplished that.

Before I go on, I would like to say that the "imbalance" of get_user_pages()
and put_page() bothers me from a purist standpoint...  However, since this
discussion cropped up I went ahead and ported my work to Linus' current master
(5.3-rc3+) and in doing so I only had to steal a bit of John's c

[Xen-devel] [PATCH V2 7/7] IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-02-13 Thread ira . weiny
From: Ira Weiny 

Use the new FOLL_LONGTERM flag with get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/mthca/mthca_memfree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c 
b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 112d2f38e0de..8ff0e90d7564 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,7 +472,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct 
mthca_uar *uar,
goto out;
}
 
-   ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
+   ret = get_user_pages_fast(uaddr & PAGE_MASK, 1,
+ FOLL_WRITE | FOLL_LONGTERM, pages);
if (ret < 0)
goto out;
 
-- 
2.20.1



[Xen-devel] [PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM

2019-02-13 Thread ira . weiny
From: Ira Weiny 

Rather than have a separate get_user_pages_longterm() call,
introduce FOLL_LONGTERM and change the longterm callers to use
it.

This patch does not change any functionality.

FOLL_LONGTERM can only be supported with get_user_pages() as it
requires vmas to determine if DAX is in use.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/core/umem.c |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |   8 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   9 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c  |   6 +-
 drivers/vfio/vfio_iommu_type1.c|   3 +-
 include/linux/mm.h |  13 +-
 mm/gup.c   | 138 -
 mm/gup_benchmark.c |   5 +-
 8 files changed, 101 insertions(+), 86 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b69d3efa8712..120a40df91b4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -185,10 +185,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, 
unsigned long addr,
 
while (npages) {
down_read(&mm->mmap_sem);
-   ret = get_user_pages_longterm(cur_base,
+   ret = get_user_pages(cur_base,
 min_t(unsigned long, npages,
   PAGE_SIZE / sizeof (struct page *)),
-gup_flags, page_list, vma_list);
+gup_flags | FOLL_LONGTERM,
+page_list, vma_list);
if (ret < 0) {
up_read(&mm->mmap_sem);
goto umem_release;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c 
b/drivers/infiniband/hw/qib/qib_user_pages.c
index ef8bcf366ddc..1b9368261035 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -114,10 +114,10 @@ int qib_get_user_pages(unsigned long start_page, size_t 
num_pages,
 
down_read(&current->mm->mmap_sem);
for (got = 0; got < num_pages; got += ret) {
-   ret = get_user_pages_longterm(start_page + got * PAGE_SIZE,
- num_pages - got,
- FOLL_WRITE | FOLL_FORCE,
- p + got, NULL);
+   ret = get_user_pages(start_page + got * PAGE_SIZE,
+num_pages - got,
+FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
+p + got, NULL);
if (ret < 0) {
up_read(&current->mm->mmap_sem);
goto bail_release;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c 
b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 06862a6af185..1d9a182ac163 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -143,10 +143,11 @@ static int usnic_uiom_get_pages(unsigned long addr, 
size_t size, int writable,
ret = 0;
 
while (npages) {
-   ret = get_user_pages_longterm(cur_base,
-   min_t(unsigned long, npages,
-   PAGE_SIZE / sizeof(struct page *)),
-   gup_flags, page_list, NULL);
+   ret = get_user_pages(cur_base,
+min_t(unsigned long, npages,
+PAGE_SIZE / sizeof(struct page *)),
+gup_flags | FOLL_LONGTERM,
+page_list, NULL);
 
if (ret < 0)
goto out;
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c 
b/drivers/media/v4l2-core/videobuf-dma-sg.c
index 08929c087e27..870a2a526e0b 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -186,12 +186,12 @@ static int videobuf_dma_init_user_locked(struct 
videobuf_dmabuf *dma,
dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
data, size, dma->nr_pages);
 
-   err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
-flags, dma->pages, NULL);
+   err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
+flags | FOLL_LONGTERM, dma->pages, NULL);
 
if (err != dma->nr_pages) {
dma->nr_pages = (err >= 0) ? err : 0;
-   dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
+   dprintk(1, "get_user_pages: err=%d [%d]\n", err,
dma->nr_pages);
return err < 0 ? err : -EINVAL;
}
diff --git a/drivers/vfio/vf

[Xen-devel] [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'

2019-02-13 Thread ira . weiny
From: Ira Weiny 

To facilitate additional options to get_user_pages_fast() change the
singular write parameter to be gup_flags.

This patch does not change any functionality.  New functionality will
follow in subsequent patches.

Some of the get_user_pages_fast() call sites were unchanged because they
already passed FOLL_WRITE or 0 for the write parameter.
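
Concretely, a converted call site goes from a write bool to gup_flags
(illustrative caller; writers pass FOLL_WRITE, read-only callers pass 0):

/* Before: third argument is an int/bool "write". */
ret = get_user_pages_fast(start, nr_pages, 1, pages);

/* After: third argument is gup_flags. */
ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);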

Signed-off-by: Ira Weiny 
---
 arch/mips/mm/gup.c | 11 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c|  4 ++--
 arch/powerpc/kvm/e500_mmu.c|  2 +-
 arch/powerpc/mm/mmu_context_iommu.c|  4 ++--
 arch/s390/kvm/interrupt.c  |  2 +-
 arch/s390/mm/gup.c | 12 ++--
 arch/sh/mm/gup.c   | 11 ++-
 arch/sparc/mm/gup.c|  9 +
 arch/x86/kvm/paging_tmpl.h |  2 +-
 arch/x86/kvm/svm.c |  2 +-
 drivers/fpga/dfl-afu-dma-region.c  |  2 +-
 drivers/gpu/drm/via/via_dmablit.c  |  3 ++-
 drivers/infiniband/hw/hfi1/user_pages.c|  3 ++-
 drivers/misc/genwqe/card_utils.c   |  2 +-
 drivers/misc/vmw_vmci/vmci_host.c  |  2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c|  6 --
 drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
 drivers/rapidio/devices/rio_mport_cdev.c   |  4 +++-
 drivers/sbus/char/oradax.c |  2 +-
 drivers/scsi/st.c  |  3 ++-
 drivers/staging/gasket/gasket_page_table.c |  4 ++--
 drivers/tee/tee_shm.c  |  2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c|  3 ++-
 drivers/vhost/vhost.c  |  2 +-
 drivers/video/fbdev/pvr2fb.c   |  2 +-
 drivers/virt/fsl_hypervisor.c  |  2 +-
 drivers/xen/gntdev.c   |  2 +-
 fs/orangefs/orangefs-bufmap.c  |  2 +-
 include/linux/mm.h |  4 ++--
 kernel/futex.c |  2 +-
 lib/iov_iter.c |  7 +--
 mm/gup.c   | 10 +-
 mm/util.c  |  8 
 net/ceph/pagevec.c |  2 +-
 net/rds/info.c |  2 +-
 net/rds/rdma.c |  3 ++-
 36 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 0d14e0d8eacf..4c2b4483683c 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int 
nr_pages, int write,
  * get_user_pages_fast() - pin user pages in memory
  * @start: starting user address
  * @nr_pages:  number of pages from start to pin
- * @write: whether pages will be written to
+ * @gup_flags: flags modifying pin behaviour
  * @pages: array that receives pointers to the pages pinned.
  * Should be at least nr_pages long.
  *
@@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int 
nr_pages, int write,
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
  */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-   struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+   unsigned int gup_flags, struct page **pages)
 {
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
@@ -273,7 +273,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
-   if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+   if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
+  pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
@@ -289,7 +290,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
pages += nr;
 
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
- pages, write ? FOLL_WRITE : 0);
+ pages, gup_flags);
 
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bd2dcfbf00cd..8fcb0a921e46 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -582,7 +582,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
-   npages = get_user_pages_fast(hva, 1, writing, pages);
+   npages =

[Xen-devel] [PATCH V2 6/7] IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-02-13 Thread ira . weiny
From: Ira Weiny 

Use the new FOLL_LONGTERM flag with get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c 
b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 31c523b2a9f5..b53cc0240e02 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -673,7 +673,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata 
*dd,
else
j = npages;
 
-   ret = get_user_pages_fast(addr, j, 0, pages);
+   ret = get_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
if (ret != j) {
i = 0;
j = ret;
-- 
2.20.1



[Xen-devel] [PATCH V2 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-02-13 Thread ira . weiny
From: Ira Weiny 

DAX pages were previously unprotected from longterm pins when users
called get_user_pages_fast().

Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall
back to regular GUP processing if a DEVMAP page is encountered.

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 6f32d36b3c5b..f7e759c523bb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1439,6 +1439,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
goto pte_unmap;
 
if (pte_devmap(pte)) {
+   if (unlikely(flags & FOLL_LONGTERM))
+   goto pte_unmap;
+
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
if (unlikely(!pgmap)) {
undo_dev_pagemap(nr, nr_start, pages);
@@ -1578,8 +1581,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
unsigned long addr,
if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
-   if (pmd_devmap(orig))
+   if (pmd_devmap(orig)) {
+   if (unlikely(flags & FOLL_LONGTERM))
+   return 0;
return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
+   }
 
refs = 0;
page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1904,8 +1910,20 @@ int get_user_pages_fast(unsigned long start, int 
nr_pages,
start += nr << PAGE_SHIFT;
pages += nr;
 
-   ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
- gup_flags);
+   if (gup_flags & FOLL_LONGTERM) {
+   down_read(&current->mm->mmap_sem);
+   ret = __gup_longterm_locked(current, current->mm,
+   start, nr_pages - nr,
+   pages, NULL, gup_flags);
+   up_read(&current->mm->mmap_sem);
+   } else {
+   /*
+* retain FAULT_FOLL_ALLOW_RETRY optimization if
+* possible
+*/
+   ret = get_user_pages_unlocked(start, nr_pages - nr,
+ pages, gup_flags);
+   }
 
/* Have to be a bit careful with return values */
if (nr > 0) {
-- 
2.20.1



Re: [Xen-devel] [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'

2019-02-13 Thread Ira Weiny
On Wed, Feb 13, 2019 at 04:11:10PM -0700, Jason Gunthorpe wrote:
> On Wed, Feb 13, 2019 at 03:04:51PM -0800, ira.we...@intel.com wrote:
> > From: Ira Weiny 
> > 
> > To facilitate additional options to get_user_pages_fast() change the
> > singular write parameter to be gup_flags.
> 
> So now we have:
> 
> long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>   struct page **pages, unsigned int gup_flags);
> 
> and 
> 
> int get_user_pages_fast(unsigned long start, int nr_pages,
>   unsigned int gup_flags, struct page **pages)
> 
> Does this make any sense? At least the arguments should be in the same
> order, I think.

Yes... and no.  See below.

> 
> Also this comment:
> /*
>  * get_user_pages_unlocked() is suitable to replace the form:
>  *
>  *  down_read(&mm->mmap_sem);
>  *  get_user_pages(tsk, mm, ..., pages, NULL);
>  *  up_read(&mm->mmap_sem);
>  *
>  *  with:
>  *
>  *  get_user_pages_unlocked(tsk, mm, ..., pages);
>  *
>  * It is functionally equivalent to get_user_pages_fast so
>  * get_user_pages_fast should be used instead if specific gup_flags
>  * (e.g. FOLL_FORCE) are not required.
>  */
> 
> Needs some attention as the recommendation is now nonsense.

IMO they are not functionally equivalent.

We can't remove *_unlocked() as it is used both as a helper for the
arch-specific *_fast() calls _and_ in drivers.  Again, I don't know the history
here, but it could be that the drivers should never have used the call in the
first place???  Or should have been converted at some point?

I could change the comment to be something like

/*
 * get_user_pages_unlocked() is only to be used by arch specific
 * get_user_pages_fast() calls.  Drivers should be calling
 * get_user_pages_fast()
 */

Instead of the current comment.

And change the drivers to use get_user_pages_fast().

However, I'm not sure if these drivers need the FOLL_TOUCH flag which
*_unlocked() adds for them.  And adding FOLL_TOUCH to *_fast() is not going to
give the same functionality.

It _looks_ like we can add FOLL_TOUCH functionality to the fast path in the
generic code.  I'm not sure about the arches.

If we did that, then those drivers could use FOLL_TOUCH or not in *_fast() as
they want/need.

> 
> Honestly a proper explanation of why two functions exist would be
> great at this point :)

I've not researched it.  I do agree that there seem to be a lot of calls in
this file and that the differences are subtle.

Ira

> 
> Jason


[Xen-devel] [PATCH V2 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-02-13 Thread ira . weiny
From: Ira Weiny 

Use the new FOLL_LONGTERM flag with get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/hfi1/user_pages.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/user_pages.c 
b/drivers/infiniband/hw/hfi1/user_pages.c
index 78ccacaf97d0..6a7f9cd5a94e 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -104,9 +104,11 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned 
long vaddr, size_t np
bool writable, struct page **pages)
 {
int ret;
+   unsigned int gup_flags = writable ? FOLL_WRITE : 0;
 
-   ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0,
- pages);
+   gup_flags |= FOLL_LONGTERM;
+
+   ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
 
-- 
2.20.1



[Xen-devel] [PATCH V2 2/7] mm/gup: Change write parameter to flags in fast walk

2019-02-13 Thread ira . weiny
From: Ira Weiny 

In order to support more options in the GUP fast walk, change
the write parameter to flags throughout the call stack.

This patch does not change functionality and passes FOLL_WRITE
where write was previously used.

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 52 ++--
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index ee96eaff118c..681388236106 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1417,7 +1417,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, 
struct page **pages)
 
 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-int write, struct page **pages, int *nr)
+unsigned int flags, struct page **pages, int *nr)
 {
struct dev_pagemap *pgmap = NULL;
int nr_start = *nr, ret = 0;
@@ -1435,7 +1435,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
if (pte_protnone(pte))
goto pte_unmap;
 
-   if (!pte_access_permitted(pte, write))
+   if (!pte_access_permitted(pte, flags & FOLL_WRITE))
goto pte_unmap;
 
if (pte_devmap(pte)) {
@@ -1487,7 +1487,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
  * useful to have gup_huge_pmd even if we can't operate on ptes.
  */
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-int write, struct page **pages, int *nr)
+unsigned int flags, struct page **pages, int *nr)
 {
return 0;
 }
@@ -1570,12 +1570,12 @@ static int __gup_device_huge_pud(pud_t pud, pud_t 
*pudp, unsigned long addr,
 #endif
 
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-   unsigned long end, int write, struct page **pages, int *nr)
+   unsigned long end, unsigned int flags, struct page **pages, int 
*nr)
 {
struct page *head, *page;
int refs;
 
-   if (!pmd_access_permitted(orig, write))
+   if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
if (pmd_devmap(orig))
@@ -1608,12 +1608,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
unsigned long addr,
 }
 
 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
-   unsigned long end, int write, struct page **pages, int *nr)
+   unsigned long end, unsigned int flags, struct page **pages, int 
*nr)
 {
struct page *head, *page;
int refs;
 
-   if (!pud_access_permitted(orig, write))
+   if (!pud_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
if (pud_devmap(orig))
@@ -1646,13 +1646,13 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, 
unsigned long addr,
 }
 
 static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
-   unsigned long end, int write,
+   unsigned long end, unsigned int flags,
struct page **pages, int *nr)
 {
int refs;
struct page *head, *page;
 
-   if (!pgd_access_permitted(orig, write))
+   if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
BUILD_BUG_ON(pgd_devmap(orig));
@@ -1683,7 +1683,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned 
long addr,
 }
 
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-   int write, struct page **pages, int *nr)
+   unsigned int flags, struct page **pages, int *nr)
 {
unsigned long next;
pmd_t *pmdp;
@@ -1705,7 +1705,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
if (pmd_protnone(pmd))
return 0;
 
-   if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+   if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
pages, nr))
return 0;
 
@@ -1715,9 +1715,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
 * pmd format and THP pmd format
 */
if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
-PMD_SHIFT, next, write, pages, nr))
+PMD_SHIFT, next, flags, pages, nr))
return 0;
-   } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+   } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
 
@@ -1725,7 +1725,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long 

[Xen-devel] [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it

2019-02-13 Thread ira . weiny
From: Ira Weiny 

NOTE: This series depends on my cleanup patch to remove the write parameter
from gup_fast_permitted() [1]

HFI1, qib, and mthca use get_user_pages_fast() due to its performance
advantages.  These pages can be held for a significant time.  But
get_user_pages_fast() does not protect against mapping of FS DAX pages.

Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast(), which
retains the performance while also adding the FS DAX checks.  XDP has also
shown interest in using this functionality.[2]

In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and
remove the specialized get_user_pages_longterm() call.

[1] https://lkml.org/lkml/2019/2/11/237
[2] https://lkml.org/lkml/2019/2/11/1789
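
The resulting driver-side pattern looks like this (a sketch mirroring the
hfi1/qib/mthca conversions in this series; the variables are illustrative):

/* Long-term pin handed to hardware: FOLL_LONGTERM makes GUP fast fall
 * back to the slow path for DEVMAP pages, where the FS DAX checks apply,
 * instead of pinning such pages indefinitely.
 */
ret = get_user_pages_fast(vaddr, npages, FOLL_WRITE | FOLL_LONGTERM, pages);
if (ret < 0)
	return ret;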

Ira Weiny (7):
  mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
  mm/gup: Change write parameter to flags in fast walk
  mm/gup: Change GUP fast to use flags rather than a write 'bool'
  mm/gup: Add FOLL_LONGTERM capability to GUP fast
  IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

 arch/mips/mm/gup.c  |  11 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |   4 +-
 arch/powerpc/kvm/e500_mmu.c |   2 +-
 arch/powerpc/mm/mmu_context_iommu.c |   4 +-
 arch/s390/kvm/interrupt.c   |   2 +-
 arch/s390/mm/gup.c  |  12 +-
 arch/sh/mm/gup.c|  11 +-
 arch/sparc/mm/gup.c |   9 +-
 arch/x86/kvm/paging_tmpl.h  |   2 +-
 arch/x86/kvm/svm.c  |   2 +-
 drivers/fpga/dfl-afu-dma-region.c   |   2 +-
 drivers/gpu/drm/via/via_dmablit.c   |   3 +-
 drivers/infiniband/core/umem.c  |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c |   5 +-
 drivers/infiniband/hw/mthca/mthca_memfree.c |   3 +-
 drivers/infiniband/hw/qib/qib_user_pages.c  |   8 +-
 drivers/infiniband/hw/qib/qib_user_sdma.c   |   2 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c|   9 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c   |   6 +-
 drivers/misc/genwqe/card_utils.c|   2 +-
 drivers/misc/vmw_vmci/vmci_host.c   |   2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c |   6 +-
 drivers/platform/goldfish/goldfish_pipe.c   |   3 +-
 drivers/rapidio/devices/rio_mport_cdev.c|   4 +-
 drivers/sbus/char/oradax.c  |   2 +-
 drivers/scsi/st.c   |   3 +-
 drivers/staging/gasket/gasket_page_table.c  |   4 +-
 drivers/tee/tee_shm.c   |   2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c |   3 +-
 drivers/vfio/vfio_iommu_type1.c |   3 +-
 drivers/vhost/vhost.c   |   2 +-
 drivers/video/fbdev/pvr2fb.c|   2 +-
 drivers/virt/fsl_hypervisor.c   |   2 +-
 drivers/xen/gntdev.c|   2 +-
 fs/orangefs/orangefs-bufmap.c   |   2 +-
 include/linux/mm.h  |  17 +-
 kernel/futex.c  |   2 +-
 lib/iov_iter.c  |   7 +-
 mm/gup.c| 220 
 mm/gup_benchmark.c  |   5 +-
 mm/util.c   |   8 +-
 net/ceph/pagevec.c  |   2 +-
 net/rds/info.c  |   2 +-
 net/rds/rdma.c  |   3 +-
 44 files changed, 232 insertions(+), 180 deletions(-)

-- 
2.20.1



Re: [Xen-devel] [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it

2019-02-15 Thread Ira Weiny
> NOTE: This series depends on my cleanup patch to remove the write parameter
> from gup_fast_permitted() [1]
> 
> HFI1, qib, and mthca use get_user_pages_fast() due to its performance
> advantages.  These pages can be held for a significant time.  But
> get_user_pages_fast() does not protect against mapping of FS DAX pages.
> 
> Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast(), which
> retains the performance while also adding the FS DAX checks.  XDP has also
> shown interest in using this functionality.[2]
> 
> In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and
> remove the specialized get_user_pages_longterm() call.
> 
> [1] https://lkml.org/lkml/2019/2/11/237
> [2] https://lkml.org/lkml/2019/2/11/1789

Any comments on this series?  I've touched a lot of subsystems which I think
require review.

Thanks,
Ira

> 
> Ira Weiny (7):
>   mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
>   mm/gup: Change write parameter to flags in fast walk
>   mm/gup: Change GUP fast to use flags rather than a write 'bool'
>   mm/gup: Add FOLL_LONGTERM capability to GUP fast
>   IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
>   IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
>   IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
> 
>  arch/mips/mm/gup.c  |  11 +-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c |   4 +-
>  arch/powerpc/kvm/e500_mmu.c |   2 +-
>  arch/powerpc/mm/mmu_context_iommu.c |   4 +-
>  arch/s390/kvm/interrupt.c   |   2 +-
>  arch/s390/mm/gup.c  |  12 +-
>  arch/sh/mm/gup.c|  11 +-
>  arch/sparc/mm/gup.c |   9 +-
>  arch/x86/kvm/paging_tmpl.h  |   2 +-
>  arch/x86/kvm/svm.c  |   2 +-
>  drivers/fpga/dfl-afu-dma-region.c   |   2 +-
>  drivers/gpu/drm/via/via_dmablit.c   |   3 +-
>  drivers/infiniband/core/umem.c  |   5 +-
>  drivers/infiniband/hw/hfi1/user_pages.c |   5 +-
>  drivers/infiniband/hw/mthca/mthca_memfree.c |   3 +-
>  drivers/infiniband/hw/qib/qib_user_pages.c  |   8 +-
>  drivers/infiniband/hw/qib/qib_user_sdma.c   |   2 +-
>  drivers/infiniband/hw/usnic/usnic_uiom.c|   9 +-
>  drivers/media/v4l2-core/videobuf-dma-sg.c   |   6 +-
>  drivers/misc/genwqe/card_utils.c|   2 +-
>  drivers/misc/vmw_vmci/vmci_host.c   |   2 +-
>  drivers/misc/vmw_vmci/vmci_queue_pair.c |   6 +-
>  drivers/platform/goldfish/goldfish_pipe.c   |   3 +-
>  drivers/rapidio/devices/rio_mport_cdev.c|   4 +-
>  drivers/sbus/char/oradax.c  |   2 +-
>  drivers/scsi/st.c   |   3 +-
>  drivers/staging/gasket/gasket_page_table.c  |   4 +-
>  drivers/tee/tee_shm.c   |   2 +-
>  drivers/vfio/vfio_iommu_spapr_tce.c |   3 +-
>  drivers/vfio/vfio_iommu_type1.c |   3 +-
>  drivers/vhost/vhost.c   |   2 +-
>  drivers/video/fbdev/pvr2fb.c|   2 +-
>  drivers/virt/fsl_hypervisor.c   |   2 +-
>  drivers/xen/gntdev.c|   2 +-
>  fs/orangefs/orangefs-bufmap.c   |   2 +-
>  include/linux/mm.h  |  17 +-
>  kernel/futex.c  |   2 +-
>  lib/iov_iter.c  |   7 +-
>  mm/gup.c| 220 
>  mm/gup_benchmark.c  |   5 +-
>  mm/util.c   |   8 +-
>  net/ceph/pagevec.c  |   2 +-
>  net/rds/info.c  |   2 +-
>  net/rds/rdma.c  |   3 +-
>  44 files changed, 232 insertions(+), 180 deletions(-)
> 
> -- 
> 2.20.1
> 


[Xen-devel] [RESEND PATCH 6/7] IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-02-19 Thread ira . weiny
From: Ira Weiny 

Use the new FOLL_LONGTERM flag with get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c 
b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 31c523b2a9f5..b53cc0240e02 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -673,7 +673,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata 
*dd,
else
j = npages;
 
-   ret = get_user_pages_fast(addr, j, 0, pages);
+   ret = get_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
if (ret != j) {
i = 0;
j = ret;
-- 
2.20.1



[Xen-devel] [RESEND PATCH 7/7] IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-02-19 Thread ira . weiny
From: Ira Weiny 

Use the new FOLL_LONGTERM flag with get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/mthca/mthca_memfree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c 
b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 112d2f38e0de..8ff0e90d7564 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,7 +472,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct 
mthca_uar *uar,
goto out;
}
 
-   ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
+   ret = get_user_pages_fast(uaddr & PAGE_MASK, 1,
+ FOLL_WRITE | FOLL_LONGTERM, pages);
if (ret < 0)
goto out;
 
-- 
2.20.1



[Xen-devel] [RESEND PATCH 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

2019-02-19 Thread ira . weiny
From: Ira Weiny 

Use the new FOLL_LONGTERM flag with get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/hfi1/user_pages.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/user_pages.c 
b/drivers/infiniband/hw/hfi1/user_pages.c
index 78ccacaf97d0..6a7f9cd5a94e 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -104,9 +104,11 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned 
long vaddr, size_t np
bool writable, struct page **pages)
 {
int ret;
+   unsigned int gup_flags = writable ? FOLL_WRITE : 0;
 
-   ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0,
- pages);
+   gup_flags |= FOLL_LONGTERM;
+
+   ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
 
-- 
2.20.1



[Xen-devel] [RESEND PATCH 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM

2019-02-19 Thread ira . weiny
From: Ira Weiny 

Rather than have a separate get_user_pages_longterm() call,
introduce FOLL_LONGTERM and change the longterm callers to use
it.

This patch does not change any functionality.

FOLL_LONGTERM can only be supported with get_user_pages() as it
requires vmas to determine if DAX is in use.

Signed-off-by: Ira Weiny 
---
 drivers/infiniband/core/umem.c |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |   8 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   9 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c  |   6 +-
 drivers/vfio/vfio_iommu_type1.c|   3 +-
 include/linux/mm.h |  13 +-
 mm/gup.c   | 138 -
 mm/gup_benchmark.c |   5 +-
 8 files changed, 101 insertions(+), 86 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b69d3efa8712..120a40df91b4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -185,10 +185,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, 
unsigned long addr,
 
while (npages) {
down_read(&mm->mmap_sem);
-   ret = get_user_pages_longterm(cur_base,
+   ret = get_user_pages(cur_base,
 min_t(unsigned long, npages,
   PAGE_SIZE / sizeof (struct page *)),
-gup_flags, page_list, vma_list);
+gup_flags | FOLL_LONGTERM,
+page_list, vma_list);
if (ret < 0) {
up_read(&mm->mmap_sem);
goto umem_release;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c 
b/drivers/infiniband/hw/qib/qib_user_pages.c
index ef8bcf366ddc..1b9368261035 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -114,10 +114,10 @@ int qib_get_user_pages(unsigned long start_page, size_t 
num_pages,
 
down_read(&current->mm->mmap_sem);
for (got = 0; got < num_pages; got += ret) {
-   ret = get_user_pages_longterm(start_page + got * PAGE_SIZE,
- num_pages - got,
- FOLL_WRITE | FOLL_FORCE,
- p + got, NULL);
+   ret = get_user_pages(start_page + got * PAGE_SIZE,
+num_pages - got,
+FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
+p + got, NULL);
if (ret < 0) {
up_read(&current->mm->mmap_sem);
goto bail_release;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c 
b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 06862a6af185..1d9a182ac163 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -143,10 +143,11 @@ static int usnic_uiom_get_pages(unsigned long addr, 
size_t size, int writable,
ret = 0;
 
while (npages) {
-   ret = get_user_pages_longterm(cur_base,
-   min_t(unsigned long, npages,
-   PAGE_SIZE / sizeof(struct page *)),
-   gup_flags, page_list, NULL);
+   ret = get_user_pages(cur_base,
+min_t(unsigned long, npages,
+PAGE_SIZE / sizeof(struct page *)),
+gup_flags | FOLL_LONGTERM,
+page_list, NULL);
 
if (ret < 0)
goto out;
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c 
b/drivers/media/v4l2-core/videobuf-dma-sg.c
index 08929c087e27..870a2a526e0b 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -186,12 +186,12 @@ static int videobuf_dma_init_user_locked(struct 
videobuf_dmabuf *dma,
dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
data, size, dma->nr_pages);
 
-   err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
-flags, dma->pages, NULL);
+   err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
+flags | FOLL_LONGTERM, dma->pages, NULL);
 
if (err != dma->nr_pages) {
dma->nr_pages = (err >= 0) ? err : 0;
-   dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
+   dprintk(1, "get_user_pages: err=%d [%d]\n", err,
dma->nr_pages);
return err < 0 ? err : -EINVAL;
}
diff --git a/drivers/vfio/vf

[Xen-devel] [RESEND PATCH 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast

2019-02-19 Thread ira . weiny
From: Ira Weiny 

DAX pages were previously unprotected from longterm pins when users
called get_user_pages_fast().

Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall
back to regular GUP processing if a DEVMAP page is encountered.

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 6f32d36b3c5b..f7e759c523bb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1439,6 +1439,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
goto pte_unmap;
 
if (pte_devmap(pte)) {
+   if (unlikely(flags & FOLL_LONGTERM))
+   goto pte_unmap;
+
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
if (unlikely(!pgmap)) {
undo_dev_pagemap(nr, nr_start, pages);
@@ -1578,8 +1581,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
unsigned long addr,
if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
-   if (pmd_devmap(orig))
+   if (pmd_devmap(orig)) {
+   if (unlikely(flags & FOLL_LONGTERM))
+   return 0;
return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
+   }
 
refs = 0;
page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1904,8 +1910,20 @@ int get_user_pages_fast(unsigned long start, int 
nr_pages,
start += nr << PAGE_SHIFT;
pages += nr;
 
-   ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
- gup_flags);
+   if (gup_flags & FOLL_LONGTERM) {
+   down_read(&current->mm->mmap_sem);
+   ret = __gup_longterm_locked(current, current->mm,
+   start, nr_pages - nr,
+   pages, NULL, gup_flags);
+   up_read(&current->mm->mmap_sem);
+   } else {
+   /*
+* retain FAULT_FOLL_ALLOW_RETRY optimization if
+* possible
+*/
+   ret = get_user_pages_unlocked(start, nr_pages - nr,
+ pages, gup_flags);
+   }
 
/* Have to be a bit careful with return values */
if (nr > 0) {
-- 
2.20.1



[Xen-devel] [RESEND PATCH 0/7] Add FOLL_LONGTERM to GUP fast and use it

2019-02-19 Thread ira . weiny
From: Ira Weiny 

Resending these as I had only one minor comment, which I believe we have
covered in this series.  I was anticipating these going through the mm tree, as
they depend on a cleanup patch there and the IB changes are very minor.  But
they could just as well go through the IB tree.

NOTE: This series depends on my cleanup patch to remove the write parameter
from gup_fast_permitted() [1]

HFI1, qib, and mthca use get_user_pages_fast() due to its performance
advantages.  These pages can be held for a significant time.  But
get_user_pages_fast() does not protect against mapping of FS DAX pages.

Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast(), which
retains the performance while also adding the FS DAX checks.  XDP has also
shown interest in using this functionality.[2]

In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and
remove the specialized get_user_pages_longterm() call.

[1] https://lkml.org/lkml/2019/2/11/237
[2] https://lkml.org/lkml/2019/2/11/1789

Ira Weiny (7):
  mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
  mm/gup: Change write parameter to flags in fast walk
  mm/gup: Change GUP fast to use flags rather than a write 'bool'
  mm/gup: Add FOLL_LONGTERM capability to GUP fast
  IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

 arch/mips/mm/gup.c  |  11 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |   4 +-
 arch/powerpc/kvm/e500_mmu.c |   2 +-
 arch/powerpc/mm/mmu_context_iommu.c |   4 +-
 arch/s390/kvm/interrupt.c   |   2 +-
 arch/s390/mm/gup.c  |  12 +-
 arch/sh/mm/gup.c|  11 +-
 arch/sparc/mm/gup.c |   9 +-
 arch/x86/kvm/paging_tmpl.h  |   2 +-
 arch/x86/kvm/svm.c  |   2 +-
 drivers/fpga/dfl-afu-dma-region.c   |   2 +-
 drivers/gpu/drm/via/via_dmablit.c   |   3 +-
 drivers/infiniband/core/umem.c  |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c |   5 +-
 drivers/infiniband/hw/mthca/mthca_memfree.c |   3 +-
 drivers/infiniband/hw/qib/qib_user_pages.c  |   8 +-
 drivers/infiniband/hw/qib/qib_user_sdma.c   |   2 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c|   9 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c   |   6 +-
 drivers/misc/genwqe/card_utils.c|   2 +-
 drivers/misc/vmw_vmci/vmci_host.c   |   2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c |   6 +-
 drivers/platform/goldfish/goldfish_pipe.c   |   3 +-
 drivers/rapidio/devices/rio_mport_cdev.c|   4 +-
 drivers/sbus/char/oradax.c  |   2 +-
 drivers/scsi/st.c   |   3 +-
 drivers/staging/gasket/gasket_page_table.c  |   4 +-
 drivers/tee/tee_shm.c   |   2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c |   3 +-
 drivers/vfio/vfio_iommu_type1.c |   3 +-
 drivers/vhost/vhost.c   |   2 +-
 drivers/video/fbdev/pvr2fb.c|   2 +-
 drivers/virt/fsl_hypervisor.c   |   2 +-
 drivers/xen/gntdev.c|   2 +-
 fs/orangefs/orangefs-bufmap.c   |   2 +-
 include/linux/mm.h  |  17 +-
 kernel/futex.c  |   2 +-
 lib/iov_iter.c  |   7 +-
 mm/gup.c| 220 
 mm/gup_benchmark.c  |   5 +-
 mm/util.c   |   8 +-
 net/ceph/pagevec.c  |   2 +-
 net/rds/info.c  |   2 +-
 net/rds/rdma.c  |   3 +-
 44 files changed, 232 insertions(+), 180 deletions(-)

-- 
2.20.1



[Xen-devel] [RESEND PATCH 2/7] mm/gup: Change write parameter to flags in fast walk

2019-02-19 Thread ira . weiny
From: Ira Weiny 

In order to support more options in the GUP fast walk, change
the write parameter to flags throughout the call stack.

This patch does not change functionality and passes FOLL_WRITE
where write was previously used.

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 52 ++--
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index ee96eaff118c..681388236106 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1417,7 +1417,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, 
struct page **pages)
 
 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-int write, struct page **pages, int *nr)
+unsigned int flags, struct page **pages, int *nr)
 {
struct dev_pagemap *pgmap = NULL;
int nr_start = *nr, ret = 0;
@@ -1435,7 +1435,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
if (pte_protnone(pte))
goto pte_unmap;
 
-   if (!pte_access_permitted(pte, write))
+   if (!pte_access_permitted(pte, flags & FOLL_WRITE))
goto pte_unmap;
 
if (pte_devmap(pte)) {
@@ -1487,7 +1487,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
  * useful to have gup_huge_pmd even if we can't operate on ptes.
  */
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-int write, struct page **pages, int *nr)
+unsigned int flags, struct page **pages, int *nr)
 {
return 0;
 }
@@ -1570,12 +1570,12 @@ static int __gup_device_huge_pud(pud_t pud, pud_t 
*pudp, unsigned long addr,
 #endif
 
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-   unsigned long end, int write, struct page **pages, int *nr)
+   unsigned long end, unsigned int flags, struct page **pages, int 
*nr)
 {
struct page *head, *page;
int refs;
 
-   if (!pmd_access_permitted(orig, write))
+   if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
if (pmd_devmap(orig))
@@ -1608,12 +1608,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
unsigned long addr,
 }
 
 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
-   unsigned long end, int write, struct page **pages, int *nr)
+   unsigned long end, unsigned int flags, struct page **pages, int 
*nr)
 {
struct page *head, *page;
int refs;
 
-   if (!pud_access_permitted(orig, write))
+   if (!pud_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
if (pud_devmap(orig))
@@ -1646,13 +1646,13 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, 
unsigned long addr,
 }
 
 static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
-   unsigned long end, int write,
+   unsigned long end, unsigned int flags,
struct page **pages, int *nr)
 {
int refs;
struct page *head, *page;
 
-   if (!pgd_access_permitted(orig, write))
+   if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
return 0;
 
BUILD_BUG_ON(pgd_devmap(orig));
@@ -1683,7 +1683,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned 
long addr,
 }
 
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-   int write, struct page **pages, int *nr)
+   unsigned int flags, struct page **pages, int *nr)
 {
unsigned long next;
pmd_t *pmdp;
@@ -1705,7 +1705,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
if (pmd_protnone(pmd))
return 0;
 
-   if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+   if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
pages, nr))
return 0;
 
@@ -1715,9 +1715,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
 * pmd format and THP pmd format
 */
if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
-PMD_SHIFT, next, write, pages, nr))
+PMD_SHIFT, next, flags, pages, nr))
return 0;
-   } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+   } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
 
@@ -1725,7 +1725,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long 

[Xen-devel] [RESEND PATCH 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'

2019-02-19 Thread ira . weiny
From: Ira Weiny 

To facilitate additional options to get_user_pages_fast() change the
singular write parameter to be gup_flags.

This patch does not change any functionality.  New functionality will
follow in subsequent patches.

Some of the get_user_pages_fast() call sites were unchanged because they
already passed FOLL_WRITE or 0 for the write parameter.

Signed-off-by: Ira Weiny 
---
 arch/mips/mm/gup.c | 11 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c|  4 ++--
 arch/powerpc/kvm/e500_mmu.c|  2 +-
 arch/powerpc/mm/mmu_context_iommu.c|  4 ++--
 arch/s390/kvm/interrupt.c  |  2 +-
 arch/s390/mm/gup.c | 12 ++--
 arch/sh/mm/gup.c   | 11 ++-
 arch/sparc/mm/gup.c|  9 +
 arch/x86/kvm/paging_tmpl.h |  2 +-
 arch/x86/kvm/svm.c |  2 +-
 drivers/fpga/dfl-afu-dma-region.c  |  2 +-
 drivers/gpu/drm/via/via_dmablit.c  |  3 ++-
 drivers/infiniband/hw/hfi1/user_pages.c|  3 ++-
 drivers/misc/genwqe/card_utils.c   |  2 +-
 drivers/misc/vmw_vmci/vmci_host.c  |  2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c|  6 --
 drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
 drivers/rapidio/devices/rio_mport_cdev.c   |  4 +++-
 drivers/sbus/char/oradax.c |  2 +-
 drivers/scsi/st.c  |  3 ++-
 drivers/staging/gasket/gasket_page_table.c |  4 ++--
 drivers/tee/tee_shm.c  |  2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c|  3 ++-
 drivers/vhost/vhost.c  |  2 +-
 drivers/video/fbdev/pvr2fb.c   |  2 +-
 drivers/virt/fsl_hypervisor.c  |  2 +-
 drivers/xen/gntdev.c   |  2 +-
 fs/orangefs/orangefs-bufmap.c  |  2 +-
 include/linux/mm.h |  4 ++--
 kernel/futex.c |  2 +-
 lib/iov_iter.c |  7 +--
 mm/gup.c   | 10 +-
 mm/util.c  |  8 
 net/ceph/pagevec.c |  2 +-
 net/rds/info.c |  2 +-
 net/rds/rdma.c |  3 ++-
 36 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 0d14e0d8eacf..4c2b4483683c 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int 
nr_pages, int write,
  * get_user_pages_fast() - pin user pages in memory
  * @start: starting user address
  * @nr_pages:  number of pages from start to pin
- * @write: whether pages will be written to
+ * @gup_flags: flags modifying pin behaviour
  * @pages: array that receives pointers to the pages pinned.
  * Should be at least nr_pages long.
  *
@@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int 
nr_pages, int write,
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
  */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-   struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+   unsigned int gup_flags, struct page **pages)
 {
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
@@ -273,7 +273,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
-   if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+   if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
+  pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
@@ -289,7 +290,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, 
int write,
pages += nr;
 
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
- pages, write ? FOLL_WRITE : 0);
+ pages, gup_flags);
 
/* Have to be a bit careful with return values */
if (nr > 0) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bd2dcfbf00cd..8fcb0a921e46 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -582,7 +582,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
-   npages = get_user_pages_fast(hva, 1, writing, pages);
+   npages =

Re: [Xen-devel] [RESEND PATCH 0/7] Add FOLL_LONGTERM to GUP fast and use it

2019-02-20 Thread Ira Weiny
On Wed, Feb 20, 2019 at 07:19:30AM -0800, Christoph Hellwig wrote:
> On Tue, Feb 19, 2019 at 09:30:33PM -0800, ira.we...@intel.com wrote:
> > From: Ira Weiny 
> > 
> > Resending these as I had only one minor comment, which I believe we have
> > covered in this series.  I was anticipating these going through the mm
> > tree, as they depend on a cleanup patch there and the IB changes are very
> > minor.  But they could just as well go through the IB tree.
> > 
> > NOTE: This series depends on my clean up patch to remove the write parameter
> > from gup_fast_permitted()[1]
> > 
> > HFI1, qib, and mthca use get_user_pages_fast() due to its performance
> > advantages.  These pages can be held for a significant time.  But
> > get_user_pages_fast() does not protect against mapping of FS DAX pages.
> 
> This I don't get - if you do lock down long term mappings performance
> of the actual get_user_pages call shouldn't matter to start with.
> 
> What do I miss?

A couple of points.

First "longterm" is a relative thing and at this point is probably a misnomer.
This is really flagging a pin which is going to be given to hardware and can't
move.  I've thought of a couple of alternative names but I think we have to
settle on if we are going to use FL_LAYOUT or something else to solve the
"longterm" problem.  Then I think we can change the flag to a better name.

Second, it depends on how often you are registering memory.  I have spoken with
some RDMA users who consider memory registration (MR) part of the performance
path for overall application performance.  I don't have the numbers, as the
tests for HFI1 were done a long time ago, but there was a significant
advantage.  Some of that is probably due to the fact that you don't have to
hold mmap_sem.

Finally, architecturally I think it would be good for everyone to use *_fast.
There are patches submitted to the RDMA list which would allow the use of
*_fast (they rework the use of mmap_sem) and as soon as they are accepted
I'll submit a patch to convert the RDMA core as well.  Also to this point
others are looking to use *_fast.[2]
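
As a rough sketch, a driver pinning pages for hardware would end up with
something like this once the series lands (the function and its error
handling are illustrative, not taken from any of these patches):

	/* Pin user pages for long-lived hardware access.  FOLL_LONGTERM
	 * makes GUP fast reject FS DAX pages, which cannot safely be
	 * held indefinitely.
	 */
	static int pin_for_hw(unsigned long uaddr, int npages,
			      struct page **pages)
	{
		int ret = get_user_pages_fast(uaddr, npages,
					      FOLL_WRITE | FOLL_LONGTERM,
					      pages);

		if (ret < npages) {
			while (ret > 0)
				put_page(pages[--ret]);
			return -EFAULT;
		}
		return 0;
	}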

As an aside, Jason pointed out in my previous submission that *_fast and
*_unlocked look very much the same.  I agree and I think further cleanup will
be coming.  But I'm focused on getting the final solution for DAX at the
moment.

Ira



Re: [Xen-devel] [RESEND PATCH 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'

2019-02-21 Thread Ira Weiny
On Thu, Feb 21, 2019 at 08:48:41AM +0530, Souptick Joarder wrote:
> Hi Ira,
> 
> On Wed, Feb 20, 2019 at 11:01 AM  wrote:
> >
> > From: Ira Weiny 
> >
> > To facilitate additional options to get_user_pages_fast() change the
> > singular write parameter to be gup_flags.
> >
> > This patch does not change any functionality.  New functionality will
> > follow in subsequent patches.
> >
> > Some of the get_user_pages_fast() call sites were unchanged because they
> > already passed FOLL_WRITE or 0 for the write parameter.
> >
> > Signed-off-by: Ira Weiny 
> > ---

[snip]

> > diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > index bd2dcfbf00cd..8fcb0a921e46 100644
> > --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > @@ -582,7 +582,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
> > /* If writing != 0, then the HPTE must allow writing, if we get here */
> > write_ok = writing;
> > hva = gfn_to_hva_memslot(memslot, gfn);
> > -   npages = get_user_pages_fast(hva, 1, writing, pages);
> > +   npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
> 
> Just requesting for opinion,
> * writing ? FOLL_WRITE : 0 * is used in many places. How about placing it in a
> macro/ inline ?

I don't really think this would gain much.  And I don't think it would be more
clear.  In fact I can't even think of a macro name which would make sense.  I'm
inclined to leave this as written.
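
For reference, the helper being suggested would presumably be something
like the sketch below; the name is invented here, which rather
illustrates the naming problem:

	/* Hypothetical helper; name invented for illustration only. */
	static inline unsigned int gup_write_flag(bool write)
	{
		return write ? FOLL_WRITE : 0;
	}

	/* npages = get_user_pages_fast(hva, 1, gup_write_flag(writing), pages); */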

Ira

> 
> > if (npages < 1) {
> > /* Check if it's an I/O mapping */
> > down_read(&current->mm->mmap_sem);
> > @@ -1175,7 +1175,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
> > if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
> > goto err;
> > hva = gfn_to_hva_memslot(memslot, gfn);
> > -   npages = get_user_pages_fast(hva, 1, 1, pages);
> > +   npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages);
> > if (npages < 1)
> > goto err;
> > page = pages[0];
> > diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
> > index 24296f4cadc6..e0af53fd78c5 100644
> > --- a/arch/powerpc/kvm/e500_mmu.c
> > +++ b/arch/powerpc/kvm/e500_mmu.c
> > @@ -783,7 +783,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
> > if (!pages)
> > return -ENOMEM;
> >
> > -   ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
> > +   ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages);
> > if (ret < 0)
> > goto free_pages;
> >
> > diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> > index a712a650a8b6..acb0990c8364 100644
> > --- a/arch/powerpc/mm/mmu_context_iommu.c
> > +++ b/arch/powerpc/mm/mmu_context_iommu.c
> > @@ -190,7 +190,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
> > for (i = 0; i < entries; ++i) {
> > cur_ua = ua + (i << PAGE_SHIFT);
> > if (1 != get_user_pages_fast(cur_ua,
> > -   1/* pages */, 1/* iswrite */, &page)) {
> > +   1/* pages */, FOLL_WRITE, &page)) {
> > ret = -EFAULT;
> > for (j = 0; j < i; ++j)
> > put_page(pfn_to_page(mem->hpas[j] >>
> > @@ -209,7 +209,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
> > if (mm_iommu_move_page_from_cma(page))
> > goto populate;
> > if (1 != get_user_pages_fast(cur_ua,
> > -   1/* pages */, 1/* iswrite */,
> > +   1/* pages */, FOLL_WRITE,
> > &page)) {
> > ret = -EFAULT;
> > for (j = 0; j < i; ++j)
> > diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> > index fcb55b02990e..69d9366b966c 100644
> > --- a/arch/s390/kvm/interrupt.c
> > +++ b/arch/s390/kvm/interrupt.c
> > @@ -227

Re: [Xen-devel] [RESEND PATCH 0/7] Add FOLL_LONGTERM to GUP fast and use it

2019-02-27 Thread Ira Weiny
On Tue, Feb 19, 2019 at 09:30:33PM -0800, 'Ira Weiny' wrote:
> From: Ira Weiny 
> 
> Resending these as I had only 1 minor comment which I believe we have covered
> in this series.  I was anticipating these going through the mm tree as they
> depend on a cleanup patch there and the IB changes are very minor.  But they
> could just as well go through the IB tree.
> 
> NOTE: This series depends on my clean up patch to remove the write parameter
> from gup_fast_permitted()[1]
> 
> HFI1, qib, and mthca use get_user_pages_fast() due to its performance
> advantages.  These pages can be held for a significant time.  But
> get_user_pages_fast() does not protect against mapping of FS DAX pages.
> 
> Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast() which
> retains the performance while also adding the FS DAX checks.  XDP has also
> shown interest in using this functionality.[2]
> 
> In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and
> remove the specialized get_user_pages_longterm call.
> 
> [1] https://lkml.org/lkml/2019/2/11/237
> [2] https://lkml.org/lkml/2019/2/11/1789

Is there anything I need to do on this series or does anyone have any
objections to it going into 5.1?  And if so, whose tree is it going to go
through?

Thanks,
Ira

> 
> Ira Weiny (7):
>   mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
>   mm/gup: Change write parameter to flags in fast walk
>   mm/gup: Change GUP fast to use flags rather than a write 'bool'
>   mm/gup: Add FOLL_LONGTERM capability to GUP fast
>   IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
>   IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
>   IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
> 
>  arch/mips/mm/gup.c  |  11 +-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c |   4 +-
>  arch/powerpc/kvm/e500_mmu.c |   2 +-
>  arch/powerpc/mm/mmu_context_iommu.c |   4 +-
>  arch/s390/kvm/interrupt.c   |   2 +-
>  arch/s390/mm/gup.c  |  12 +-
>  arch/sh/mm/gup.c|  11 +-
>  arch/sparc/mm/gup.c |   9 +-
>  arch/x86/kvm/paging_tmpl.h  |   2 +-
>  arch/x86/kvm/svm.c  |   2 +-
>  drivers/fpga/dfl-afu-dma-region.c   |   2 +-
>  drivers/gpu/drm/via/via_dmablit.c   |   3 +-
>  drivers/infiniband/core/umem.c  |   5 +-
>  drivers/infiniband/hw/hfi1/user_pages.c |   5 +-
>  drivers/infiniband/hw/mthca/mthca_memfree.c |   3 +-
>  drivers/infiniband/hw/qib/qib_user_pages.c  |   8 +-
>  drivers/infiniband/hw/qib/qib_user_sdma.c   |   2 +-
>  drivers/infiniband/hw/usnic/usnic_uiom.c|   9 +-
>  drivers/media/v4l2-core/videobuf-dma-sg.c   |   6 +-
>  drivers/misc/genwqe/card_utils.c|   2 +-
>  drivers/misc/vmw_vmci/vmci_host.c   |   2 +-
>  drivers/misc/vmw_vmci/vmci_queue_pair.c |   6 +-
>  drivers/platform/goldfish/goldfish_pipe.c   |   3 +-
>  drivers/rapidio/devices/rio_mport_cdev.c|   4 +-
>  drivers/sbus/char/oradax.c  |   2 +-
>  drivers/scsi/st.c   |   3 +-
>  drivers/staging/gasket/gasket_page_table.c  |   4 +-
>  drivers/tee/tee_shm.c   |   2 +-
>  drivers/vfio/vfio_iommu_spapr_tce.c |   3 +-
>  drivers/vfio/vfio_iommu_type1.c |   3 +-
>  drivers/vhost/vhost.c   |   2 +-
>  drivers/video/fbdev/pvr2fb.c|   2 +-
>  drivers/virt/fsl_hypervisor.c   |   2 +-
>  drivers/xen/gntdev.c|   2 +-
>  fs/orangefs/orangefs-bufmap.c   |   2 +-
>  include/linux/mm.h  |  17 +-
>  kernel/futex.c  |   2 +-
>  lib/iov_iter.c  |   7 +-
>  mm/gup.c| 220 
>  mm/gup_benchmark.c  |   5 +-
>  mm/util.c   |   8 +-
>  net/ceph/pagevec.c  |   2 +-
>  net/rds/info.c  |   2 +-
>  net/rds/rdma.c  |   3 +-
>  44 files changed, 232 insertions(+), 180 deletions(-)
> 
> -- 
> 2.20.1
> 


[PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls in the gpu stack are localized to a single thread.
To avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Patrik Jakobsson 
Signed-off-by: Ira Weiny 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 12 ++--
 drivers/gpu/drm/gma500/gma_display.c |  4 ++--
 drivers/gpu/drm/gma500/mmu.c | 10 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  4 ++--
 .../gpu/drm/i915/gem/selftests/i915_gem_context.c|  4 ++--
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c   |  8 
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c |  4 ++--
 drivers/gpu/drm/i915/gt/intel_gtt.c  |  4 ++--
 drivers/gpu/drm/i915/gt/shmem_utils.c|  4 ++--
 drivers/gpu/drm/i915/i915_gem.c  |  8 
 drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
 drivers/gpu/drm/i915/selftests/i915_perf.c   |  4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c  |  4 ++--
 13 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 978bae731398..bd564bccb7a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
 
page = adev->gart.pages[p];
if (page) {
-   ptr = kmap(page);
+   ptr = kmap_thread(page);
ptr += off;
 
r = copy_to_user(buf, ptr, cur_size);
-   kunmap(adev->gart.pages[p]);
+   kunmap_thread(adev->gart.pages[p]);
} else
r = clear_user(buf, cur_size);
 
@@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
 
-   ptr = kmap(p);
+   ptr = kmap_thread(p);
r = copy_to_user(buf, ptr + off, bytes);
-   kunmap(p);
+   kunmap_thread(p);
if (r)
return -EFAULT;
 
@@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
 
-   ptr = kmap(p);
+   ptr = kmap_thread(p);
r = copy_from_user(ptr + off, buf, bytes);
-   kunmap(p);
+   kunmap_thread(p);
if (r)
return -EFAULT;
 
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index 3df6d6e850f5..35f4e55c941f 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc,
/* Copy the cursor to cursor mem */
tmp_dst = dev_priv->vram_addr + cursor_gt->offset;
for (i = 0; i < cursor_pages; i++) {
-   tmp_src = kmap(gt->pages[i]);
+   tmp_src = kmap_thread(gt->pages[i]);
memcpy(tmp_dst, tmp_src, PAGE_SIZE);
-   kunmap(gt->pages[i]);
+   kunmap_thread(gt->pages[i]);
tmp_dst += PAGE_SIZE;
}
 
diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c
index 505044c9a673..fba7a3a461fd 100644
--- a/drivers/gpu/drm/gma500/mmu.c
+++ b/drivers/gpu/drm/gma500/mmu.c
@@ -192,20 +192,20 @@ struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
pd->invalid_pte = 0;
}
 
-   v = kmap(pd->dummy_pt);
+   v = kmap_thread(pd->dummy_pt);
for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
v[i] = pd->invalid_pte;
 
-   kunmap(pd->dummy_pt);
+   kunmap_thread(pd->dummy_pt);
 
-   v = kmap(pd->p);
+   v = kmap_thread(pd->p);
for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
v[i] = pd->invalid_pde;
 
-   kunmap(pd->p);
+   kunmap_thread(pd->p);
 
clear_page(kmap(pd->dummy_page));
-   kunmap(pd->dummy_page);
+   kunmap_thread(pd->dummy_page);
 
pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
if (!pd->tables)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 38113d3c0138..274424795fb7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -566,9 +566,9 @@ i915_g

[PATCH RFC PKS/PMEM 03/58] memremap: Add zone device access protection

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Device managed memory exposes itself to the kernel direct map which
allows stray pointers to access these device memories.

Stray pointers to normal memory may result in a crash or other
undesirable behavior which, while unfortunate, is usually recoverable
with a reboot.  Stray accesses, specifically stray writes, to areas such
as non-volatile memory are permanent in nature and thus are more likely
to result in permanent user data loss than stray accesses to other
memory areas.

Furthermore, protecting against reads can also help guard against
speculative reads of poisoned areas, but this is a secondary reason.

Set up an infrastructure for extra device access protection. Then
implement the new protection using the new Protection Keys Supervisor
(PKS) on architectures which support it.

To enable this extra protection devices specify a flag in the pgmap to
indicate that these areas wish to use additional protection.

Kernel code which intends to access this memory can do so automatically
through the use of the kmap infrastructure calling into
dev_access_[enable|disable]() described here.  The kmap infrastructure
is implemented in a follow on patch.

In addition, users can directly enable/disable the access through
dev_access_[enable|disable]() if they have a priori knowledge of the
type of pages they are accessing.
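
For example, a pmem-aware path could bracket its access directly,
something like this sketch (the memcpy() and the function name are
illustrative):

	static void pmem_copy(void *dst, const void *pmem_src, size_t len)
	{
		dev_access_enable(false);	/* thread-local: drop the protection */
		memcpy(dst, pmem_src, len);	/* illustrative access */
		dev_access_disable(false);	/* restore the protection */
	}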

All calls to enable/disable protection flow through
dev_access_[enable|disable]() and are nestable by the use of a per task
reference count.  This reference count does 2 things.

1) Allows a thread to nest calls to disable protection such that the
   first call to re-enable protection does not 'break' the last access of
   the pmem device memory.

2) Provides faster performance by avoiding lots of MSR writes.  For
   example, looping over a sequence of pmem pages.

In addition, we must ensure the reference count is preserved through an
exception, so we add the count to irqentry_state_t and save/restore the
reference count across the exception, giving exceptions their own count
should they use a kmap call.

The following shows how this works through an exception:

	...
	// ref == 0
	dev_access_enable()  // ref += 1 ==> disable protection
		irq()
			// enable protection
			// ref = 0
			_handler()
				dev_access_enable()   // ref += 1 ==> disable protection
				dev_access_disable()  // ref -= 1 ==> enable protection
			// WARN_ON(ref != 0)
			// disable protection
	do_pmem_thing()  // all good here
	dev_access_disable() // ref -= 1 ==> 0 ==> enable protection
	...
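
A minimal sketch of that nesting logic, using the per-task
dev_page_access_ref added by this patch ('dev_page_pkey' is an invented
name, and the pks_mk*() calls stand in for the actual MSR manipulation):

	void dev_access_enable(bool global)
	{
		/* Only the 0 -> 1 transition drops the protection. */
		if (current->dev_page_access_ref++ == 0)
			pks_mkrdwr(dev_page_pkey, global);
	}

	void dev_access_disable(bool global)
	{
		/* Only the 1 -> 0 transition restores the protection. */
		if (--current->dev_page_access_ref == 0)
			pks_mknoaccess(dev_page_pkey, global);
	}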

Nested exceptions operate the same way with each exception storing the
interrupted exception state all the way down.

The pkey value is never free'ed as this optimizes the implementation to
be either on or off using a static branch conditional in the fast paths.

Cc: Juri Lelli 
Cc: Vincent Guittot 
Cc: Dietmar Eggemann 
Cc: Steven Rostedt 
Cc: Ben Segall 
Cc: Mel Gorman 
Signed-off-by: Ira Weiny 
---
 arch/x86/entry/common.c  | 21 +
 include/linux/entry-common.h |  3 ++
 include/linux/memremap.h |  1 +
 include/linux/mm.h   | 43 +
 include/linux/sched.h|  3 ++
 init/init_task.c |  3 ++
 kernel/fork.c|  3 ++
 mm/Kconfig   | 13 ++
 mm/memremap.c| 90 
 9 files changed, 180 insertions(+)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 86ad32e0095e..3680724c1a4d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -264,12 +264,27 @@ noinstr void idtentry_exit_nmi(struct pt_regs *regs, irqentry_state_t *irq_state
  *
  * NOTE That the thread saved PKRS must be preserved separately to ensure
  * global overrides do not 'stick' on a thread.
+ *
+ * Furthermore, Zone Device Access Protection maintains access in a re-entrant
+ * manner through a reference count which also needs to be maintained should
+ * exception handlers use those interfaces for memory access.  Here we start
+ * off the exception handler ref count to 0 and ensure it is 0 when the
+ * exception is done.  Then restore it for the interrupted task.
  */
 noinstr void irq_save_pkrs(irqentry_state_t *state)
 {
if (!cpu_feature_enabled(X86_FEATURE_PKS))
return;
 
+#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
+   /*
+* Save the ref count of the current running process and set it to 0
+* for any irq users to properly track re-entrance
+*/
+   state->pkrs_ref = current->dev_page_access_ref;
+   current->dev_page_access_ref = 0;
+#endif
+
/*
 * The thread_pkrs must be maintained separately to prevent global
 * overrid

[PATCH RFC PKS/PMEM 00/58] PMEM: Introduce stray write protection for PMEM

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Should a stray write in the kernel occur, persistent memory is affected more
than regular memory.  A write to the wrong area of memory could result in
latent data corruption which will persist after a reboot.  PKS provides a
nice way to restrict access to persistent memory kernel mappings, while
providing fast access when needed.

Since the last RFC[1] this patch set has grown quite a bit.  It now depends on
the core patches submitted separately.


https://lore.kernel.org/lkml/20201009194258.3207172-1-ira.we...@intel.com/

And contained in the git tree here:

https://github.com/weiny2/linux-kernel/tree/pks-rfc-v3

However, functionally there is only 1 major change from the last RFC.
Specifically, kmap() is most often used within a single thread in a 'map/do
something/unmap' pattern.  In fact this is the pattern used in ~90% of the
callers of kmap().  This pattern works very well for the pmem use case and the
testing which was done.  However, there were another ~20-30 kmap users which do
not follow this pattern.  Some of them seem to expect the mapping to be
'global' while others require a detailed audit to be sure.[2][3]
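
For reference, the thread-local pattern after conversion is simply the
following (a sketch; the memcpy() stands in for the "do something" step):

	static void copy_from_page(struct page *page, void *buffer)
	{
		void *addr = kmap_thread(page);

		memcpy(buffer, addr, PAGE_SIZE);	/* use within this thread only */
		kunmap_thread(page);
	}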

While we don't anticipate global mappings to pmem there is a danger in
changing the semantics of kmap().  Effectively, this would cause an unresolved
page fault with little to no information about why.

There were a number of options considered.

1) Attempt to change all the thread local kmap() calls to kmap_atomic()
2) Introduce a flags parameter to kmap() to indicate if the mapping should be
   global or not
3) Change ~20-30 call sites to 'kmap_global()' to indicate that they require a
   global mapping of the pages
4) Change ~209 call sites to 'kmap_thread()' to indicate that the mapping is to
   be used within that thread of execution only

Option 1 is simply not feasible: kmap_atomic() does not have the same
semantics as kmap() within a single thread.  Option 2 would require all of the
call sites of kmap() to change.  Option 3 seems like a good minimal change but
there is a danger that new code may miss the semantic change of kmap() and not
get the behavior intended for future users.  Therefore, option #4 was chosen.

To handle the global PKRS state in the most efficient manner possible, we
lazily override the thread-specific PKRS key value only when needed, because we
anticipate PKS will not be needed most of the time.  And even when it is used,
90% of the time it is a thread-local call.


[1] https://lore.kernel.org/lkml/20200717072056.73134-1-ira.we...@intel.com/

[2] The following list of callers continue calling kmap() (utilizing the global
PKRS).  It would be nice if more of them could be converted to kmap_thread()

drivers/firewire/net.c:                     ptr = kmap(dev->broadcast_rcv_buffer.pages[u]);
drivers/gpu/drm/i915/gem/i915_gem_pages.c:  return kmap(sg_page(sgt->sgl));
drivers/gpu/drm/ttm/ttm_bo_util.c:          map->virtual = kmap(map->page);
drivers/infiniband/hw/qib/qib_user_sdma.c:  mpage = kmap(page);
drivers/misc/vmw_vmci/vmci_host.c:          context->notify = kmap(context->notify_page) + (uva & (PAGE_SIZE - 1));
drivers/misc/xilinx_sdfec.c:                addr = kmap(pages[i]);
drivers/mmc/host/usdhi6rol0.c:              host->pg.mapped = kmap(host->pg.page);
drivers/mmc/host/usdhi6rol0.c:              host->pg.mapped = kmap(host->pg.page);
drivers/mmc/host/usdhi6rol0.c:              host->pg.mapped = kmap(host->pg.page);
drivers/nvme/target/tcp.c:                  iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
drivers/scsi/libiscsi_tcp.c:                segment->sg_mapped = kmap(sg_page(sg));
drivers/target/iscsi/iscsi_target.c:        iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off;
drivers/target/target_core_transport.c:     return kmap(sg_page(sg)) + sg->offset;
fs/btrfs/check-integrity.c:                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
fs/ceph/dir.c:                              cache_ctl->dentries = kmap(cache_ctl->page);
fs/ceph/inode.c:                            ctl->dentries = kmap(ctl->page);
fs/erofs/zpvec.h:                           kmap_atomic(ctor->curr) : kmap(ctor->curr);
lib/scatterlist.c:                          miter->addr = kmap(miter->page) + miter->__offset;
net/ceph/pagelist.c:                        pl->mapped_tail = kmap(page);
net/ceph/pagelist.c:                        pl->mapped_tail = kmap(page);
virt/kvm/kvm_main.c:                        hva = kmap(page);

[3] The following appear to follow the same pattern as ext2 which was converted
after some code audit.  So I _think_ they too could be converted to
k[un]map_thread().

fs/freevxfs/vxfs_subr.c|75| kmap(pp);
fs/jfs/jfs_metapage.c|102| kmap(page);
fs/jfs/jfs_metapage.c|156| k

[PATCH RFC PKS/PMEM 04/58] kmap: Add stray access protection for device pages

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Device managed pages may have additional protections.  These protections
need to be removed prior to valid use by kernel users.

Check for special treatment of device managed pages in kmap and take
action if needed.  We use kmap as an interface for generic kernel code
because under normal circumstances it would be a bug for general kernel
code to not use kmap prior to accessing kernel memory.  Therefore, this
should allow any valid kernel users to seamlessly use these pages
without issues.

Because of the critical nature of kmap it must be pointed out that the
overhead on regular DRAM is carefully implemented to be as fast as
possible.  Furthermore, the underlying MSR write required on device pages
when protected is cheaper than a normal serializing MSR write.

Specifically, WRMSR(MSR_IA32_PKRS) is not serializing but still
maintains ordering properties similar to WRPKRU.  The current SDM
section on PKRS needs updating but should be the same as that of WRPKRU.
So to quote from the WRPKRU text:

WRPKRU will never execute speculatively. Memory accesses
affected by PKRU register will not execute (even speculatively)
until all prior executions of WRPKRU have completed execution
and updated the PKRU register.

Still, this will make accessing pmem more expensive from the kernel, but
the overhead is minimized and many pmem users access this memory through
user page mappings which are not affected at all.

Cc: Randy Dunlap 
Signed-off-by: Ira Weiny 
---
 include/linux/highmem.h | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 14e6202ce47f..2a9806e3b8d2 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -31,6 +32,20 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
 
 #include 
 
+static inline void dev_page_enable_access(struct page *page, bool global)
+{
+   if (!page_is_access_protected(page))
+   return;
+   dev_access_enable(global);
+}
+
+static inline void dev_page_disable_access(struct page *page, bool global)
+{
+   if (!page_is_access_protected(page))
+   return;
+   dev_access_disable(global);
+}
+
 #ifdef CONFIG_HIGHMEM
 extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot);
 extern void kunmap_atomic_high(void *kvaddr);
@@ -55,6 +70,11 @@ static inline void *kmap(struct page *page)
else
addr = kmap_high(page);
kmap_flush_tlb((unsigned long)addr);
+   /*
+* Even non-highmem pages may have additional access protections which
+* need to be checked and potentially enabled.
+*/
+   dev_page_enable_access(page, true);
return addr;
 }
 
@@ -63,6 +83,11 @@ void kunmap_high(struct page *page);
 static inline void kunmap(struct page *page)
 {
might_sleep();
+   /*
+* Even non-highmem pages may have additional access protections which
+* need to be checked and potentially disabled.
+*/
+   dev_page_disable_access(page, true);
if (!PageHighMem(page))
return;
kunmap_high(page);
@@ -85,6 +110,7 @@ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
preempt_disable();
pagefault_disable();
+   dev_page_enable_access(page, false);
if (!PageHighMem(page))
return page_address(page);
return kmap_atomic_high_prot(page, prot);
@@ -137,6 +163,7 @@ static inline unsigned long totalhigh_pages(void) { return 0UL; }
 static inline void *kmap(struct page *page)
 {
might_sleep();
+   dev_page_enable_access(page, true);
return page_address(page);
 }
 
@@ -146,6 +173,7 @@ static inline void kunmap_high(struct page *page)
 
 static inline void kunmap(struct page *page)
 {
+   dev_page_disable_access(page, true);
 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(page_address(page));
 #endif
@@ -155,6 +183,7 @@ static inline void *kmap_atomic(struct page *page)
 {
preempt_disable();
pagefault_disable();
+   dev_page_enable_access(page, false);
return page_address(page);
 }
 #define kmap_atomic_prot(page, prot)   kmap_atomic(page)
@@ -216,7 +245,8 @@ static inline void kmap_atomic_idx_pop(void)
 #define kunmap_atomic(addr) \
 do {\
BUILD_BUG_ON(__same_type((addr), struct page *));   \
-   kunmap_atomic_high(addr);  \
+   dev_page_disable_access(kmap_to_page(addr), false); \
+   kunmap_atomic_high(addr);   \
pagefault_enable(); \
preempt_enable();   \
 } while (0)
-- 
2.28.0

[PATCH RFC PKS/PMEM 07/58] drivers/drbd: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this driver are localized to a single thread.  To
avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: Jens Axboe 
Signed-off-by: Ira Weiny 
---
 drivers/block/drbd/drbd_main.c |  4 ++--
 drivers/block/drbd/drbd_receiver.c | 12 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 573dbf6f0c31..f0d0c6b0745e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1532,9 +1532,9 @@ static int _drbd_no_send_page(struct drbd_peer_device *peer_device, struct page
int err;
 
socket = peer_device->connection->data.socket;
-   addr = kmap(page) + offset;
+   addr = kmap_thread(page) + offset;
err = drbd_send_all(peer_device->connection, socket, addr, size, msg_flags);
-   kunmap(page);
+   kunmap_thread(page);
if (!err)
peer_device->device->send_cnt += size >> 9;
return err;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 422363daa618..4704bc0564e2 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1951,13 +1951,13 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
page = peer_req->pages;
page_chain_for_each(page) {
unsigned len = min_t(int, ds, PAGE_SIZE);
-   data = kmap(page);
+   data = kmap_thread(page);
err = drbd_recv_all_warn(peer_device->connection, data, len);
if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
drbd_err(device, "Fault injection: Corrupting data on receive\n");
data[0] = data[0] ^ (unsigned long)-1;
}
-   kunmap(page);
+   kunmap_thread(page);
if (err) {
drbd_free_peer_req(device, peer_req);
return NULL;
@@ -1992,7 +1992,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
 
page = drbd_alloc_pages(peer_device, 1, 1);
 
-   data = kmap(page);
+   data = kmap_thread(page);
while (data_size) {
unsigned int len = min_t(int, data_size, PAGE_SIZE);
 
@@ -2001,7 +2001,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
break;
data_size -= len;
}
-   kunmap(page);
+   kunmap_thread(page);
drbd_free_pages(peer_device->device, page, 0);
return err;
 }
@@ -2033,10 +2033,10 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
 
bio_for_each_segment(bvec, bio, iter) {
-   void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+   void *mapped = kmap_thread(bvec.bv_page) + bvec.bv_offset;
expect = min_t(int, data_size, bvec.bv_len);
err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
-   kunmap(bvec.bv_page);
+   kunmap_thread(bvec.bv_page);
if (err)
return err;
data_size -= expect;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 06/58] kmap: Introduce k[un]map_thread debugging

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Most kmap() callers use the map within a single thread and have no need
for the protection domain to be enabled globally.

To differentiate these kmap users, new k[un]map_thread() calls were
introduced which are thread local.

To aid in debugging the new use of kmap_thread(), add a reference count,
a check on that count, and tracing to identify where mapping errors occur.
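
Roughly, the debug wrappers bump the count and emit the tracepoints
around the plain mapping calls; the bodies below are a sketch of that
idea, not the actual implementation in mm/debug.c:

	void *kmap_thread(struct page *page)
	{
		current->kmap_thread_cnt++;
		trace_kmap_thread(current, page, __builtin_return_address(0),
				  current->kmap_thread_cnt);
		return __kmap(page, false);
	}

	void kunmap_thread(struct page *page)
	{
		__kunmap(page, false);
		current->kmap_thread_cnt--;
		trace_kunmap_thread(current, page, __builtin_return_address(0),
				    current->kmap_thread_cnt);
	}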

Cc: Juri Lelli 
Cc: Vincent Guittot 
Cc: Dietmar Eggemann 
Cc: Steven Rostedt 
Cc: Ben Segall 
Cc: Mel Gorman 
Signed-off-by: Ira Weiny 
---
 include/linux/highmem.h|  5 +++
 include/linux/sched.h  |  5 +++
 include/trace/events/kmap_thread.h | 56 ++
 init/init_task.c   |  3 ++
 kernel/fork.c  | 15 
 lib/Kconfig.debug  |  8 +
 mm/debug.c | 23 
 7 files changed, 115 insertions(+)
 create mode 100644 include/trace/events/kmap_thread.h

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index ef7813544719..22d1c000802e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -247,6 +247,10 @@ static inline void kunmap(struct page *page)
__kunmap(page, true);
 }
 
+#ifdef CONFIG_DEBUG_KMAP_THREAD
+void *kmap_thread(struct page *page);
+void kunmap_thread(struct page *page);
+#else
 static inline void *kmap_thread(struct page *page)
 {
return __kmap(page, false);
@@ -255,6 +259,7 @@ static inline void kunmap_thread(struct page *page)
 {
__kunmap(page, false);
 }
+#endif
 
 /*
  * Prevent people trying to call kunmap_atomic() as if it were kunmap()
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 25d97ab6c757..4627ea4a49e6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1318,6 +1318,11 @@ struct task_struct {
 #ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
unsigned intdev_page_access_ref;
 #endif
+
+#ifdef CONFIG_DEBUG_KMAP_THREAD
+   unsigned intkmap_thread_cnt;
+#endif
+
/*
 * New fields for task_struct should be added above here, so that
 * they are included in the randomized portion of task_struct.
diff --git a/include/trace/events/kmap_thread.h b/include/trace/events/kmap_thread.h
new file mode 100644
index ..e7143cfe0daf
--- /dev/null
+++ b/include/trace/events/kmap_thread.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+
+/*
+ * Copyright (c) 2020 Intel Corporation.  All rights reserved.
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmap_thread
+
+#if !defined(_TRACE_KMAP_THREAD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KMAP_THREAD_H
+
+#include 
+
+DECLARE_EVENT_CLASS(kmap_thread_template,
+   TP_PROTO(struct task_struct *tsk, struct page *page,
+void *caller_addr, int cnt),
+   TP_ARGS(tsk, page, caller_addr, cnt),
+
+   TP_STRUCT__entry(
+   __field(int, pid)
+   __field(struct page *, page)
+   __field(void *, caller_addr)
+   __field(int, cnt)
+   ),
+
+   TP_fast_assign(
+   __entry->pid = tsk->pid;
+   __entry->page = page;
+   __entry->caller_addr = caller_addr;
+   __entry->cnt = cnt;
+   ),
+
+   TP_printk("PID %d; (%d) %pS %p",
+   __entry->pid,
+   __entry->cnt,
+   __entry->caller_addr,
+   __entry->page
+   )
+);
+
+DEFINE_EVENT(kmap_thread_template, kmap_thread,
+   TP_PROTO(struct task_struct *tsk, struct page *page,
+void *caller_addr, int cnt),
+   TP_ARGS(tsk, page, caller_addr, cnt));
+
+DEFINE_EVENT(kmap_thread_template, kunmap_thread,
+   TP_PROTO(struct task_struct *tsk, struct page *page,
+void *caller_addr, int cnt),
+   TP_ARGS(tsk, page, caller_addr, cnt));
+
+
+#endif /* _TRACE_KMAP_THREAD_H */
+
+#include 
diff --git a/init/init_task.c b/init/init_task.c
index 9b39f25de59b..19f09965eb34 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -212,6 +212,9 @@ struct task_struct init_task
 #ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
.dev_page_access_ref = 0,
 #endif
+#ifdef CONFIG_DEBUG_KMAP_THREAD
+   .kmap_thread_cnt = 0,
+#endif
 };
 EXPORT_SYMBOL(init_task);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index b6a3ee328a89..2c66e49b7614 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -722,6 +722,17 @@ static inline void put_signal_struct(struct signal_struct *sig)
free_signal_struct(sig);
 }
 
+#ifdef CONFIG_DEBUG_KMAP_THREAD
+static void check_outstanding_kmap_thread(struct task_struct *tsk)
+{
+   if (tsk->kmap_thread_cnt)
pr_warn("WARNING: PID %d; Failed to kunmap_thread() [cnt %d]\n",
tsk->pid, tsk->kmap_thread_cnt);
+}
+#else
+static void check_outs

[PATCH RFC PKS/PMEM 01/58] x86/pks: Add a global pkrs option

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Some users, such as kmap(), sometimes require PKS to be global.
However, updating all CPUs, and worse yet all threads, is expensive.

Introduce a global PKRS state which is checked at critical times to
allow the state to enable access when global PKS is required.  To
accomplish this with minimal locking, the code is carefully designed
with the following key concepts.

1) Borrow the idea of lazy TLB invalidations from the fault handler
   code.  When enabling PKS access we anticipate that other threads are
   not yet running.  However, if they are we catch the fault and clean
   up the MSR value.

2) When disabling PKS access we force all MSR values across all CPU's.
   This is required to block access as soon as possible.[1]  However, it
   is key that we never attempt to update the per-task PKS values
   directly.  See next point.

3) Per-task PKS values never get updated with global PKS values.  This
   is key to prevent locking requirements and a nearly intractable
   problem of trying to update every task in the system.  Here are a few
   key points.

   3a) The MSR value can be updated with the global PKS value if that
   global value happened to change while the task was running.

   3b) If the task was sleeping while the global PKS was updated then
   the global value is added in when task's are scheduled.

   3c) If the global PKS value restricts access the MSR is updated as
   soon as possible[1] and the thread value is not updated which ensures
   the thread does not retain the elevated privileges after a context
   switch.

4) Follow on patches must be careful to preserve the separation of the
   thread PKRS value and the MSR value.

5) Access Disable on any individual pkey is turned into (Access Disable
   | Write Disable) to facilitate faster integration of the global value
   into the thread local MSR through a simple '&' operation.  Doing
   otherwise would result in complicated individual bit manipulation for
   each pkey.

[1] There is a race condition which is deliberately ignored for
performance reasons.  It potentially allows access to a thread until
the end of its time slice.  After the context switch the global value
will be restored.
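
As a usage sketch against the API documented below (the "pmem" key name
is illustrative):

	int pkey = pks_key_alloc("pmem");

	pks_mkrdwr(pkey, true);		/* global: opens access on all threads */
	/* ... access the protected mapping from any thread ... */
	pks_mknoaccess(pkey, true);	/* close it everywhere */

	pks_mkrdwr(pkey, false);	/* thread-local: current thread only */
	/* ... access from this thread ... */
	pks_mknoaccess(pkey, false);

	pks_key_free(pkey);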

Signed-off-by: Ira Weiny 
---
 Documentation/core-api/protection-keys.rst |  11 +-
 arch/x86/entry/common.c|   7 +
 arch/x86/include/asm/pkeys.h   |   6 +-
 arch/x86/include/asm/pkeys_common.h|   8 +-
 arch/x86/kernel/process.c  |  74 +++-
 arch/x86/mm/fault.c| 189 -
 arch/x86/mm/pkeys.c|  88 --
 include/linux/pkeys.h  |   6 +-
 lib/pks/pks_test.c |  16 +-
 9 files changed, 329 insertions(+), 76 deletions(-)

diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst
index c60366921d60..9e8a98653e13 100644
--- a/Documentation/core-api/protection-keys.rst
+++ b/Documentation/core-api/protection-keys.rst
@@ -121,9 +121,9 @@ mapping adds that mapping to the protection domain.
 int pks_key_alloc(const char * const pkey_user);
 #define PAGE_KERNEL_PKEY(pkey)
 #define _PAGE_KEY(pkey)
-void pks_mknoaccess(int pkey);
-void pks_mkread(int pkey);
-void pks_mkrdwr(int pkey);
+void pks_mknoaccess(int pkey, bool global);
+void pks_mkread(int pkey, bool global);
+void pks_mkrdwr(int pkey, bool global);
 void pks_key_free(int pkey);
 
 pks_key_alloc() allocates keys dynamically to allow better use of the limited
@@ -141,7 +141,10 @@ _PAGE_KEY().
 The pks_mk*() family of calls allows kernel users the ability to change the
 protections for the domain identified by the pkey specified.  3 states are
 available pks_mknoaccess(), pks_mkread(), and pks_mkrdwr() which set the access
-to none, read, and read/write respectively.
+to none, read, and read/write respectively.  'global' specifies that the
protection should be set across all threads (logical CPUs), not just the
current running thread/CPU.  This increases the overhead of PKS and lessens the
protection, so it should be used sparingly.
 
 Finally, pks_key_free() allows a user to return the key to the allocator for
 use by others.
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 324a8fd5ac10..86ad32e0095e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -261,12 +261,19 @@ noinstr void idtentry_exit_nmi(struct pt_regs *regs, irqentry_state_t *irq_state
  * current running value and set the default PKRS value for the duration of the
  * exception.  Thus preventing exception handlers from having the elevated
  * access of the interrupted task.
+ *
+ * NOTE That the thread saved PKRS must be preserved separately to ensure
+ * global overrides do not 'stick' on a thread.
  */
 noinstr voi

[PATCH RFC PKS/PMEM 02/58] x86/pks/test: Add testing for global option

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Now that PKS can be enabled globally (for all threads) add a test which
spawns a thread and tests the same PKS functionality.

The test enables/disables PKS in 1 thread while attempting to access the
page in another thread.  We use the same test array as in the 'local'
PKS testing.

Signed-off-by: Ira Weiny 
---
 arch/x86/mm/fault.c |   4 ++
 lib/pks/pks_test.c  | 128 +---
 2 files changed, 124 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4b4ff9efa298..4c74f52fbc23 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1108,6 +1108,10 @@ static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte,
if (global_pkey_is_enabled(pte, is_write, irq_state))
return 1;
 
+   /*
+* NOTE: This must be after the global_pkey_is_enabled() call
+* to allow the fixup code to be tested.
+*/
if (handle_pks_testing(error_code, irq_state))
return 1;
 
diff --git a/lib/pks/pks_test.c b/lib/pks/pks_test.c
index 286c8b8457da..dfddccbe4cb6 100644
--- a/lib/pks/pks_test.c
+++ b/lib/pks/pks_test.c
@@ -154,7 +154,8 @@ static void check_exception(irqentry_state_t *irq_state)
}
 
/* Check the exception state */
-   if (!check_pkrs(test_armed_key, PKEY_DISABLE_ACCESS)) {
+   if (!check_pkrs(test_armed_key,
+   PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)) {
pr_err(" FAIL: PKRS cache and MSR\n");
test_exception_ctx->pass = false;
}
@@ -308,24 +309,29 @@ static int test_it(struct pks_test_ctx *ctx, struct pks_access_test *test, void
return ret;
 }
 
-static int run_access_test(struct pks_test_ctx *ctx,
-  struct pks_access_test *test,
-  void *ptr)
+static void set_protection(int pkey, enum pks_access_mode mode, bool global)
 {
-   switch (test->mode) {
+   switch (mode) {
case PKS_TEST_NO_ACCESS:
-   pks_mknoaccess(ctx->pkey, false);
+   pks_mknoaccess(pkey, global);
break;
case PKS_TEST_RDWR:
-   pks_mkrdwr(ctx->pkey, false);
+   pks_mkrdwr(pkey, global);
break;
case PKS_TEST_RDONLY:
-   pks_mkread(ctx->pkey, false);
+   pks_mkread(pkey, global);
break;
default:
pr_err("BUG in test invalid mode\n");
break;
}
+}
+
+static int run_access_test(struct pks_test_ctx *ctx,
+  struct pks_access_test *test,
+  void *ptr)
+{
+   set_protection(ctx->pkey, test->mode, false);
 
return test_it(ctx, test, ptr);
 }
@@ -516,6 +522,110 @@ static void run_exception_test(void)
 pass ? "PASS" : "FAIL");
 }
 
+struct shared_data {
+   struct mutex lock;
+   struct pks_test_ctx *ctx;
+   void *kmap_addr;
+   struct pks_access_test *test;
+};
+
+static int thread_main(void *d)
+{
+   struct shared_data *data = d;
+   struct pks_test_ctx *ctx = data->ctx;
+
+   while (!kthread_should_stop()) {
+   mutex_lock(&data->lock);
+   /*
+* wait for the main thread to hand us the page
+* We should be spinning so hopefully we will not have gotten
+* the global value from a schedule in.
+*/
+   if (data->kmap_addr) {
+   if (test_it(ctx, data->test, data->kmap_addr))
+   ctx->pass = false;
+   data->kmap_addr = NULL;
+   }
+   mutex_unlock(&data->lock);
+   }
+
+   return 0;
+}
+
+static void run_thread_access_test(struct shared_data *data,
+  struct pks_test_ctx *ctx,
+  struct pks_access_test *test,
+  void *ptr)
+{
+   set_protection(ctx->pkey, test->mode, true);
+
+   pr_info("checking...  mode %s; write %s\n",
+   get_mode_str(test->mode), test->write ? "TRUE" : "FALSE");
+
+   mutex_lock(&data->lock);
+   data->test = test;
+   data->kmap_addr = ptr;
+   mutex_unlock(&data->lock);
+
+   while (data->kmap_addr) {
+   msleep(10);
+   }
+}
+
+static void run_global_test(void)
+{
+   struct task_struct *other_task;
+   struct pks_test_ctx *ctx;
+   struct shared_data data;
+   bool pass = true;
+   void *ptr;

[PATCH RFC PKS/PMEM 11/58] drivers/net: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in these drivers are localized to a single thread.  To
avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Jesse Brandeburg 
Signed-off-by: Ira Weiny 
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 4 ++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 6e8231c1ddf0..ac9189752012 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1794,14 +1794,14 @@ static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
 
frame_size >>= 1;
 
-   data = kmap(rx_buffer->page);
+   data = kmap_thread(rx_buffer->page);
 
if (data[3] != 0xFF ||
data[frame_size + 10] != 0xBE ||
data[frame_size + 12] != 0xAF)
match = false;
 
-   kunmap(rx_buffer->page);
+   kunmap_thread(rx_buffer->page);
 
return match;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 71ec908266a6..7d469425f8b4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1963,14 +1963,14 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer,
 
frame_size >>= 1;
 
-   data = kmap(rx_buffer->page) + rx_buffer->page_offset;
+   data = kmap_thread(rx_buffer->page) + rx_buffer->page_offset;
 
if (data[3] != 0xFF ||
data[frame_size + 10] != 0xBE ||
data[frame_size + 12] != 0xAF)
match = false;
 
-   kunmap(rx_buffer->page);
+   kunmap_thread(rx_buffer->page);
 
return match;
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 05/58] kmap: Introduce k[un]map_thread

2020-10-09 Thread ira . weiny
From: Ira Weiny 

To correctly support the semantics of kmap() with Kernel protection keys
(PKS), kmap() may be required to set the protections on multiple
processors (globally).  Enabling PKS globally can be very expensive
depending on the requested operation.  Furthermore, enabling a domain
globally reduces the protection afforded by PKS.

Most kmap() callers (approx. 209 of 229) use the map within a single thread and
have no need for the protection domain to be enabled globally.  However, the
remaining callers do not follow this pattern and, as best I can tell, expect
the mapping to be 'global' and available to any thread which may access the
mapping.[1]

We don't anticipate global mappings to pmem, however in general there is a
danger in changing the semantics of kmap().  Effectively, this would cause an
unresolved page fault with little to no information about why the failure
occurred.

To resolve this a number of options were considered.

1) Attempt to change all the thread local kmap() calls to kmap_atomic()[2]
2) Introduce a flags parameter to kmap() to indicate if the mapping should be
   global or not
3) Change ~20 call sites to 'kmap_global()' to indicate that they require a
   global enablement of the pages.
4) Change ~209 call sites to 'kmap_thread()' to indicate that the mapping is to
   be used within that thread of execution only

Option 1 is simply not feasible.  Option 2 would require all of the call sites
of kmap() to change.  Option 3 seems like a good minimal change but there is a
danger that new code may miss the semantic change of kmap() and not get the
behavior the developer intended.  Therefore, #4 was chosen.

Subsequent patches will convert ~90% of the kmap callers to this new call,
leaving about 10% of the existing kmap callers to enable PKS globally.

Cc: Randy Dunlap 
Signed-off-by: Ira Weiny 
---
 include/linux/highmem.h | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 2a9806e3b8d2..ef7813544719 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -60,7 +60,7 @@ static inline void kmap_flush_tlb(unsigned long addr) { }
 #endif
 
 void *kmap_high(struct page *page);
-static inline void *kmap(struct page *page)
+static inline void *__kmap(struct page *page, bool global)
 {
void *addr;
 
@@ -74,20 +74,20 @@ static inline void *kmap(struct page *page)
 * Even non-highmem pages may have additional access protections which
 * need to be checked and potentially enabled.
 */
-   dev_page_enable_access(page, true);
+   dev_page_enable_access(page, global);
return addr;
 }
 
 void kunmap_high(struct page *page);
 
-static inline void kunmap(struct page *page)
+static inline void __kunmap(struct page *page, bool global)
 {
might_sleep();
/*
 * Even non-highmem pages may have additional access protections which
 * need to be checked and potentially disabled.
 */
-   dev_page_disable_access(page, true);
+   dev_page_disable_access(page, global);
if (!PageHighMem(page))
return;
kunmap_high(page);
@@ -160,10 +160,10 @@ static inline struct page *kmap_to_page(void *addr)
 
 static inline unsigned long totalhigh_pages(void) { return 0UL; }
 
-static inline void *kmap(struct page *page)
+static inline void *__kmap(struct page *page, bool global)
 {
might_sleep();
-   dev_page_enable_access(page, true);
+   dev_page_enable_access(page, global);
return page_address(page);
 }
 
@@ -171,9 +171,9 @@ static inline void kunmap_high(struct page *page)
 {
 }
 
-static inline void kunmap(struct page *page)
+static inline void __kunmap(struct page *page, bool global)
 {
-   dev_page_disable_access(page, true);
+   dev_page_disable_access(page, global);
 #ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(page_address(page));
 #endif
@@ -238,6 +238,24 @@ static inline void kmap_atomic_idx_pop(void)
 
 #endif
 
+static inline void *kmap(struct page *page)
+{
+   return __kmap(page, true);
+}
+static inline void kunmap(struct page *page)
+{
+   __kunmap(page, true);
+}
+
+static inline void *kmap_thread(struct page *page)
+{
+   return __kmap(page, false);
+}
+static inline void kunmap_thread(struct page *page)
+{
+   __kunmap(page, false);
+}
+
 /*
  * Prevent people trying to call kunmap_atomic() as if it were kunmap()
  * kunmap_atomic() should get the return value of kmap_atomic, not the page.
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 24/58] fs/freevxfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Christoph Hellwig 
Signed-off-by: Ira Weiny 
---
 fs/freevxfs/vxfs_immed.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index bfc780c682fb..9c42fec4cd85 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -69,9 +69,9 @@ vxfs_immed_readpage(struct file *fp, struct page *pp)
u_int64_t   offset = (u_int64_t)pp->index << PAGE_SHIFT;
caddr_t kaddr;
 
-   kaddr = kmap(pp);
+   kaddr = kmap_thread(pp);
memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_SIZE);
-   kunmap(pp);
+   kunmap_thread(pp);

flush_dcache_page(pp);
SetPageUptodate(pp);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 14/58] fs/cifs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Steve French 
Signed-off-by: Ira Weiny 
---
 fs/cifs/cifsencrypt.c |  6 +++---
 fs/cifs/file.c| 16 
 fs/cifs/smb2ops.c |  8 
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 9daa256f69d4..2f8232d01a56 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -82,17 +82,17 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
 
rqst_page_get_length(rqst, i, &len, &offset);
 
-   kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
+   kaddr = (char *) kmap_thread(rqst->rq_pages[i]) + offset;
 
rc = crypto_shash_update(shash, kaddr, len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with payload\n",
 __func__);
-   kunmap(rqst->rq_pages[i]);
+   kunmap_thread(rqst->rq_pages[i]);
return rc;
}
 
-   kunmap(rqst->rq_pages[i]);
+   kunmap_thread(rqst->rq_pages[i]);
}
 
rc = crypto_shash_final(shash, signature);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index be46fab4c96d..6db2caab8852 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2145,17 +2145,17 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
inode = page->mapping->host;
 
offset += (loff_t)from;
-   write_data = kmap(page);
+   write_data = kmap_thread(page);
write_data += from;
 
if ((to > PAGE_SIZE) || (from > to)) {
-   kunmap(page);
+   kunmap_thread(page);
return -EIO;
}
 
/* racing with truncate? */
if (offset > mapping->host->i_size) {
-   kunmap(page);
+   kunmap_thread(page);
return 0; /* don't care */
}
 
@@ -2183,7 +2183,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
rc = -EIO;
}
 
-   kunmap(page);
+   kunmap_thread(page);
return rc;
 }
 
@@ -2559,10 +2559,10 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
   known which we might as well leverage */
/* BB check if anything else missing out of ppw
   such as updating last write time */
-   page_data = kmap(page);
+   page_data = kmap_thread(page);
rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
/* if (rc < 0) should we set writebehind rc? */
-   kunmap(page);
+   kunmap_thread(page);
 
free_xid(xid);
} else {
@@ -4511,7 +4511,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
if (rc == 0)
goto read_complete;
 
-   read_data = kmap(page);
+   read_data = kmap_thread(page);
/* for reads over a certain size could initiate async read ahead */
 
rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
@@ -4540,7 +4540,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
rc = 0;
 
 io_error:
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
 
 read_complete:
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 32f90dc82c84..a3e7ebab38b6 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4068,12 +4068,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
 
rqst_page_get_length(&new_rq[i], j, &len, &offset);
 
-   dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset;
-   src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset;
+   dst = (char *) kmap_thread(new_rq[i].rq_pages[j]) + offset;
+   src = (char *) kmap_thread(old_rq[i - 1].rq_pages[j]) + offset;
 
memcpy(dst, src, len);
-   kunmap(new_rq[i].rq_pages[j]);
-   kunmap(old_rq[i - 1].rq_pages[j]);
+   kunmap_thread(new_rq[i].rq_pages[j]);
+   kunmap_thread(old_rq[i - 1].rq_pages[j]);
}
}
 
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 08/58] drivers/firmware_loader: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this driver are localized to a single thread.  To
avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: Luis Chamberlain 
Signed-off-by: Ira Weiny 
---
 drivers/base/firmware_loader/fallback.c | 4 ++--
 drivers/base/firmware_loader/main.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 283ca2de76d4..22dea9ba7a37 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -322,14 +322,14 @@ static void firmware_rw(struct fw_priv *fw_priv, char *buffer,
int page_ofs = offset & (PAGE_SIZE-1);
int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
 
-   page_data = kmap(fw_priv->pages[page_nr]);
+   page_data = kmap_thread(fw_priv->pages[page_nr]);
 
if (read)
memcpy(buffer, page_data + page_ofs, page_cnt);
else
memcpy(page_data + page_ofs, buffer, page_cnt);
 
-   kunmap(fw_priv->pages[page_nr]);
+   kunmap_thread(fw_priv->pages[page_nr]);
buffer += page_cnt;
offset += page_cnt;
count -= page_cnt;
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 63b9714a0154..cc884c9f8742 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -409,11 +409,11 @@ static int fw_decompress_xz_pages(struct device *dev, struct fw_priv *fw_priv,
 
/* decompress onto the new allocated page */
page = fw_priv->pages[fw_priv->nr_pages - 1];
-   xz_buf.out = kmap(page);
+   xz_buf.out = kmap_thread(page);
xz_buf.out_pos = 0;
xz_buf.out_size = PAGE_SIZE;
xz_ret = xz_dec_run(xz_dec, &xz_buf);
-   kunmap(page);
+   kunmap_thread(page);
fw_priv->size += xz_buf.out_pos;
/* partial decompression means either end or error */
if (xz_buf.out_pos != PAGE_SIZE)
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 13/58] fs/btrfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Chris Mason 
Cc: Josef Bacik 
Cc: David Sterba 
Signed-off-by: Ira Weiny 
---
 fs/btrfs/check-integrity.c |  4 ++--
 fs/btrfs/compression.c |  4 ++--
 fs/btrfs/inode.c   | 16 
 fs/btrfs/lzo.c | 24 
 fs/btrfs/raid56.c  | 34 +-
 fs/btrfs/reflink.c |  8 
 fs/btrfs/send.c|  4 ++--
 fs/btrfs/zlib.c| 32 
 fs/btrfs/zstd.c| 20 ++--
 9 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 81a8c87a5afb..9e5a02512ab5 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2706,7 +2706,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
 
bio_for_each_segment(bvec, bio, iter) {
BUG_ON(bvec.bv_len != PAGE_SIZE);
-   mapped_datav[i] = kmap(bvec.bv_page);
+   mapped_datav[i] = kmap_thread(bvec.bv_page);
i++;
 
if (dev_state->state->print_mask &
@@ -2720,7 +2720,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
  bio, &bio_is_patched,
  bio->bi_opf);
bio_for_each_segment(bvec, bio, iter)
-   kunmap(bvec.bv_page);
+   kunmap_thread(bvec.bv_page);
kfree(mapped_datav);
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1ab56a734e70..5944fb36d68a 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1626,7 +1626,7 @@ static void heuristic_collect_sample(struct inode *inode, 
u64 start, u64 end,
curr_sample_pos = 0;
while (index < index_end) {
page = find_get_page(inode->i_mapping, index);
-   in_data = kmap(page);
+   in_data = kmap_thread(page);
/* Handle case where the start is not aligned to PAGE_SIZE */
i = start % PAGE_SIZE;
while (i < PAGE_SIZE - SAMPLING_READ_SIZE) {
@@ -1639,7 +1639,7 @@ static void heuristic_collect_sample(struct inode *inode, 
u64 start, u64 end,
start += SAMPLING_INTERVAL;
curr_sample_pos += SAMPLING_READ_SIZE;
}
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
 
index++;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9570458aa847..9710a52c6c42 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4603,7 +4603,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t 
from, loff_t len,
if (offset != blocksize) {
if (!len)
len = blocksize - offset;
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
if (front)
memset(kaddr + (block_start - page_offset(page)),
0, offset);
@@ -4611,7 +4611,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t 
from, loff_t len,
memset(kaddr + (block_start - page_offset(page)) + offset,
0, len);
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
}
ClearPageChecked(page);
set_page_dirty(page);
@@ -6509,9 +6509,9 @@ static noinline int uncompress_inline(struct btrfs_path 
*path,
 */
 
if (max_size + pg_offset < PAGE_SIZE) {
-   char *map = kmap(page);
+   char *map = kmap_thread(page);
memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
-   kunmap(page);
+   kunmap_thread(page);
}
kfree(tmp);
return ret;
@@ -6704,7 +6704,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode 
*inode,
goto out;
}
} else {
-   map = kmap(page);
+   map = kmap_thread(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
   copy_size);
if (pg_offset + copy_size < PAGE_SIZE) {
@@ -6712,7 +6712,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode 
*inode,
   PAGE_SIZE - pg_offset -

[PATCH RFC PKS/PMEM 10/58] drivers/rdma: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in these drivers are localized to a single thread.  To
avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: Mike Marciniszyn 
Cc: Dennis Dalessandro 
Cc: Doug Ledford 
Cc: Jason Gunthorpe 
Cc: Faisal Latif 
Cc: Shiraz Saleem 
Cc: Bernard Metzler 
Signed-off-by: Ira Weiny 
---
 drivers/infiniband/hw/hfi1/sdma.c  |  4 ++--
 drivers/infiniband/hw/i40iw/i40iw_cm.c | 10 +++++-----
 drivers/infiniband/sw/siw/siw_qp_tx.c  | 14 +++++++-------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/sdma.c 
b/drivers/infiniband/hw/hfi1/sdma.c
index 04575c9afd61..09d206e3229a 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -3130,7 +3130,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, 
struct sdma_txreq *tx,
}
 
if (type == SDMA_MAP_PAGE) {
-   kvaddr = kmap(page);
+   kvaddr = kmap_thread(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
__sdma_txclean(dd, tx);
@@ -3140,7 +3140,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, 
struct sdma_txreq *tx,
memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
tx->coalesce_idx += len;
if (type == SDMA_MAP_PAGE)
-   kunmap(page);
+   kunmap_thread(page);
 
/* If there is more data, return */
if (tx->tlen - tx->coalesce_idx)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c 
b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index a3b95805c154..122d7a5642a1 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3721,7 +3721,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param)
ibmr->device = iwpd->ibpd.device;
iwqp->lsmm_mr = ibmr;
if (iwqp->page)
-   iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+   iwqp->sc_qp.qp_uk.sq_base = kmap_thread(iwqp->page);
dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
iwqp->ietf_mem.va,
(accept.size + conn_param->private_data_len),
@@ -3729,12 +3729,12 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param)
 
} else {
if (iwqp->page)
-   iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+   iwqp->sc_qp.qp_uk.sq_base = kmap_thread(iwqp->page);
dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
}
 
if (iwqp->page)
-   kunmap(iwqp->page);
+   kunmap_thread(iwqp->page);
 
iwqp->cm_id = cm_id;
cm_node->cm_id = cm_id;
@@ -4102,10 +4102,10 @@ static void i40iw_cm_event_connected(struct 
i40iw_cm_event *event)
i40iw_cm_init_tsa_conn(iwqp, cm_node);
read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
if (iwqp->page)
-   iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+   iwqp->sc_qp.qp_uk.sq_base = kmap_thread(iwqp->page);
dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
if (iwqp->page)
-   kunmap(iwqp->page);
+   kunmap_thread(iwqp->page);
 
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTS;
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c 
b/drivers/infiniband/sw/siw/siw_qp_tx.c
index d19d8325588b..4ed37c328d02 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -76,7 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void 
*paddr)
if (unlikely(!p))
return -EFAULT;
 
-   buffer = kmap(p);
+   buffer = kmap_thread(p);
 
if (likely(PAGE_SIZE - off >= bytes)) {
memcpy(paddr, buffer + off, bytes);
@@ -84,7 +84,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void 
*paddr)
unsigned long part = bytes - (PAGE_SIZE - off);
 
memcpy(paddr, buffer + off, part);
-   kunmap(p);
+   kunmap_thread(p);
 
if (!mem->is_pbl)
p = siw_get_upage(mem->umem,
@@ -96,10 +96,10 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void 
*paddr)
if (unlikely(!p))
return -E

[PATCH RFC PKS/PMEM 20/58] fs/jffs2: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: David Woodhouse 
Cc: Richard Weinberger 
Signed-off-by: Ira Weiny 
---
 fs/jffs2/file.c | 4 ++--
 fs/jffs2/gc.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index f8fb89b10227..3e6d54f9b011 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -88,7 +88,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, 
struct page *pg)
 
BUG_ON(!PageLocked(pg));
 
-   pg_buf = kmap(pg);
+   pg_buf = kmap_thread(pg);
/* FIXME: Can kmap fail? */
 
ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT,
@@ -103,7 +103,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, 
struct page *pg)
}
 
flush_dcache_page(pg);
-   kunmap(pg);
+   kunmap_thread(pg);
 
jffs2_dbg(2, "readpage finished\n");
return ret;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 373b3b7c9f44..a7259783ab84 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -1335,7 +1335,7 @@ static int jffs2_garbage_collect_dnode(struct 
jffs2_sb_info *c, struct jffs2_era
return PTR_ERR(page);
}
 
-   pg_ptr = kmap(page);
+   pg_ptr = kmap_thread(page);
mutex_lock(&f->sem);
 
offset = start;
@@ -1400,7 +1400,7 @@ static int jffs2_garbage_collect_dnode(struct 
jffs2_sb_info *c, struct jffs2_era
}
}
 
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
return ret;
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 17/58] fs/nilfs2: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Ryusuke Konishi 
Signed-off-by: Ira Weiny 
---
 fs/nilfs2/alloc.c  | 34 +++++++++++++++++-----------------
 fs/nilfs2/cpfile.c |  4 ++--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index adf3bb0a8048..2aa4c34094ef 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -524,7 +524,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
if (ret < 0)
return ret;
-   desc_kaddr = kmap(desc_bh->b_page);
+   desc_kaddr = kmap_thread(desc_bh->b_page);
desc = nilfs_palloc_block_get_group_desc(
inode, group, desc_bh, desc_kaddr);
n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
@@ -536,7 +536,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
inode, group, 1, &bitmap_bh);
if (ret < 0)
goto out_desc;
-   bitmap_kaddr = kmap(bitmap_bh->b_page);
+   bitmap_kaddr = kmap_thread(bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
pos = nilfs_palloc_find_available_slot(
bitmap, group_offset,
@@ -547,21 +547,21 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
desc, lock, -1);
req->pr_entry_nr =
entries_per_group * group + pos;
-   kunmap(desc_bh->b_page);
-   kunmap(bitmap_bh->b_page);
+   kunmap_thread(desc_bh->b_page);
+   kunmap_thread(bitmap_bh->b_page);
 
req->pr_desc_bh = desc_bh;
req->pr_bitmap_bh = bitmap_bh;
return 0;
}
-   kunmap(bitmap_bh->b_page);
+   kunmap_thread(bitmap_bh->b_page);
brelse(bitmap_bh);
}
 
group_offset = 0;
}
 
-   kunmap(desc_bh->b_page);
+   kunmap_thread(desc_bh->b_page);
brelse(desc_bh);
}
 
@@ -569,7 +569,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
return -ENOSPC;
 
  out_desc:
-   kunmap(desc_bh->b_page);
+   kunmap_thread(desc_bh->b_page);
brelse(desc_bh);
return ret;
 }
@@ -605,10 +605,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
spinlock_t *lock;
 
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
-   desc_kaddr = kmap(req->pr_desc_bh->b_page);
+   desc_kaddr = kmap_thread(req->pr_desc_bh->b_page);
desc = nilfs_palloc_block_get_group_desc(inode, group,
 req->pr_desc_bh, desc_kaddr);
-   bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
+   bitmap_kaddr = kmap_thread(req->pr_bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
lock = nilfs_mdt_bgl_lock(inode, group);
 
@@ -620,8 +620,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
else
nilfs_palloc_group_desc_add_entries(desc, lock, 1);
 
-   kunmap(req->pr_bitmap_bh->b_page);
-   kunmap(req->pr_desc_bh->b_page);
+   kunmap_thread(req->pr_bitmap_bh->b_page);
+   kunmap_thread(req->pr_desc_bh->b_page);
 
mark_buffer_dirty(req->pr_desc_bh);
mark_buffer_dirty(req->pr_bitmap_bh);
@@ -646,10 +646,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
spinlock_t *lock;
 
group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
-   desc_kaddr = kmap(req->pr_desc_bh->b_page);
+   desc_kaddr = kmap_thread(req->pr_desc_bh->b_page);
desc = nilfs_palloc_block_get_group_desc(inode, group,
 req->pr_desc_bh, desc_kaddr);
-   bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
+   bitmap_kaddr = kmap_thread(req->pr_bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
lock = nilfs_mdt_bgl_lock(inode, group);
 
@@ -6

[PATCH RFC PKS/PMEM 25/58] fs/reiserfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Jan Kara 
Cc: "Theodore Ts'o" 
Cc: Randy Dunlap 
Cc: Alex Shi 
Signed-off-by: Ira Weiny 
---
 fs/reiserfs/journal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e98f99338f8f..be8f56261e8c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -4194,11 +4194,11 @@ static int do_journal_end(struct 
reiserfs_transaction_handle *th, int flags)
SB_ONDISK_JOURNAL_SIZE(sb)));
set_buffer_uptodate(tmp_bh);
page = cn->bh->b_page;
-   addr = kmap(page);
+   addr = kmap_thread(page);
memcpy(tmp_bh->b_data,
   addr + offset_in_page(cn->bh->b_data),
   cn->bh->b_size);
-   kunmap(page);
+   kunmap_thread(page);
mark_buffer_dirty(tmp_bh);
jindex++;
set_buffer_journal_dirty(cn->bh);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 15/58] fs/ecryptfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Herbert Xu 
Cc: Eric Biggers 
Cc: Aditya Pakki 
Signed-off-by: Ira Weiny 
---
 fs/ecryptfs/crypto.c     | 8 ++++----
 fs/ecryptfs/read_write.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 0681540c48d9..e73e00994bee 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -469,10 +469,10 @@ int ecryptfs_encrypt_page(struct page *page)
}
 
lower_offset = lower_offset_for_page(crypt_stat, page);
-   enc_extent_virt = kmap(enc_extent_page);
+   enc_extent_virt = kmap_thread(enc_extent_page);
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
  PAGE_SIZE);
-   kunmap(enc_extent_page);
+   kunmap_thread(enc_extent_page);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to write lower page; rc = [%d]\n",
@@ -518,10 +518,10 @@ int ecryptfs_decrypt_page(struct page *page)
BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
 
lower_offset = lower_offset_for_page(crypt_stat, page);
-   page_virt = kmap(page);
+   page_virt = kmap_thread(page);
rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
 ecryptfs_inode);
-   kunmap(page);
+   kunmap_thread(page);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to read lower page; rc = [%d]\n",
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0438997ac9d8..5eca4330c0c0 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -64,11 +64,11 @@ int ecryptfs_write_lower_page_segment(struct inode 
*ecryptfs_inode,
 
offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
  + offset_in_page);
-   virt = kmap(page_for_lower);
+   virt = kmap_thread(page_for_lower);
rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
if (rc > 0)
rc = 0;
-   kunmap(page_for_lower);
+   kunmap_thread(page_for_lower);
return rc;
 }
 
@@ -251,11 +251,11 @@ int ecryptfs_read_lower_page_segment(struct page 
*page_for_ecryptfs,
int rc;
 
offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
-   virt = kmap(page_for_ecryptfs);
+   virt = kmap_thread(page_for_ecryptfs);
rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
if (rc > 0)
rc = 0;
-   kunmap(page_for_ecryptfs);
+   kunmap_thread(page_for_ecryptfs);
flush_dcache_page(page_for_ecryptfs);
return rc;
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 19/58] fs/hfsplus: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.
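
Several of the helpers converted below (hfs_bnode_read() and friends)
walk b-tree nodes that span more than one page, remapping at each page
boundary.  A condensed sketch of that access pattern (assuming the
kmap_thread() interface from this series; names are illustrative):

#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Read len bytes that may span several pages of a b-tree node. */
static void bnode_read_sketch(struct page **pagep, int off,
			      void *buf, int len)
{
	int l = min_t(int, len, PAGE_SIZE - off);

	memcpy(buf, kmap_thread(*pagep) + off, l);
	kunmap_thread(*pagep);

	while ((len -= l) != 0) {
		buf += l;
		l = min_t(int, len, PAGE_SIZE);
		/* Map the next page of the node, one page at a time. */
		memcpy(buf, kmap_thread(*++pagep), l);
		kunmap_thread(*pagep);
	}
}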

Signed-off-by: Ira Weiny 
---
 fs/hfsplus/bitmap.c |  20 ++++++++++----------
 fs/hfsplus/bnode.c  | 102 +++++++++++++++++++++++++-------------------------
 fs/hfsplus/btree.c  |  18 +++++++++---------
 3 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index cebce0cfe340..9ec7c1559a0c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -39,7 +39,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
start = size;
goto out;
}
-   pptr = kmap(page);
+   pptr = kmap_thread(page);
curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
i = offset % 32;
offset &= ~(PAGE_CACHE_BITS - 1);
@@ -74,7 +74,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
}
curr++;
}
-   kunmap(page);
+   kunmap_thread(page);
offset += PAGE_CACHE_BITS;
if (offset >= size)
break;
@@ -84,7 +84,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
start = size;
goto out;
}
-   curr = pptr = kmap(page);
+   curr = pptr = kmap_thread(page);
if ((size ^ offset) / PAGE_CACHE_BITS)
end = pptr + PAGE_CACHE_BITS / 32;
else
@@ -127,7 +127,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
len -= 32;
}
set_page_dirty(page);
-   kunmap(page);
+   kunmap_thread(page);
offset += PAGE_CACHE_BITS;
page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS,
 NULL);
@@ -135,7 +135,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
start = size;
goto out;
}
-   pptr = kmap(page);
+   pptr = kmap_thread(page);
curr = pptr;
end = pptr + PAGE_CACHE_BITS / 32;
}
@@ -151,7 +151,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size,
 done:
*curr = cpu_to_be32(n);
set_page_dirty(page);
-   kunmap(page);
+   kunmap_thread(page);
*max = offset + (curr - pptr) * 32 + i - start;
sbi->free_blocks -= *max;
hfsplus_mark_mdb_dirty(sb);
@@ -185,7 +185,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, 
u32 count)
page = read_mapping_page(mapping, pnr, NULL);
if (IS_ERR(page))
goto kaboom;
-   pptr = kmap(page);
+   pptr = kmap_thread(page);
curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32;
end = pptr + PAGE_CACHE_BITS / 32;
len = count;
@@ -215,11 +215,11 @@ int hfsplus_block_free(struct super_block *sb, u32 
offset, u32 count)
if (!count)
break;
set_page_dirty(page);
-   kunmap(page);
+   kunmap_thread(page);
page = read_mapping_page(mapping, ++pnr, NULL);
if (IS_ERR(page))
goto kaboom;
-   pptr = kmap(page);
+   pptr = kmap_thread(page);
curr = pptr;
end = pptr + PAGE_CACHE_BITS / 32;
}
@@ -231,7 +231,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, 
u32 count)
}
 out:
set_page_dirty(page);
-   kunmap(page);
+   kunmap_thread(page);
sbi->free_blocks += len;
hfsplus_mark_mdb_dirty(sb);
mutex_unlock(&sbi->alloc_mutex);
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 177fae4e6581..62757d92fbbd 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -29,14 +29,14 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int 
off, int len)
off &= ~PAGE_MASK;
 
l = min_t(int, len, PAGE_SIZE - off);
-   memcpy(buf, kmap(*pagep) + off, l);
-   kunmap(*pagep);
+   memcpy(buf, kmap_thread(*pagep) + off, l);
+   kunmap_thread(*pagep);
 
while ((len -= l) != 0) {
buf += l;
l = min_t(int, len, PAGE_SIZE);
-   memcpy(buf, kmap(*++pagep), l);
-   kunmap(*pagep);
+   memcpy(buf, kmap_thread(*++pagep), l);
+   kunmap_thread(*pagep);
}
 }
 
@@ -82,16 +82,16 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int 
off, int len)
off &= ~PAGE_MASK;
 
l = min_t(int, len, PAGE_SIZE - off);
-   memcpy(kmap(*pagep) + off, buf, l);
+   memcpy(kmap_thread(*pagep) + off, buf, l);
set_page_

[PATCH RFC PKS/PMEM 18/58] fs/hfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Signed-off-by: Ira Weiny 
---
 fs/hfs/bnode.c | 14 +++++++-------
 fs/hfs/btree.c | 20 ++++++++++----------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index b63a4df7327b..8b4d02576405 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -23,8 +23,8 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf,
off += node->page_offset;
page = node->page[0];
 
-   memcpy(buf, kmap(page) + off, len);
-   kunmap(page);
+   memcpy(buf, kmap_thread(page) + off, len);
+   kunmap_thread(page);
 }
 
 u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
@@ -108,9 +108,9 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
src_page = src_node->page[0];
dst_page = dst_node->page[0];
 
-   memcpy(kmap(dst_page) + dst, kmap(src_page) + src, len);
-   kunmap(src_page);
-   kunmap(dst_page);
+   memcpy(kmap_thread(dst_page) + dst, kmap_thread(src_page) + src, len);
+   kunmap_thread(src_page);
+   kunmap_thread(dst_page);
set_page_dirty(dst_page);
 }
 
@@ -125,9 +125,9 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int 
src, int len)
src += node->page_offset;
dst += node->page_offset;
page = node->page[0];
-   ptr = kmap(page);
+   ptr = kmap_thread(page);
memmove(ptr + dst, ptr + src, len);
-   kunmap(page);
+   kunmap_thread(page);
set_page_dirty(page);
 }
 
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 19017d296173..bd4a6d35e361 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -80,7 +80,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 
id, btree_keycmp ke
goto free_inode;
 
/* Load the header */
-   head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
+   head = (struct hfs_btree_header_rec *)(kmap_thread(page) + sizeof(struct hfs_bnode_desc));
tree->root = be32_to_cpu(head->root);
tree->leaf_count = be32_to_cpu(head->leaf_count);
tree->leaf_head = be32_to_cpu(head->leaf_head);
@@ -119,7 +119,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, 
u32 id, btree_keycmp ke
tree->node_size_shift = ffs(size) - 1;
tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
return tree;
 
@@ -268,7 +268,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
 
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
-   data = kmap(*pagep);
+   data = kmap_thread(*pagep);
off &= ~PAGE_MASK;
idx = 0;
 
@@ -281,7 +281,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
idx += i;
data[off] |= m;
set_page_dirty(*pagep);
-   kunmap(*pagep);
+   kunmap_thread(*pagep);
tree->free_nodes--;
mark_inode_dirty(tree->inode);
hfs_bnode_put(node);
@@ -290,14 +290,14 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
}
}
if (++off >= PAGE_SIZE) {
-   kunmap(*pagep);
-   data = kmap(*++pagep);
+   kunmap_thread(*pagep);
+   data = kmap_thread(*++pagep);
off = 0;
}
idx += 8;
len--;
}
-   kunmap(*pagep);
+   kunmap_thread(*pagep);
nidx = node->next;
if (!nidx) {
printk(KERN_DEBUG "create new bmap node...\n");
@@ -313,7 +313,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
-   data = kmap(*pagep);
+   data = kmap_thread(*pagep);
off &= ~PAGE_MASK;
}
 }
@@ -360,7 +360,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
}
off += node->page_offset + nidx / 8;
page = node->page[off >> PAGE_SHIFT];
-   data = kmap(page);
+   data = kmap_thread(page);
off &= ~PAGE_MASK;
m = 1 <

[PATCH RFC PKS/PMEM 28/58] fs/cachefiles: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: David Howells 
Signed-off-by: Ira Weiny 
---
 fs/cachefiles/rdwr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 3080cda9e824..2468e5c067ba 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -936,9 +936,9 @@ int cachefiles_write_page(struct fscache_storage *op, 
struct page *page)
}
}
 
-   data = kmap(page);
+   data = kmap_thread(page);
ret = kernel_write(file, data, len, &pos);
-   kunmap(page);
+   kunmap_thread(page);
fput(file);
if (ret != len)
goto error_eio;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 16/58] fs/gfs2: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Bob Peterson 
Cc: Andreas Gruenbacher 
Signed-off-by: Ira Weiny 
---
 fs/gfs2/bmap.c   | 4 ++--
 fs/gfs2/ops_fstype.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0f69fbd4af66..375af4528411 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -67,7 +67,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct 
buffer_head *dibh,
}
 
if (!PageUptodate(page)) {
-   void *kaddr = kmap(page);
+   void *kaddr = kmap_thread(page);
u64 dsize = i_size_read(inode);
  
if (dsize > gfs2_max_stuffed_size(ip))
@@ -75,7 +75,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct 
buffer_head *dibh,
 
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
-   kunmap(page);
+   kunmap_thread(page);
 
SetPageUptodate(page);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6d18d2c91add..a5d20d9b504a 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -263,9 +263,9 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t 
sector, int silent)
__free_page(page);
return -EIO;
}
-   p = kmap(page);
+   p = kmap_thread(page);
gfs2_sb_in(sdp, p);
-   kunmap(page);
+   kunmap_thread(page);
__free_page(page);
return gfs2_check_sb(sdp, silent);
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 23/58] fs/fuse: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Miklos Szeredi 
Signed-off-by: Ira Weiny 
---
 fs/fuse/readdir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 90e3f01bd796..953ffe6f56e3 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -536,9 +536,9 @@ static int fuse_readdir_cached(struct file *file, struct 
dir_context *ctx)
 * Contents of the page are now protected against changing by holding
 * the page lock.
 */
-   addr = kmap(page);
+   addr = kmap_thread(page);
res = fuse_parse_cache(ff, addr, size, ctx);
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
put_page(page);
 
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 22/58] fs/f2fs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.
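
One detail worth noting: f2fs_copy_page() (below) holds two mappings
live at the same time, so kmap_thread() is assumed to nest the same way
kmap() does.  A sketch of that shape (illustrative, not the exact f2fs
code):

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy one page to another; both thread-local mappings coexist. */
static void copy_page_sketch(struct page *dst, struct page *src)
{
	char *src_kaddr = kmap_thread(src);
	char *dst_kaddr = kmap_thread(dst);

	memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);

	/* Unmap in the reverse order of mapping. */
	kunmap_thread(dst);
	kunmap_thread(src);
}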

Cc: Jaegeuk Kim 
Cc: Chao Yu 
Signed-off-by: Ira Weiny 
---
 fs/f2fs/f2fs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d9e52a7f3702..ff72a45a577e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2410,12 +2410,12 @@ static inline struct page *f2fs_pagecache_get_page(
 
 static inline void f2fs_copy_page(struct page *src, struct page *dst)
 {
-   char *src_kaddr = kmap(src);
-   char *dst_kaddr = kmap(dst);
+   char *src_kaddr = kmap_thread(src);
+   char *dst_kaddr = kmap_thread(dst);
 
memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);
-   kunmap(dst);
-   kunmap(src);
+   kunmap_thread(dst);
+   kunmap_thread(src);
 }
 
 static inline void f2fs_put_page(struct page *page, int unlock)
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 21/58] fs/nfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Trond Myklebust 
Cc: Anna Schumaker 
Signed-off-by: Ira Weiny 
---
 fs/nfs/dir.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index cb52db9a0cfb..fee321acccb4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -213,7 +213,7 @@ int nfs_readdir_make_qstr(struct qstr *string, const char 
*name, unsigned int le
 static
 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 {
-   struct nfs_cache_array *array = kmap(page);
+   struct nfs_cache_array *array = kmap_thread(page);
struct nfs_cache_array_entry *cache_entry;
int ret;
 
@@ -235,7 +235,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, 
struct page *page)
if (entry->eof != 0)
array->eof_index = array->size;
 out:
-   kunmap(page);
+   kunmap_thread(page);
return ret;
 }
 
@@ -347,7 +347,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
struct nfs_cache_array *array;
int status;
 
-   array = kmap(desc->page);
+   array = kmap_thread(desc->page);
 
if (*desc->dir_cookie == 0)
status = nfs_readdir_search_for_pos(array, desc);
@@ -359,7 +359,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
desc->current_index += array->size;
desc->page_index++;
}
-   kunmap(desc->page);
+   kunmap_thread(desc->page);
return status;
 }
 
@@ -602,10 +602,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t 
*desc, struct nfs_entry *en
 
 out_nopages:
if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-   array = kmap(page);
+   array = kmap_thread(page);
array->eof_index = array->size;
status = 0;
-   kunmap(page);
+   kunmap_thread(page);
}
 
put_page(scratch);
@@ -669,7 +669,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t 
*desc, struct page *page,
goto out;
}
 
-   array = kmap(page);
+   array = kmap_thread(page);
 
status = nfs_readdir_alloc_pages(pages, array_size);
if (status < 0)
@@ -691,7 +691,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t 
*desc, struct page *page,
 
nfs_readdir_free_pages(pages, array_size);
 out_release_array:
-   kunmap(page);
+   kunmap_thread(page);
nfs4_label_free(entry.label);
 out:
nfs_free_fattr(entry.fattr);
@@ -803,7 +803,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
struct nfs_cache_array *array = NULL;
struct nfs_open_dir_context *ctx = file->private_data;
 
-   array = kmap(desc->page);
+   array = kmap_thread(desc->page);
for (i = desc->cache_entry_index; i < array->size; i++) {
struct nfs_cache_array_entry *ent;
 
@@ -827,7 +827,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
if (array->eof_index >= 0)
desc->eof = true;
 
-   kunmap(desc->page);
+   kunmap_thread(desc->page);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
(unsigned long long)*desc->dir_cookie, res);
return res;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 31/58] fs/vboxsf: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Hans de Goede 
Signed-off-by: Ira Weiny 
---
 fs/vboxsf/file.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index c4ab5996d97a..d9c7e6b7b4cc 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -216,7 +216,7 @@ static int vboxsf_readpage(struct file *file, struct page 
*page)
u8 *buf;
int err;
 
-   buf = kmap(page);
+   buf = kmap_thread(page);
 
err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
if (err == 0) {
@@ -227,7 +227,7 @@ static int vboxsf_readpage(struct file *file, struct page 
*page)
SetPageError(page);
}
 
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
return err;
 }
@@ -268,10 +268,10 @@ static int vboxsf_writepage(struct page *page, struct 
writeback_control *wbc)
if (!sf_handle)
return -EBADF;
 
-   buf = kmap(page);
+   buf = kmap_thread(page);
err = vboxsf_write(sf_handle->root, sf_handle->handle,
   off, &nwrite, buf);
-   kunmap(page);
+   kunmap_thread(page);
 
kref_put(&sf_handle->refcount, vboxsf_handle_release);
 
@@ -302,10 +302,10 @@ static int vboxsf_write_end(struct file *file, struct 
address_space *mapping,
if (!PageUptodate(page) && copied < len)
zero_user(page, from + copied, len - copied);
 
-   buf = kmap(page);
+   buf = kmap_thread(page);
err = vboxsf_write(sf_handle->root, sf_handle->handle,
   pos, &nwritten, buf + from);
-   kunmap(page);
+   kunmap_thread(page);
 
if (err) {
nwritten = 0;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 29/58] fs/ntfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Anton Altaparmakov 
Signed-off-by: Ira Weiny 
---
 fs/ntfs/aops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index bb0a43860ad2..11633d732809 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1099,7 +1099,7 @@ static int ntfs_write_mst_block(struct page *page,
if (!nr_bhs)
goto done;
/* Map the page so we can access its contents. */
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
/* Clear the page uptodate flag whilst the mst fixups are applied. */
BUG_ON(!PageUptodate(page));
ClearPageUptodate(page);
@@ -1276,7 +1276,7 @@ static int ntfs_write_mst_block(struct page *page,
iput(VFS_I(base_tni));
}
SetPageUptodate(page);
-   kunmap(page);
+   kunmap_thread(page);
 done:
if (unlikely(err && err != -ENOMEM)) {
/*
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 30/58] fs/romfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Signed-off-by: Ira Weiny 
---
 fs/romfs/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e582d001f792..9050074c6755 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -107,7 +107,7 @@ static int romfs_readpage(struct file *file, struct page 
*page)
void *buf;
int ret;
 
-   buf = kmap(page);
+   buf = kmap_thread(page);
if (!buf)
return -ENOMEM;
 
@@ -136,7 +136,7 @@ static int romfs_readpage(struct file *file, struct page 
*page)
SetPageUptodate(page);
 
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
return ret;
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 37/58] fs/ext2: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call instead.

Cc: Jan Kara 
Signed-off-by: Ira Weiny 
---
 fs/ext2/dir.c  | 2 +-
 fs/ext2/ext2.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index f3194bf20733..abe97ba458c8 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -196,7 +196,7 @@ static struct page * ext2_get_page(struct inode *dir, 
unsigned long n,
struct address_space *mapping = dir->i_mapping;
struct page *page = read_mapping_page(mapping, n, NULL);
if (!IS_ERR(page)) {
-   kmap(page);
+   kmap_thread(page);
if (unlikely(!PageChecked(page))) {
if (PageError(page) || !ext2_check_page(page, quiet))
goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 021ec8b42ac3..9bcb6714c255 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -749,7 +749,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode 
*, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
 static inline void ext2_put_page(struct page *page)
 {
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
 }
 
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 36/58] fs/ext2: Use ext2_put_page

2020-10-09 Thread ira . weiny
From: Ira Weiny 

There are 3 places in namei.c where the equivalent of ext2_put_page() is
open-coded.  We want to use k[un]map_thread() instead of k[un]map() in
ext2_[get|put]_page().

Move ext2_put_page() to ext2.h and use it in namei.c in preparation for
converting the k[un]map() code.
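
With the helper visible from ext2.h, every caller follows the same
get/put pairing.  A sketch of the resulting caller shape (the caller
below is hypothetical; ext2_get_page() lives in dir.c and kmaps the
page, ext2_put_page() undoes both steps, and both are switched to
k[un]map_thread() in the follow-on patch):

#include <linux/err.h>
#include <linux/pagemap.h>

/* Hypothetical caller: map a directory page, use it, release it. */
static int example_scan_dir_page(struct inode *dir, unsigned long n)
{
	struct page *page = ext2_get_page(dir, n, 0);	/* kmaps the page */

	if (IS_ERR(page))
		return PTR_ERR(page);

	/* ... inspect the ext2_dir_entry_2 records on the page ... */

	ext2_put_page(page);				/* kunmap + put_page */
	return 0;
}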

Cc: Jan Kara 
Signed-off-by: Ira Weiny 
---
 fs/ext2/dir.c   |  6 ------
 fs/ext2/ext2.h  |  8 ++++++++
 fs/ext2/namei.c | 15 +++++----------
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 70355ab6740e..f3194bf20733 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -66,12 +66,6 @@ static inline unsigned ext2_chunk_size(struct inode *inode)
return inode->i_sb->s_blocksize;
 }
 
-static inline void ext2_put_page(struct page *page)
-{
-   kunmap(page);
-   put_page(page);
-}
-
 /*
  * Return the offset into page `page_nr' of the last valid
  * byte in that page, plus one.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 5136b7289e8d..021ec8b42ac3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /* XXX Here for now... not interested in restructing headers JUST now */
 
@@ -745,6 +747,12 @@ extern int ext2_delete_entry (struct ext2_dir_entry_2 *, 
struct page *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
+static inline void ext2_put_page(struct page *page)
+{
+   kunmap(page);
+   put_page(page);
+}
+
 
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct 
qstr *);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 5bf2c145643b..ea980f1e2e99 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -389,23 +389,18 @@ static int ext2_rename (struct inode * old_dir, struct 
dentry * old_dentry,
if (dir_de) {
if (old_dir != new_dir)
ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
-   else {
-   kunmap(dir_page);
-   put_page(dir_page);
-   }
+   else
+   ext2_put_page(dir_page);
inode_dec_link_count(old_dir);
}
return 0;
 
 
 out_dir:
-   if (dir_de) {
-   kunmap(dir_page);
-   put_page(dir_page);
-   }
+   if (dir_de)
+   ext2_put_page(dir_page);
 out_old:
-   kunmap(old_page);
-   put_page(old_page);
+   ext2_put_page(old_page);
 out:
return err;
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 35/58] fs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Alexander Viro 
Cc: Jens Axboe 
Signed-off-by: Ira Weiny 
---
 fs/aio.c  |  4 ++--
 fs/binfmt_elf.c   |  4 ++--
 fs/binfmt_elf_fdpic.c |  4 ++--
 fs/exec.c             | 10 +++++-----
 fs/io_uring.c |  4 ++--
 fs/splice.c   |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index d5ec30385566..27f95996d25f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1223,10 +1223,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
 
-   ev = kmap(page);
+   ev = kmap_thread(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
-   kunmap(page);
+   kunmap_thread(page);
 
if (unlikely(copy_ret)) {
ret = -EFAULT;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 13d053982dd7..1a332ef1ae03 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2430,9 +2430,9 @@ static int elf_core_dump(struct coredump_params *cprm)
 
page = get_dump_page(addr);
if (page) {
-   void *kaddr = kmap(page);
+   void *kaddr = kmap_thread(page);
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
} else
stop = !dump_skip(cprm, PAGE_SIZE);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 50f845702b92..8fbe188e0fdd 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1542,9 +1542,9 @@ static bool elf_fdpic_dump_segments(struct 
coredump_params *cprm)
bool res;
struct page *page = get_dump_page(addr);
if (page) {
-   void *kaddr = kmap(page);
+   void *kaddr = kmap_thread(page);
res = dump_emit(cprm, kaddr, PAGE_SIZE);
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
} else {
res = dump_skip(cprm, PAGE_SIZE);
diff --git a/fs/exec.c b/fs/exec.c
index a91003e28eaa..3948b8511e3a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -575,11 +575,11 @@ static int copy_strings(int argc, struct user_arg_ptr 
argv,
 
if (kmapped_page) {
flush_kernel_dcache_page(kmapped_page);
-   kunmap(kmapped_page);
+   kunmap_thread(kmapped_page);
put_arg_page(kmapped_page);
}
kmapped_page = page;
-   kaddr = kmap(kmapped_page);
+   kaddr = kmap_thread(kmapped_page);
kpos = pos & PAGE_MASK;
flush_arg_page(bprm, kpos, kmapped_page);
}
@@ -593,7 +593,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 out:
if (kmapped_page) {
flush_kernel_dcache_page(kmapped_page);
-   kunmap(kmapped_page);
+   kunmap_thread(kmapped_page);
put_arg_page(kmapped_page);
}
return ret;
@@ -871,11 +871,11 @@ int transfer_args_to_stack(struct linux_binprm *bprm,
 
for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
-   char *src = kmap(bprm->page[index]) + offset;
+   char *src = kmap_thread(bprm->page[index]) + offset;
sp -= PAGE_SIZE - offset;
if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
ret = -EFAULT;
-   kunmap(bprm->page[index]);
+   kunmap_thread(bprm->page[index]);
if (ret)
goto out;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index aae0ef2ec34d..f59bb079822d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2903,7 +2903,7 @@ static ssize_t loop_rw_iter(int rw, struct file *file, 
struct kiocb *kiocb,
iovec = iov_iter_iovec(iter);
} else {
/* fixed buffers import bvec */
-   iovec.

[PATCH RFC PKS/PMEM 33/58] fs/cramfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Nicolas Pitre 
Signed-off-by: Ira Weiny 
---
 fs/cramfs/inode.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 912308600d39..003c014a42ed 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -247,8 +247,8 @@ static void *cramfs_blkdev_read(struct super_block *sb, 
unsigned int offset,
struct page *page = pages[i];
 
if (page) {
-   memcpy(data, kmap(page), PAGE_SIZE);
-   kunmap(page);
+   memcpy(data, kmap_thread(page), PAGE_SIZE);
+   kunmap_thread(page);
put_page(page);
} else
memset(data, 0, PAGE_SIZE);
@@ -826,7 +826,7 @@ static int cramfs_readpage(struct file *file, struct page 
*page)
 
maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
bytes_filled = 0;
-   pgdata = kmap(page);
+   pgdata = kmap_thread(page);
 
if (page->index < maxblock) {
struct super_block *sb = inode->i_sb;
@@ -914,13 +914,13 @@ static int cramfs_readpage(struct file *file, struct page 
*page)
 
memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
SetPageUptodate(page);
unlock_page(page);
return 0;
 
 err:
-   kunmap(page);
+   kunmap_thread(page);
ClearPageUptodate(page);
SetPageError(page);
unlock_page(page);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 27/58] fs/ubifs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Richard Weinberger 
Signed-off-by: Ira Weiny 
---
 fs/ubifs/file.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b77d1637bbbc..a3537447a885 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -111,7 +111,7 @@ static int do_readpage(struct page *page)
ubifs_assert(c, !PageChecked(page));
ubifs_assert(c, !PagePrivate(page));
 
-   addr = kmap(page);
+   addr = kmap_thread(page);
 
block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
@@ -174,7 +174,7 @@ static int do_readpage(struct page *page)
SetPageUptodate(page);
ClearPageError(page);
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
return 0;
 
 error:
@@ -182,7 +182,7 @@ static int do_readpage(struct page *page)
ClearPageUptodate(page);
SetPageError(page);
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
return err;
 }
 
@@ -616,7 +616,7 @@ static int populate_page(struct ubifs_info *c, struct page 
*page,
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
inode->i_ino, page->index, i_size, page->flags);
 
-   addr = zaddr = kmap(page);
+   addr = zaddr = kmap_thread(page);
 
end_index = (i_size - 1) >> PAGE_SHIFT;
if (!i_size || page->index > end_index) {
@@ -692,7 +692,7 @@ static int populate_page(struct ubifs_info *c, struct page 
*page,
SetPageUptodate(page);
ClearPageError(page);
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
*n = nn;
return 0;
 
@@ -700,7 +700,7 @@ static int populate_page(struct ubifs_info *c, struct page 
*page,
ClearPageUptodate(page);
SetPageError(page);
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
ubifs_err(c, "bad data node (block %u, inode %lu)",
  page_block, inode->i_ino);
return -EINVAL;
@@ -918,7 +918,7 @@ static int do_writepage(struct page *page, int len)
/* Update radix tree tags */
set_page_writeback(page);
 
-   addr = kmap(page);
+   addr = kmap_thread(page);
block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
i = 0;
while (len) {
@@ -950,7 +950,7 @@ static int do_writepage(struct page *page, int len)
ClearPagePrivate(page);
ClearPageChecked(page);
 
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
end_page_writeback(page);
return err;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 39/58] fs/jffs2: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Signed-off-by: Ira Weiny 
---
 fs/jffs2/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 3e6d54f9b011..14dd2b18cc16 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -287,13 +287,13 @@ static int jffs2_write_end(struct file *filp, struct 
address_space *mapping,
 
/* In 2.4, it was already kmapped by generic_file_write(). Doesn't
   hurt to do it again. The alternative is ifdefs, which are ugly. */
-   kmap(pg);
+   kmap_thread(pg);
 
ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
  (pg->index << PAGE_SHIFT) + aligned_start,
  end - aligned_start, &writtenlen);
 
-   kunmap(pg);
+   kunmap_thread(pg);
 
if (ret) {
/* There was an error writing. */
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 32/58] fs/hostfs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Jeff Dike 
Cc: Richard Weinberger 
Cc: Anton Ivanov 
Signed-off-by: Ira Weiny 
---
 fs/hostfs/hostfs_kern.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index c070c0d8e3e9..608efd0f83cb 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -409,7 +409,7 @@ static int hostfs_writepage(struct page *page, struct 
writeback_control *wbc)
if (page->index >= end_index)
count = inode->i_size & (PAGE_SIZE-1);
 
-   buffer = kmap(page);
+   buffer = kmap_thread(page);
 
err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
if (err != count) {
@@ -425,7 +425,7 @@ static int hostfs_writepage(struct page *page, struct 
writeback_control *wbc)
err = 0;
 
  out:
-   kunmap(page);
+   kunmap_thread(page);
 
unlock_page(page);
return err;
@@ -437,7 +437,7 @@ static int hostfs_readpage(struct file *file, struct page 
*page)
loff_t start = page_offset(page);
int bytes_read, ret = 0;
 
-   buffer = kmap(page);
+   buffer = kmap_thread(page);
bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
PAGE_SIZE);
if (bytes_read < 0) {
@@ -454,7 +454,7 @@ static int hostfs_readpage(struct file *file, struct page 
*page)
 
  out:
flush_dcache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
return ret;
 }
@@ -480,9 +480,9 @@ static int hostfs_write_end(struct file *file, struct 
address_space *mapping,
unsigned from = pos & (PAGE_SIZE - 1);
int err;
 
-   buffer = kmap(page);
+   buffer = kmap_thread(page);
err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
-   kunmap(page);
+   kunmap_thread(page);
 
if (!PageUptodate(page) && err == PAGE_SIZE)
SetPageUptodate(page);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 26/58] fs/zonefs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Damien Le Moal 
Cc: Naohiro Aota 
Signed-off-by: Ira Weiny 
---
 fs/zonefs/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 8ec7c8f109d7..2fd6c86beee1 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1297,7 +1297,7 @@ static int zonefs_read_super(struct super_block *sb)
if (ret)
goto free_page;
 
-   super = kmap(page);
+   super = kmap_thread(page);
 
ret = -EINVAL;
if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
@@ -1349,7 +1349,7 @@ static int zonefs_read_super(struct super_block *sb)
ret = 0;
 
 unmap:
-   kunmap(page);
+   kunmap_thread(page);
 free_page:
__free_page(page);
 
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 38/58] fs/isofs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Signed-off-by: Ira Weiny 
---
 fs/isofs/compress.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index bc12ac7e2312..ddd3fd99d2e1 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -344,7 +344,7 @@ static int zisofs_readpage(struct file *file, struct page 
*page)
pages[i] = grab_cache_page_nowait(mapping, index);
if (pages[i]) {
ClearPageError(pages[i]);
-   kmap(pages[i]);
+   kmap_thread(pages[i]);
}
}
 
@@ -356,7 +356,7 @@ static int zisofs_readpage(struct file *file, struct page 
*page)
flush_dcache_page(pages[i]);
if (i == full_page && err)
SetPageError(pages[i]);
-   kunmap(pages[i]);
+   kunmap_thread(pages[i]);
unlock_page(pages[i]);
if (i != full_page)
put_page(pages[i]);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 34/58] fs/erofs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To avoid
the overhead of global PKRS updates, use the new kmap_thread() call.
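
erofs is the one conversion in this set where a teardown path has to
tell the two mapping types apart: xattr_iter_end() (below) unmaps with
kunmap_atomic() or kunmap_thread() depending on how the iterator was
mapped.  A sketch of that shape (illustrative struct and function
names; assumes kunmap_thread() from this series):

#include <linux/highmem.h>

struct xattr_iter_sketch {
	struct page *page;	/* page backing the current position */
	void *kaddr;		/* its mapped address */
};

/* Tear down whichever mapping type was set up for the iterator. */
static void iter_end_sketch(struct xattr_iter_sketch *it, bool atomic)
{
	if (atomic)
		kunmap_atomic(it->kaddr);	/* unmap by address */
	else
		kunmap_thread(it->page);	/* unmap by page */
}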

Cc: Gao Xiang 
Cc: Chao Yu 
Signed-off-by: Ira Weiny 
---
 fs/erofs/super.c | 4 ++--
 fs/erofs/xattr.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index ddaa516c008a..41696b60f1b3 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -139,7 +139,7 @@ static int erofs_read_superblock(struct super_block *sb)
 
sbi = EROFS_SB(sb);
 
-   data = kmap(page);
+   data = kmap_thread(page);
dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
 
ret = -EINVAL;
@@ -189,7 +189,7 @@ static int erofs_read_superblock(struct super_block *sb)
}
ret = 0;
 out:
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
return ret;
 }
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index c8c381eadcd6..1771baa99d77 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -20,7 +20,7 @@ static inline void xattr_iter_end(struct xattr_iter *it, bool 
atomic)
 {
/* the only user of kunmap() is 'init_inode_xattrs' */
if (!atomic)
-   kunmap(it->page);
+   kunmap_thread(it->page);
else
kunmap_atomic(it->kaddr);
 
@@ -96,7 +96,7 @@ static int init_inode_xattrs(struct inode *inode)
}
 
/* read in shared xattr array (non-atomic, see kmalloc below) */
-   it.kaddr = kmap(it.page);
+   it.kaddr = kmap_thread(it.page);
atomic_map = false;
 
ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 40/58] net: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in these drivers are localized to a single thread.  To
avoid the overhead of global PKRS updates, use the new kmap_thread()
call.
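
The networking sites also illustrate why the atomic mapping variants are
not an option here: in sock_no_sendpage() (below) the mapping stays live
across kernel_sendmsg(), which can sleep, so a non-atomic, thread-local
mapping is exactly what is needed.  A sketch following the shape of the
diff below (names are illustrative):

#include <linux/highmem.h>
#include <linux/net.h>
#include <linux/uio.h>

/* Send one page of data; the mapping is held across a sleeping call. */
static ssize_t send_one_page(struct socket *sock, struct page *page,
			     int offset, size_t size, int flags)
{
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap_thread(page);	/* may sleep while mapped */
	ssize_t res;

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap_thread(page);
	return res;
}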

Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Alexey Kuznetsov 
Cc: Hideaki YOSHIFUJI 
Cc: Trond Myklebust 
Cc: Anna Schumaker 
Cc: Boris Pismenny 
Cc: Aviad Yehezkel 
Cc: John Fastabend 
Cc: Daniel Borkmann 
Signed-off-by: Ira Weiny 
---
 net/ceph/messenger.c | 4 ++--
 net/core/datagram.c  | 4 ++--
 net/core/sock.c      | 8 ++++----
 net/ipv4/ip_output.c | 4 ++--
 net/sunrpc/cache.c   | 4 ++--
 net/sunrpc/xdr.c     | 8 ++++----
 net/tls/tls_device.c | 4 ++--
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index d4d7a0e52491..0c49b8e333da 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1535,10 +1535,10 @@ static u32 ceph_crc32c_page(u32 crc, struct page *page,
 {
char *kaddr;
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
BUG_ON(kaddr == NULL);
crc = crc32c(crc, kaddr + page_offset, length);
-   kunmap(page);
+   kunmap_thread(page);
 
return crc;
 }
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 639745d4f3b9..cbd0a343074a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -441,14 +441,14 @@ static int __skb_datagram_iter(const struct sk_buff *skb, 
int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
struct page *page = skb_frag_page(frag);
-   u8 *vaddr = kmap(page);
+   u8 *vaddr = kmap_thread(page);
 
if (copy > len)
copy = len;
n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
vaddr + skb_frag_off(frag) + offset - start,
copy, data, to);
-   kunmap(page);
+   kunmap_thread(page);
offset += n;
if (n != copy)
goto short_copy;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c5c6b18eff4..9b46a75cd8c1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2846,11 +2846,11 @@ ssize_t sock_no_sendpage(struct socket *sock, struct 
page *page, int offset, siz
ssize_t res;
struct msghdr msg = {.msg_flags = flags};
struct kvec iov;
-   char *kaddr = kmap(page);
+   char *kaddr = kmap_thread(page);
iov.iov_base = kaddr + offset;
iov.iov_len = size;
res = kernel_sendmsg(sock, &msg, &iov, 1, size);
-   kunmap(page);
+   kunmap_thread(page);
return res;
 }
 EXPORT_SYMBOL(sock_no_sendpage);
@@ -2861,12 +2861,12 @@ ssize_t sock_no_sendpage_locked(struct sock *sk, struct 
page *page,
ssize_t res;
struct msghdr msg = {.msg_flags = flags};
struct kvec iov;
-   char *kaddr = kmap(page);
+   char *kaddr = kmap_thread(page);
 
iov.iov_base = kaddr + offset;
iov.iov_len = size;
res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
-   kunmap(page);
+   kunmap_thread(page);
return res;
 }
 EXPORT_SYMBOL(sock_no_sendpage_locked);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e6f2ada9e7d5..05304fb251a4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -949,9 +949,9 @@ csum_page(struct page *page, int offset, int copy)
 {
char *kaddr;
__wsum csum;
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
csum = csum_partial(kaddr + offset, copy, 0);
-   kunmap(page);
+   kunmap_thread(page);
return csum;
 }
 
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index baef5ee43dbb..88193f2a8e6f 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -935,9 +935,9 @@ static ssize_t cache_downcall(struct address_space *mapping,
if (!page)
goto out_slow;
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
ret = cache_do_downcall(kaddr, buf, count, cd);
-   kunmap(page);
+   kunmap_thread(page);
unlock_page(page);
put_page(page);
return ret;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index be11d672b5b9..00afbb48fb0a 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1353,7 +1353,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
base &= ~PAGE_MASK;
avail_page = min_t(unsigned int, PAGE_SIZE - base,
avail_here);
-   c = kmap(*ppages) + base;
+   c = kmap_thread(*ppages) + base;
 
while (avail_here) {
avail_here -= avail_page;
@@ -1429,9 +1429,9 @@ xdr_xcode_array2(st

[PATCH RFC PKS/PMEM 50/58] drivers/android: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.
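
For reference, the thread-local variant introduced in patch 05/58 is,
roughly, a wrapper which performs a thread-local (rather than global)
protection update before falling through to the normal mapping code.
The sketch below is illustrative only; the helper names and the
'global' flag convention are assumptions based on the cover letter,
not quotes of the actual patch:

	/* Illustrative sketch only -- see patch 05/58 for the real code. */
	static inline void *kmap_thread(struct page *page)
	{
		/* thread-local protection update; helper name assumed */
		dev_page_enable_access(page, false);
		return kmap(page);
	}

	static inline void kunmap_thread(struct page *page)
	{
		dev_page_disable_access(page, false);
		kunmap(page);
	}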

Cc: Greg Kroah-Hartman 
Signed-off-by: Ira Weiny 
---
 drivers/android/binder_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 69609696a843..5f50856caad7 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1118,9 +1118,9 @@ binder_alloc_copy_user_to_buffer(struct binder_alloc 
*alloc,
page = binder_alloc_get_page(alloc, buffer,
 buffer_offset, &pgoff);
size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
-   kptr = kmap(page) + pgoff;
+   kptr = kmap_thread(page) + pgoff;
ret = copy_from_user(kptr, from, size);
-   kunmap(page);
+   kunmap_thread(page);
if (ret)
return bytes - size + ret;
bytes -= size;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 52/58] mm: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Signed-off-by: Ira Weiny 
---
 mm/memory.c  | 8 
 mm/swapfile.c| 4 ++--
 mm/userfaultfd.c | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index fcfc4ca36eba..75a054882d7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4945,7 +4945,7 @@ int __access_remote_vm(struct task_struct *tsk, struct 
mm_struct *mm,
if (bytes > PAGE_SIZE-offset)
bytes = PAGE_SIZE-offset;
 
-   maddr = kmap(page);
+   maddr = kmap_thread(page);
if (write) {
copy_to_user_page(vma, page, addr,
  maddr + offset, buf, bytes);
@@ -4954,7 +4954,7 @@ int __access_remote_vm(struct task_struct *tsk, struct 
mm_struct *mm,
copy_from_user_page(vma, page, addr,
buf, maddr + offset, bytes);
}
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
}
len -= bytes;
@@ -5216,14 +5216,14 @@ long copy_huge_page_from_user(struct page *dst_page,
 
for (i = 0; i < pages_per_huge_page; i++) {
if (allow_pagefault)
-   page_kaddr = kmap(dst_page + i);
+   page_kaddr = kmap_thread(dst_page + i);
else
page_kaddr = kmap_atomic(dst_page + i);
rc = copy_from_user(page_kaddr,
(const void __user *)(src + i * PAGE_SIZE),
PAGE_SIZE);
if (allow_pagefault)
-   kunmap(dst_page + i);
+   kunmap_thread(dst_page + i);
else
kunmap_atomic(page_kaddr);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index debc94155f74..e3296ff95648 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3219,7 +3219,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, 
int, swap_flags)
error = PTR_ERR(page);
goto bad_swap_unlock_inode;
}
-   swap_header = kmap(page);
+   swap_header = kmap_thread(page);
 
maxpages = read_swap_header(p, swap_header, inode);
if (unlikely(!maxpages)) {
@@ -3395,7 +3395,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, 
int, swap_flags)
filp_close(swap_file, NULL);
 out:
if (page && !IS_ERR(page)) {
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
}
if (name)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..4d38c881bb2d 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -586,11 +586,11 @@ static __always_inline ssize_t __mcopy_atomic(struct 
mm_struct *dst_mm,
mmap_read_unlock(dst_mm);
BUG_ON(!page);
 
-   page_kaddr = kmap(page);
+   page_kaddr = kmap_thread(page);
err = copy_from_user(page_kaddr,
 (const void __user *) src_addr,
 PAGE_SIZE);
-   kunmap(page);
+   kunmap_thread(page);
if (unlikely(err)) {
err = -EFAULT;
goto out;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 56/58] dax: Stray access protection for dax_direct_access()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

dax_direct_access() is a special case of accessing pmem via a page
offset and without a struct page.

Because the dax driver is well aware of the special protections it has
mapped memory with, call dev_access_[en|dis]able() directly, avoiding
the unnecessary overhead of trying to get a page to kmap().

Similar to kmap, we leverage existing functions, dax_read_[un]lock(),
because they are already required to surround the use of the memory
returned from dax_direct_access().
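
As a rough illustration (the dax_direct_access() signature is from the
existing DAX API; the comments reflect the change below), a typical
consumer is already bracketed like this, so no call sites need to
change:

	/* Sketch of an existing dax_direct_access() consumer. */
	id = dax_read_lock();		/* now also enables device access */
	avail = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, &pfn);
	if (avail > 0)
		memcpy(dst, kaddr, avail * PAGE_SIZE);
	dax_read_unlock(id);		/* ...and disables it again */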

Signed-off-by: Ira Weiny 
---
 drivers/dax/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e84070b55463..0ddb3ee73e36 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -30,6 +30,7 @@ static DEFINE_SPINLOCK(dax_host_lock);
 
 int dax_read_lock(void)
 {
+   dev_access_enable(false);
return srcu_read_lock(&dax_srcu);
 }
 EXPORT_SYMBOL_GPL(dax_read_lock);
@@ -37,6 +38,7 @@ EXPORT_SYMBOL_GPL(dax_read_lock);
 void dax_read_unlock(int id)
 {
srcu_read_unlock(&dax_srcu, id);
+   dev_access_disable(false);
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 48/58] drivers/md: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Coly Li  (maintainer:BCACHE (BLOCK LAYER CACHE))
Cc: Kent Overstreet  (maintainer:BCACHE (BLOCK LAYER 
CACHE))
Signed-off-by: Ira Weiny 
---
 drivers/md/bcache/request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index c7cadaafa947..a4571f6d09dd 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -44,10 +44,10 @@ static void bio_csum(struct bio *bio, struct bkey *k)
uint64_t csum = 0;
 
bio_for_each_segment(bv, bio, iter) {
-   void *d = kmap(bv.bv_page) + bv.bv_offset;
+   void *d = kmap_thread(bv.bv_page) + bv.bv_offset;
 
csum = bch_crc64_update(csum, d, bv.bv_len);
-   kunmap(bv.bv_page);
+   kunmap_thread(bv.bv_page);
}
 
k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 58/58] [dax|pmem]: Enable stray access protection

2020-10-09 Thread ira . weiny
From: Ira Weiny 

Protecting against stray writes is particularly important for PMEM
because, unlike writes to anonymous memory, writes to PMEM persist
across a reboot.  Thus, data corruption could result in the permanent
loss of data.

While stray writes are more serious than reads, protection is also
enabled for reads.  This helps to detect bugs in code which would
incorrectly access device memory, and it prevents more serious machine
checks should those bugs read from a poisoned page.

Enable stray access protection by setting the flag in pgmap which
requests it.  There is no option presented to the user.  If Zone Device
Access Protection is not supported, this flag has no effect.

Signed-off-by: Ira Weiny 
---
 drivers/dax/device.c  | 2 ++
 drivers/nvdimm/pmem.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 1e89513f3c59..e6fb35b4f0fb 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -430,6 +430,8 @@ int dev_dax_probe(struct device *dev)
}
 
dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC;
+   dev_dax->pgmap.flags |= PGMAP_PROT_ENABLED;
+
addr = devm_memremap_pages(dev, &dev_dax->pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index e4dc1ae990fc..9fcd8338e23f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -426,6 +426,8 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
 
+   pmem->pgmap.flags |= PGMAP_PROT_ENABLED;
+
q = blk_alloc_queue(dev_to_node(dev));
if (!q)
return -ENOMEM;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 53/58] lib: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Alexander Viro 
Cc: "Jérôme Glisse" 
Cc: Martin KaFai Lau 
Cc: Song Liu 
Cc: Yonghong Song 
Cc: Andrii Nakryiko 
Cc: John Fastabend 
Cc: KP Singh 
Signed-off-by: Ira Weiny 
---
 lib/iov_iter.c | 12 ++--
 lib/test_bpf.c |  4 ++--
 lib/test_hmm.c |  8 
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f12..1d47f957cf95 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -208,7 +208,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, 
size_t offset, size_t b
}
/* Too bad - revert to non-atomic kmap */
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
from = kaddr + offset;
left = copyout(buf, from, copy);
copy -= left;
@@ -225,7 +225,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, 
size_t offset, size_t b
from += copy;
bytes -= copy;
}
-   kunmap(page);
+   kunmap_thread(page);
 
 done:
if (skip == iov->iov_len) {
@@ -292,7 +292,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, 
size_t offset, size_t
}
/* Too bad - revert to non-atomic kmap */
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
to = kaddr + offset;
left = copyin(to, buf, copy);
copy -= left;
@@ -309,7 +309,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, 
size_t offset, size_t
to += copy;
bytes -= copy;
}
-   kunmap(page);
+   kunmap_thread(page);
 
 done:
if (skip == iov->iov_len) {
@@ -1742,10 +1742,10 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t 
bytes,
return 0;
 
iterate_all_kinds(i, bytes, v, -EINVAL, ({
-   w.iov_base = kmap(v.bv_page) + v.bv_offset;
+   w.iov_base = kmap_thread(v.bv_page) + v.bv_offset;
w.iov_len = v.bv_len;
err = f(&w, context);
-   kunmap(v.bv_page);
+   kunmap_thread(v.bv_page);
err;}), ({
w = v;
err = f(&w, context);})
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index ca7d635bccd9..441f822f56ba 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6506,11 +6506,11 @@ static void *generate_test_data(struct bpf_test *test, 
int sub)
if (!page)
goto err_kfree_skb;
 
-   ptr = kmap(page);
+   ptr = kmap_thread(page);
if (!ptr)
goto err_free_page;
memcpy(ptr, test->frag_data, MAX_DATA);
-   kunmap(page);
+   kunmap_thread(page);
skb_add_rx_frag(skb, 0, page, 0, MAX_DATA, MAX_DATA);
}
 
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e7dc3de355b7..e40d26f97f45 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -329,9 +329,9 @@ static int dmirror_do_read(struct dmirror *dmirror, 
unsigned long start,
if (!page)
return -ENOENT;
 
-   tmp = kmap(page);
+   tmp = kmap_thread(page);
memcpy(ptr, tmp, PAGE_SIZE);
-   kunmap(page);
+   kunmap_thread(page);
 
ptr += PAGE_SIZE;
bounce->cpages++;
@@ -398,9 +398,9 @@ static int dmirror_do_write(struct dmirror *dmirror, 
unsigned long start,
if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
return -ENOENT;
 
-   tmp = kmap(page);
+   tmp = kmap_thread(page);
memcpy(tmp, ptr, PAGE_SIZE);
-   kunmap(page);
+   kunmap_thread(page);
 
ptr += PAGE_SIZE;
bounce->cpages++;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 49/58] drivers/misc: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Greg Kroah-Hartman 
Signed-off-by: Ira Weiny 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 8531ae781195..f308abb8ad03 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -343,7 +343,7 @@ static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
size_t to_copy;
 
if (kernel_if->host)
-   va = kmap(kernel_if->u.h.page[page_index]);
+   va = kmap_thread(kernel_if->u.h.page[page_index]);
else
va = kernel_if->u.g.vas[page_index + 1];
/* Skip header. */
@@ -357,12 +357,12 @@ static int qp_memcpy_to_queue_iter(struct vmci_queue 
*queue,
if (!copy_from_iter_full((u8 *)va + page_offset, to_copy,
 from)) {
if (kernel_if->host)
-   kunmap(kernel_if->u.h.page[page_index]);
+   kunmap_thread(kernel_if->u.h.page[page_index]);
return VMCI_ERROR_INVALID_ARGS;
}
bytes_copied += to_copy;
if (kernel_if->host)
-   kunmap(kernel_if->u.h.page[page_index]);
+   kunmap_thread(kernel_if->u.h.page[page_index]);
}
 
return VMCI_SUCCESS;
@@ -391,7 +391,7 @@ static int qp_memcpy_from_queue_iter(struct iov_iter *to,
int err;
 
if (kernel_if->host)
-   va = kmap(kernel_if->u.h.page[page_index]);
+   va = kmap_thread(kernel_if->u.h.page[page_index]);
else
va = kernel_if->u.g.vas[page_index + 1];
/* Skip header. */
@@ -405,12 +405,12 @@ static int qp_memcpy_from_queue_iter(struct iov_iter *to,
err = copy_to_iter((u8 *)va + page_offset, to_copy, to);
if (err != to_copy) {
if (kernel_if->host)
-   kunmap(kernel_if->u.h.page[page_index]);
+   kunmap_thread(kernel_if->u.h.page[page_index]);
return VMCI_ERROR_INVALID_ARGS;
}
bytes_copied += to_copy;
if (kernel_if->host)
-   kunmap(kernel_if->u.h.page[page_index]);
+   kunmap_thread(kernel_if->u.h.page[page_index]);
}
 
return VMCI_SUCCESS;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 57/58] nvdimm/pmem: Stray access protection for pmem->virt_addr

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The pmem driver uses a cached virtual address to access its memory
directly.  Because the nvdimm driver is well aware of the special
protections it has mapped memory with, we call dev_access_[en|dis]able()
around the direct pmem->virt_addr (pmem_addr) usage, avoiding the
unnecessary overhead of trying to get a page to kmap().

Signed-off-by: Ira Weiny 
---
 drivers/nvdimm/pmem.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index fab29b514372..e4dc1ae990fc 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -148,7 +148,9 @@ static blk_status_t pmem_do_read(struct pmem_device *pmem,
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
return BLK_STS_IOERR;
 
+   dev_access_enable(false);
rc = read_pmem(page, page_off, pmem_addr, len);
+   dev_access_disable(false);
flush_dcache_page(page);
return rc;
 }
@@ -180,11 +182,13 @@ static blk_status_t pmem_do_write(struct pmem_device 
*pmem,
 * after clear poison.
 */
flush_dcache_page(page);
+   dev_access_enable(false);
write_pmem(pmem_addr, page, page_off, len);
if (unlikely(bad_pmem)) {
rc = pmem_clear_poison(pmem, pmem_off, len);
write_pmem(pmem_addr, page, page_off, len);
}
+   dev_access_disable(false);
 
return rc;
 }
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 54/58] powerpc: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Signed-off-by: Ira Weiny 
---
 arch/powerpc/mm/mem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 42e25874f5a8..6ef557b8dda6 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -573,9 +573,9 @@ void flush_icache_user_page(struct vm_area_struct *vma, 
struct page *page,
 {
unsigned long maddr;
 
-   maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
+   maddr = (unsigned long) kmap_thread(page) + (addr & ~PAGE_MASK);
flush_icache_range(maddr, maddr + len);
-   kunmap(page);
+   kunmap_thread(page);
 }
 
 /*
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 55/58] samples: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Kirti Wankhede 
Signed-off-by: Ira Weiny 
---
 samples/vfio-mdev/mbochs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 3cc5e5921682..6d95422c0b46 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -479,12 +479,12 @@ static ssize_t mdev_access(struct mdev_device *mdev, char 
*buf, size_t count,
pos -= MBOCHS_MMIO_BAR_OFFSET;
poff = pos & ~PAGE_MASK;
pg = __mbochs_get_page(mdev_state, pos >> PAGE_SHIFT);
-   map = kmap(pg);
+   map = kmap_thread(pg);
if (is_write)
memcpy(map + poff, buf, count);
else
memcpy(buf, map + poff, count);
-   kunmap(pg);
+   kunmap_thread(pg);
put_page(pg);
 
} else {
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 44/58] drivers/xen: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Stefano Stabellini 
Signed-off-by: Ira Weiny 
---
 drivers/xen/gntalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 3fa40c723e8e..3b78e055feff 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -184,9 +184,9 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
 static void __del_gref(struct gntalloc_gref *gref)
 {
if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
-   uint8_t *tmp = kmap(gref->page);
+   uint8_t *tmp = kmap_thread(gref->page);
tmp[gref->notify.pgoff] = 0;
-   kunmap(gref->page);
+   kunmap_thread(gref->page);
}
if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(gref->notify.event);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 12/58] fs/afs: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

The kmap() calls in this FS are localized to a single thread.  To
avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: David Howells 
Signed-off-by: Ira Weiny 
---
 fs/afs/dir.c  | 16 
 fs/afs/dir_edit.c | 16 
 fs/afs/mntpt.c|  4 ++--
 fs/afs/write.c|  4 ++--
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1d2e61e0ab04..5d01cdb590de 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -127,14 +127,14 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, 
struct page *page,
qty /= sizeof(union afs_xdr_dir_block);
 
/* check them */
-   dbuf = kmap(page);
+   dbuf = kmap_thread(page);
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
   __func__, dvnode->vfs_inode.i_ino, tmp, qty,
   ntohs(dbuf->blocks[tmp].hdr.magic));
trace_afs_dir_check_failed(dvnode, off, i_size);
-   kunmap(page);
+   kunmap_thread(page);
trace_afs_file_error(dvnode, -EIO, 
afs_file_error_dir_bad_magic);
goto error;
}
@@ -146,7 +146,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, 
struct page *page,
((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
}
 
-   kunmap(page);
+   kunmap_thread(page);
 
 checked:
afs_stat_v(dvnode, n_read_dir);
@@ -177,13 +177,13 @@ static bool afs_dir_check_pages(struct afs_vnode *dvnode, 
struct afs_read *req)
req->pos, req->index, req->nr_pages, req->offset);
 
for (i = 0; i < req->nr_pages; i++) {
-   dbuf = kmap(req->pages[i]);
+   dbuf = kmap_thread(req->pages[i]);
for (j = 0; j < qty; j++) {
union afs_xdr_dir_block *block = &dbuf->blocks[j];
 
pr_warn("[%02x] %32phN\n", i * qty + j, block);
}
-   kunmap(req->pages[i]);
+   kunmap_thread(req->pages[i]);
}
return false;
 }
@@ -481,7 +481,7 @@ static int afs_dir_iterate(struct inode *dir, struct 
dir_context *ctx,
 
limit = blkoff & ~(PAGE_SIZE - 1);
 
-   dbuf = kmap(page);
+   dbuf = kmap_thread(page);
 
/* deal with the individual blocks stashed on this page */
do {
@@ -489,7 +489,7 @@ static int afs_dir_iterate(struct inode *dir, struct 
dir_context *ctx,
   sizeof(union afs_xdr_dir_block)];
ret = afs_dir_iterate_block(dvnode, ctx, dblock, 
blkoff);
if (ret != 1) {
-   kunmap(page);
+   kunmap_thread(page);
goto out;
}
 
@@ -497,7 +497,7 @@ static int afs_dir_iterate(struct inode *dir, struct 
dir_context *ctx,
 
} while (ctx->pos < dir->i_size && blkoff < limit);
 
-   kunmap(page);
+   kunmap_thread(page);
ret = 0;
}
 
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index b108528bf010..35ed6828e205 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -218,7 +218,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE);
need_slots /= AFS_DIR_DIRENT_SIZE;
 
-   meta_page = kmap(page0);
+   meta_page = kmap_thread(page0);
meta = &meta_page->blocks[0];
if (i_size == 0)
goto new_directory;
@@ -247,7 +247,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
set_page_private(page, 1);
SetPagePrivate(page);
}
-   dir_page = kmap(page);
+   dir_page = kmap_thread(page);
}
 
/* Abandon the edit if we got a callback break. */
@@ -284,7 +284,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
 
if (page != page0) {
unlock_page(page);
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
}
}
@@ -323,7 +323,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
afs_set_contig_bits(block, slot, need_slots);
if (page != page0) {
unlock_page(page);
-   kunmap(page);
+   kunmap_thread(page);
put_page(page);
}
 
@@ -337,7 +337,7

[PATCH RFC PKS/PMEM 46/58] drivers/staging: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Greg Kroah-Hartman 
Signed-off-by: Ira Weiny 
---
 drivers/staging/rts5208/rtsx_transport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rts5208/rtsx_transport.c 
b/drivers/staging/rts5208/rtsx_transport.c
index 0027bcf638ad..f747cc23951b 100644
--- a/drivers/staging/rts5208/rtsx_transport.c
+++ b/drivers/staging/rts5208/rtsx_transport.c
@@ -92,13 +92,13 @@ unsigned int rtsx_stor_access_xfer_buf(unsigned char 
*buffer,
while (sglen > 0) {
unsigned int plen = min(sglen, (unsigned int)
PAGE_SIZE - poff);
-   unsigned char *ptr = kmap(page);
+   unsigned char *ptr = kmap_thread(page);
 
if (dir == TO_XFER_BUF)
memcpy(ptr + poff, buffer + cnt, plen);
else
memcpy(buffer + cnt, ptr + poff, plen);
-   kunmap(page);
+   kunmap_thread(page);
 
/* Start at the beginning of the next page */
poff = 0;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 45/58] drivers/firmware: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Ard Biesheuvel 
Signed-off-by: Ira Weiny 
---
 drivers/firmware/efi/capsule-loader.c | 6 +++---
 drivers/firmware/efi/capsule.c| 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/firmware/efi/capsule-loader.c 
b/drivers/firmware/efi/capsule-loader.c
index 4dde8edd53b6..aa2e0b5940fd 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -197,7 +197,7 @@ static ssize_t efi_capsule_write(struct file *file, const 
char __user *buff,
page = cap_info->pages[cap_info->index - 1];
}
 
-   kbuff = kmap(page);
+   kbuff = kmap_thread(page);
kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
 
/* Copy capsule binary data from user space to kernel space buffer */
@@ -217,7 +217,7 @@ static ssize_t efi_capsule_write(struct file *file, const 
char __user *buff,
}
 
cap_info->count += write_byte;
-   kunmap(page);
+   kunmap_thread(page);
 
/* Submit the full binary to efi_capsule_update() API */
if (cap_info->header.headersize > 0 &&
@@ -236,7 +236,7 @@ static ssize_t efi_capsule_write(struct file *file, const 
char __user *buff,
return write_byte;
 
 fail_unmap:
-   kunmap(page);
+   kunmap_thread(page);
 failed:
efi_free_all_buff_pages(cap_info);
return ret;
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index 598b7800d14e..edb7797b0e4f 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -244,7 +244,7 @@ int efi_capsule_update(efi_capsule_header_t *capsule, 
phys_addr_t *pages)
for (i = 0; i < sg_count; i++) {
efi_capsule_block_desc_t *sglist;
 
-   sglist = kmap(sg_pages[i]);
+   sglist = kmap_thread(sg_pages[i]);
 
for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
u64 sz = min_t(u64, imagesize,
@@ -265,7 +265,7 @@ int efi_capsule_update(efi_capsule_header_t *capsule, 
phys_addr_t *pages)
else
sglist[j].data = page_to_phys(sg_pages[i + 1]);
 
-   kunmap(sg_pages[i]);
+   kunmap_thread(sg_pages[i]);
}
 
mutex_lock(&capsule_mutex);
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 43/58] drivers/mmc: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Ulf Hansson 
Cc: Sascha Sommer 
Signed-off-by: Ira Weiny 
---
 drivers/mmc/host/mmc_spi.c| 4 ++--
 drivers/mmc/host/sdricoh_cs.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 18a850f37ddc..ab28e7103b8d 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -918,7 +918,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct 
mmc_command *cmd,
}
 
/* allow pio too; we don't allow highmem */
-   kmap_addr = kmap(sg_page(sg));
+   kmap_addr = kmap_thread(sg_page(sg));
if (direction == DMA_TO_DEVICE)
t->tx_buf = kmap_addr + sg->offset;
else
@@ -950,7 +950,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct 
mmc_command *cmd,
/* discard mappings */
if (direction == DMA_FROM_DEVICE)
flush_kernel_dcache_page(sg_page(sg));
-   kunmap(sg_page(sg));
+   kunmap_thread(sg_page(sg));
if (dma_dev)
dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir);
 
diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c
index 76a8cd3a186f..7806bc69c4f1 100644
--- a/drivers/mmc/host/sdricoh_cs.c
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -312,11 +312,11 @@ static void sdricoh_request(struct mmc_host *mmc, struct 
mmc_request *mrq)
int result;
page = sg_page(data->sg);
 
-   buf = kmap(page) + data->sg->offset + (len * i);
+   buf = kmap_thread(page) + data->sg->offset + (len * i);
result =
sdricoh_blockio(host,
data->flags & MMC_DATA_READ, buf, len);
-   kunmap(page);
+   kunmap_thread(page);
flush_dcache_page(page);
if (result) {
dev_err(dev, "sdricoh_request: cmd %i "
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 42/58] drivers/scsi: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Signed-off-by: Ira Weiny 
---
 drivers/scsi/ipr.c | 8 
 drivers/scsi/pmcraid.c | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index b0aa58d117cc..a5a0b8feb661 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3923,9 +3923,9 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist 
*sglist,
buffer += bsize_elem) {
struct page *page = sg_page(sg);
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
memcpy(kaddr, buffer, bsize_elem);
-   kunmap(page);
+   kunmap_thread(page);
 
sg->length = bsize_elem;
 
@@ -3938,9 +3938,9 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist 
*sglist,
if (len % bsize_elem) {
struct page *page = sg_page(sg);
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
memcpy(kaddr, buffer, len % bsize_elem);
-   kunmap(page);
+   kunmap_thread(page);
 
sg->length = len % bsize_elem;
}
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index aa9ae2ae8579..4b05ba4b8a11 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3269,13 +3269,13 @@ static int pmcraid_copy_sglist(
for (i = 0; i < (len / bsize_elem); i++, sg = sg_next(sg), buffer += 
bsize_elem) {
struct page *page = sg_page(sg);
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
if (direction == DMA_TO_DEVICE)
rc = copy_from_user(kaddr, buffer, bsize_elem);
else
rc = copy_to_user(buffer, kaddr, bsize_elem);
 
-   kunmap(page);
+   kunmap_thread(page);
 
if (rc) {
pmcraid_err("failed to copy user data into sg list\n");
@@ -3288,14 +3288,14 @@ static int pmcraid_copy_sglist(
if (len % bsize_elem) {
struct page *page = sg_page(sg);
 
-   kaddr = kmap(page);
+   kaddr = kmap_thread(page);
 
if (direction == DMA_TO_DEVICE)
rc = copy_from_user(kaddr, buffer, len % bsize_elem);
else
rc = copy_to_user(buffer, kaddr, len % bsize_elem);
 
-   kunmap(page);
+   kunmap_thread(page);
 
sg->length = len % bsize_elem;
}
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 47/58] drivers/mtd: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls are localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Miquel Raynal 
Cc: Richard Weinberger 
Cc: Vignesh Raghavendra 
Signed-off-by: Ira Weiny 
---
 drivers/mtd/mtd_blkdevs.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 0c05f77f9b21..4b18998273fa 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -88,14 +88,14 @@ static blk_status_t do_blktrans_request(struct 
mtd_blktrans_ops *tr,
return BLK_STS_IOERR;
return BLK_STS_OK;
case REQ_OP_READ:
-   buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
+   buf = kmap_thread(bio_page(req->bio)) + bio_offset(req->bio);
for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
if (tr->readsect(dev, block, buf)) {
-   kunmap(bio_page(req->bio));
+   kunmap_thread(bio_page(req->bio));
return BLK_STS_IOERR;
}
}
-   kunmap(bio_page(req->bio));
+   kunmap_thread(bio_page(req->bio));
rq_flush_dcache_pages(req);
return BLK_STS_OK;
case REQ_OP_WRITE:
@@ -103,14 +103,14 @@ static blk_status_t do_blktrans_request(struct 
mtd_blktrans_ops *tr,
return BLK_STS_IOERR;
 
rq_flush_dcache_pages(req);
-   buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
+   buf = kmap_thread(bio_page(req->bio)) + bio_offset(req->bio);
for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
if (tr->writesect(dev, block, buf)) {
-   kunmap(bio_page(req->bio));
+   kunmap_thread(bio_page(req->bio));
return BLK_STS_IOERR;
}
}
-   kunmap(bio_page(req->bio));
+   kunmap_thread(bio_page(req->bio));
return BLK_STS_OK;
default:
return BLK_STS_IOERR;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 51/58] kernel: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

This kmap() call is localized to a single thread.  To avoid the
overhead of global PKRS updates, use the new kmap_thread() call.

Cc: Eric Biederman 
Signed-off-by: Ira Weiny 
---
 kernel/kexec_core.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c19c0dad1ebe..272a9920c0d6 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -815,7 +815,7 @@ static int kimage_load_normal_segment(struct kimage *image,
if (result < 0)
goto out;
 
-   ptr = kmap(page);
+   ptr = kmap_thread(page);
/* Start with a clear page */
clear_page(ptr);
ptr += maddr & ~PAGE_MASK;
@@ -828,7 +828,7 @@ static int kimage_load_normal_segment(struct kimage *image,
memcpy(ptr, kbuf, uchunk);
else
result = copy_from_user(ptr, buf, uchunk);
-   kunmap(page);
+   kunmap_thread(page);
if (result) {
result = -EFAULT;
goto out;
@@ -879,7 +879,7 @@ static int kimage_load_crash_segment(struct kimage *image,
goto out;
}
arch_kexec_post_alloc_pages(page_address(page), 1, 0);
-   ptr = kmap(page);
+   ptr = kmap_thread(page);
ptr += maddr & ~PAGE_MASK;
mchunk = min_t(size_t, mbytes,
PAGE_SIZE - (maddr & ~PAGE_MASK));
@@ -895,7 +895,7 @@ static int kimage_load_crash_segment(struct kimage *image,
else
result = copy_from_user(ptr, buf, uchunk);
kexec_flush_icache_page(page);
-   kunmap(page);
+   kunmap_thread(page);
arch_kexec_pre_free_pages(page_address(page), 1);
if (result) {
result = -EFAULT;
-- 
2.28.0.rc0.12.gb6a658bd00c9




[PATCH RFC PKS/PMEM 41/58] drivers/target: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls in this driver are localized to a single thread.
To avoid the overhead of global PKRS updates, use the new
kmap_thread() call.

Signed-off-by: Ira Weiny 
---
 drivers/target/target_core_iblock.c| 4 ++--
 drivers/target/target_core_rd.c| 4 ++--
 drivers/target/target_core_transport.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_iblock.c 
b/drivers/target/target_core_iblock.c
index 1c181d31f4c8..df7b1568edb3 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -415,7 +415,7 @@ iblock_execute_zero_out(struct block_device *bdev, struct 
se_cmd *cmd)
unsigned char *buf, *not_zero;
int ret;
 
-   buf = kmap(sg_page(sg)) + sg->offset;
+   buf = kmap_thread(sg_page(sg)) + sg->offset;
if (!buf)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
/*
@@ -423,7 +423,7 @@ iblock_execute_zero_out(struct block_device *bdev, struct 
se_cmd *cmd)
 * incoming WRITE_SAME payload does not contain zeros.
 */
not_zero = memchr_inv(buf, 0x00, cmd->data_length);
-   kunmap(sg_page(sg));
+   kunmap_thread(sg_page(sg));
 
if (not_zero)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 408bd975170b..dbbdd39c5bf9 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -159,9 +159,9 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, 
struct rd_dev_sg_table *
sg_assign_page(&sg[j], pg);
sg[j].length = PAGE_SIZE;
 
-   p = kmap(pg);
+   p = kmap_thread(pg);
memset(p, init_payload, PAGE_SIZE);
-   kunmap(pg);
+   kunmap_thread(pg);
}
 
page_offset += sg_per_table;
diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index ff26ab0a5f60..8d0bae5a92e5 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1692,11 +1692,11 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, 
struct se_session *se_sess
unsigned char *buf = NULL;
 
if (sgl)
-   buf = kmap(sg_page(sgl)) + sgl->offset;
+   buf = kmap_thread(sg_page(sgl)) + sgl->offset;
 
if (buf) {
memset(buf, 0, sgl->length);
-   kunmap(sg_page(sgl));
+   kunmap_thread(sg_page(sgl));
}
}
 
-- 
2.28.0.rc0.12.gb6a658bd00c9




Re: [PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()

2020-10-10 Thread Ira Weiny
On Sat, Oct 10, 2020 at 12:03:49AM +0200, Daniel Vetter wrote:
> On Fri, Oct 09, 2020 at 12:49:44PM -0700, ira.we...@intel.com wrote:
> > From: Ira Weiny 
> > 
> > These kmap() calls in the gpu stack are localized to a single thread.
> > To avoid the over head of global PKRS updates use the new kmap_thread()
> > call.
> > 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Patrik Jakobsson 
> > Signed-off-by: Ira Weiny 
> 
> I'm guessing the entire pile goes in through some other tree.
>

Apologies for not realizing there were multiple maintainers here.

But I was thinking it would land together through the mm tree once the core
support lands.  I've tried to split these out in a way that they can be easily
reviewed/acked by the correct developers.

> If so:
> 
> Acked-by: Daniel Vetter 
> 
> If you want this to land through maintainer trees, then we need a
> per-driver split (since aside from amdgpu and radeon they're all different
> subtrees).

It is just an RFC for the moment.  I need to get the core support accepted
first, then this can land.

> 
> btw the two kmap calls in drm you highlight in the cover letter should
> also be convertible to kmap_thread. We only hold vmalloc mappings for a
> longer time (or it'd be quite a driver bug). So if you want maybe throw
> those two as two additional patches on top, and we can do some careful
> review & testing for them.

Cool.  I'll add them in.

Ira

> -Daniel
> 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 12 ++--
> >  drivers/gpu/drm/gma500/gma_display.c |  4 ++--
> >  drivers/gpu/drm/gma500/mmu.c | 10 +-
> >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  4 ++--
> >  .../gpu/drm/i915/gem/selftests/i915_gem_context.c|  4 ++--
> >  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c   |  8 
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c |  4 ++--
> >  drivers/gpu/drm/i915/gt/intel_gtt.c  |  4 ++--
> >  drivers/gpu/drm/i915/gt/shmem_utils.c|  4 ++--
> >  drivers/gpu/drm/i915/i915_gem.c  |  8 
> >  drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
> >  drivers/gpu/drm/i915/selftests/i915_perf.c   |  4 ++--
> >  drivers/gpu/drm/radeon/radeon_ttm.c  |  4 ++--
> >  13 files changed, 37 insertions(+), 37 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 978bae731398..bd564bccb7a3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, 
> > char __user *buf,
> >  
> > page = adev->gart.pages[p];
> > if (page) {
> > -   ptr = kmap(page);
> > +   ptr = kmap_thread(page);
> > ptr += off;
> >  
> > r = copy_to_user(buf, ptr, cur_size);
> > -   kunmap(adev->gart.pages[p]);
> > +   kunmap_thread(adev->gart.pages[p]);
> > } else
> > r = clear_user(buf, cur_size);
> >  
> > @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char 
> > __user *buf,
> > if (p->mapping != adev->mman.bdev.dev_mapping)
> > return -EPERM;
> >  
> > -   ptr = kmap(p);
> > +   ptr = kmap_thread(p);
> > r = copy_to_user(buf, ptr + off, bytes);
> > -   kunmap(p);
> > +   kunmap_thread(p);
> > if (r)
> > return -EFAULT;
> >  
> > @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, 
> > const char __user *buf,
> > if (p->mapping != adev->mman.bdev.dev_mapping)
> > return -EPERM;
> >  
> > -   ptr = kmap(p);
> > +   ptr = kmap_thread(p);
> > r = copy_from_user(ptr + off, buf, bytes);
> > -   kunmap(p);
> > +   kunmap_thread(p);
> > if (r)
> > return -EFAULT;
> >  
> > diff --git a/drivers/gpu/drm/gma500/gma_display.c 
> > b/drivers/gpu/drm/gma500/gma_display.c
> > index 3df6d6e850f5..35f4e55c941f 100644
> > --- a/drivers/gpu/drm/gma500/gma_display.c
> > +++ b/drivers/gpu/drm/gma500/gma_display.c
> > @@ -400,9 +400,9 @@ int g

Re: [PATCH RFC PKS/PMEM 10/58] drivers/rdma: Utilize new kmap_thread()

2020-10-11 Thread Ira Weiny
On Sat, Oct 10, 2020 at 11:36:49AM +, Bernard Metzler wrote:
> -ira.we...@intel.com wrote: -
> 

[snip]

> >@@ -505,7 +505,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
> >struct socket *s)
> > page_array[seg] = p;
> > 
> > if (!c_tx->use_sendpage) {
> >-iov[seg].iov_base = kmap(p) + fp_off;
> >+iov[seg].iov_base = kmap_thread(p) + 
> >fp_off;
> 
> This misses a corresponding kunmap_thread() in siw_unmap_pages()
> (pls change line 403 in siw_qp_tx.c as well)

Thanks, I missed that.

Done.

Ira

> 
> Thanks,
> Bernard.
> 



Re: [PATCH RFC PKS/PMEM 48/58] drivers/md: Utilize new kmap_thread()

2020-10-11 Thread Ira Weiny
On Sat, Oct 10, 2020 at 10:20:34AM +0800, Coly Li wrote:
> On 2020/10/10 03:50, ira.we...@intel.com wrote:
> > From: Ira Weiny 
> > 
> > These kmap() calls are localized to a single thread.  To avoid the over
> > head of global PKRS updates use the new kmap_thread() call.
> > 
> 
> Hi Ira,
> 
> There were a number of options considered.
> 
> 1) Attempt to change all the thread local kmap() calls to kmap_atomic()
> 2) Introduce a flags parameter to kmap() to indicate if the mapping
> should be global or not
> 3) Change ~20-30 call sites to 'kmap_global()' to indicate that they
> require a global mapping of the pages
> 4) Change ~209 call sites to 'kmap_thread()' to indicate that the
> mapping is to be used within that thread of execution only
> 
> 
> I copied the above information from patch 00/58 to this message. The
> idea behind kmap_thread() is fine to me, but as you said the new api is
> very easy to be missed in new code (even for me). I would like to be
> supportive to option 2) introduce a flag to kmap(), then we won't forget
> the new thread-localized kmap method, and people won't ask why a
> _thread() function is called but no kthread created.

Thanks for the feedback.

I'm going to hold off making any changes until others weigh in.  FWIW, I kind
of like option 2 as well.  But there is already kmap_atomic(), so it seemed
like kmap_() was more in line with the current API.
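
For reference, option 2 would look roughly like the sketch below (the
scope flag and its names are hypothetical, not from any posted patch):

	enum kmap_scope {
		KMAP_GLOBAL,	/* mapping may be used by any thread */
		KMAP_THREAD,	/* mapping valid only in the calling thread */
	};

	void *kmap(struct page *page, enum kmap_scope scope);
	void kunmap(struct page *page, enum kmap_scope scope);

	kaddr = kmap(page, KMAP_THREAD);
	memcpy(kaddr, buffer, PAGE_SIZE);
	kunmap(page, KMAP_THREAD);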

Thanks,
Ira

> 
> Thanks.
> 
> 
> Coly Li
> 



Re: [PATCH RFC PKS/PMEM 57/58] nvdimm/pmem: Stray access protection for pmem->virt_addr

2020-10-11 Thread Ira Weiny
On Fri, Oct 09, 2020 at 07:53:07PM -0700, John Hubbard wrote:
> On 10/9/20 12:50 PM, ira.we...@intel.com wrote:
> > From: Ira Weiny 
> > 
> > The pmem driver uses a cached virtual address to access its memory
> > directly.  Because the nvdimm driver is well aware of the special
> > protections it has mapped memory with, we call dev_access_[en|dis]able()
> > around the direct pmem->virt_addr (pmem_addr) usage instead of the
> > unnecessary overhead of trying to get a page to kmap.
> > 
> > Signed-off-by: Ira Weiny 
> > ---
> >   drivers/nvdimm/pmem.c | 4 
> >   1 file changed, 4 insertions(+)
> > 
> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> > index fab29b514372..e4dc1ae990fc 100644
> > --- a/drivers/nvdimm/pmem.c
> > +++ b/drivers/nvdimm/pmem.c
> > @@ -148,7 +148,9 @@ static blk_status_t pmem_do_read(struct pmem_device 
> > *pmem,
> > if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
> > return BLK_STS_IOERR;
> > +   dev_access_enable(false);
> > rc = read_pmem(page, page_off, pmem_addr, len);
> > +   dev_access_disable(false);
> 
> Hi Ira!
> 
> The APIs should be tweaked to use a symbol (GLOBAL, PER_THREAD), instead of
> true/false. Try reading the above and you'll see that it sounds like it's
> doing the opposite of what it is ("enable_this(false)" sounds like a clumsy
> API design to *disable*, right?). And there is no hint about the scope.

Sounds reasonable.

> 
> And it *could* be so much more readable like this:
> 
> dev_access_enable(DEV_ACCESS_THIS_THREAD);

I'll think about the flag name.  I'm not liking 'this thread'.

Maybe DEV_ACCESS_[GLOBAL|THREAD]
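
Sketched out (names still hypothetical), the pmem read path would then
read:

	enum dev_access_scope {
		DEV_ACCESS_GLOBAL,
		DEV_ACCESS_THREAD,
	};

	dev_access_enable(DEV_ACCESS_THREAD);
	rc = read_pmem(page, page_off, pmem_addr, len);
	dev_access_disable(DEV_ACCESS_THREAD);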

Ira




Re: [PATCH RFC PKS/PMEM 22/58] fs/f2fs: Utilize new kmap_thread()

2020-10-11 Thread Ira Weiny
On Fri, Oct 09, 2020 at 06:30:36PM -0700, Eric Biggers wrote:
> On Sat, Oct 10, 2020 at 01:39:54AM +0100, Matthew Wilcox wrote:
> > On Fri, Oct 09, 2020 at 02:34:34PM -0700, Eric Biggers wrote:
> > > On Fri, Oct 09, 2020 at 12:49:57PM -0700, ira.we...@intel.com wrote:
> > > > The kmap() calls in this FS are localized to a single thread.  To avoid
> > > > the over head of global PKRS updates use the new kmap_thread() call.
> > > >
> > > > @@ -2410,12 +2410,12 @@ static inline struct page 
> > > > *f2fs_pagecache_get_page(
> > > >  
> > > >  static inline void f2fs_copy_page(struct page *src, struct page *dst)
> > > >  {
> > > > -   char *src_kaddr = kmap(src);
> > > > -   char *dst_kaddr = kmap(dst);
> > > > +   char *src_kaddr = kmap_thread(src);
> > > > +   char *dst_kaddr = kmap_thread(dst);
> > > >  
> > > > memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);
> > > > -   kunmap(dst);
> > > > -   kunmap(src);
> > > > +   kunmap_thread(dst);
> > > > +   kunmap_thread(src);
> > > >  }
> > > 
> > > Wouldn't it make more sense to switch cases like this to kmap_atomic()?
> > > The pages are only mapped to do a memcpy(), then they're immediately 
> > > unmapped.
> > 
> > Maybe you missed the earlier thread from Thomas trying to do something
> > similar for rather different reasons ...
> > 
> > https://lore.kernel.org/lkml/20200919091751.06...@linutronix.de/
> 
> I did miss it.  I'm not subscribed to any of the mailing lists it was sent to.
> 
> Anyway, it shouldn't matter.  Patchsets should be standalone, and not require
> reading random prior threads on linux-kernel to understand.

Sorry, but I did not think that the discussion above was directly related.  If
I'm not mistaken, Thomas' work was directed at relaxing kmap_atomic() into
kmap_thread() calls.  While interesting, it is not the point of this series.  I
want to restrict kmap() callers to kmap_thread().

For this series, changing the kmap_thread() call sites to kmap_atomic() was
considered.  But as I said in the cover letter, kmap_atomic() does not have the
same semantics; it is too strict.  Perhaps I should have expanded that
explanation.
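
To illustrate (example mine, not from the series): kmap_atomic()
disables pagefaults and preemption, so a pattern like the one below is
only legal with a mapping that allows sleeping, such as kmap() or
kmap_thread():

	kaddr = kmap_thread(page);
	/* copy_from_user() may fault and therefore sleep */
	ret = copy_from_user(kaddr, ubuf, PAGE_SIZE);
	kunmap_thread(page);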

> 
> And I still don't really understand.  After this patchset, there is still code
> nearly identical to the above (doing a temporary mapping just for a memcpy) 
> that
> would still be using kmap_atomic().

I don't understand.  You mean there would be other call sites calling:

kmap_atomic()
memcpy()
kunmap_atomic()

?

> Is the idea that later, such code will be
> converted to use kmap_thread() instead?  If not, why use one over the other?
 

The reason for the new call is that, with PKS added behind kmap(), there are
three levels of mapping we want:

global kmap (can span threads and sleep)
'thread' kmap (can sleep but not span threads)
'atomic' kmap (can't sleep nor span threads [by definition])
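
Or, side by side (illustrative only):

	addr = kmap(page);		/* global; may sleep; any thread may use addr */
	addr = kmap_thread(page);	/* may sleep; addr valid only in this thread */
	addr = kmap_atomic(page);	/* this thread only; sleeping not allowed */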

As Matthew said, perhaps 'global kmaps' would be best changed to vmaps?  I
just don't know the details of every call site.

And since I don't know the call site details, if there are kmap_thread() calls
which are better off as kmap_atomic() calls, I think it is worth converting
them.  But I made the assumption that kmap() users would already be calling
kmap_atomic() if they could (because it is more efficient).

Ira



Re: [PATCH RFC PKS/PMEM 22/58] fs/f2fs: Utilize new kmap_thread()

2020-10-12 Thread Ira Weiny
On Mon, Oct 12, 2020 at 05:44:38PM +0100, Matthew Wilcox wrote:
> On Mon, Oct 12, 2020 at 09:28:29AM -0700, Dave Hansen wrote:
> > kmap_atomic() is always preferred over kmap()/kmap_thread().
> > kmap_atomic() is _much_ more lightweight since its TLB invalidation is
> > always CPU-local and never broadcast.
> > 
> > So, basically, unless you *must* sleep while the mapping is in place,
> > kmap_atomic() is preferred.
> 
> But kmap_atomic() disables preemption, so the _ideal_ interface would map
> it only locally, then on preemption make it global.  I don't even know
> if that _can_ be done.  But this email makes it seem like kmap_atomic()
> has no downsides.

And that is IIUC what Thomas was trying to solve.

Also, Linus brought up that kmap_atomic() has quirks in nesting.[1]

From what I can see, all of these discussions support the need to have
something between kmap() and kmap_atomic().

However, the reasons behind converting call sites to kmap_thread() differ
between Thomas' patch set and mine.  Both require more kmap granularity, but
they do so for different reasons and with different underlying implementations,
arriving at the _same_ resulting semantics: a thread-local mapping which is
preemptable.[2]  Therefore they each focus on changing different call sites.

While this patch set is huge, I think it serves a valuable purpose in
identifying a large number of call sites which are candidates for this new
semantic.

Ira

[1] 
https://lore.kernel.org/lkml/CAHk-=wgbmwsTOKs23Z=71ebtruloeah2u3tnqt2athewvkb...@mail.gmail.com/
[2] It is important to note these implementations are not incompatible with
each other.  So I don't see yet another 'kmap_something()' being required.



Re: [PATCH RFC PKS/PMEM 22/58] fs/f2fs: Utilize new kmap_thread()

2020-10-12 Thread Ira Weiny
On Mon, Oct 12, 2020 at 09:02:54PM +0100, Matthew Wilcox wrote:
> On Mon, Oct 12, 2020 at 12:53:54PM -0700, Ira Weiny wrote:
> > On Mon, Oct 12, 2020 at 05:44:38PM +0100, Matthew Wilcox wrote:
> > > On Mon, Oct 12, 2020 at 09:28:29AM -0700, Dave Hansen wrote:
> > > > kmap_atomic() is always preferred over kmap()/kmap_thread().
> > > > kmap_atomic() is _much_ more lightweight since its TLB invalidation is
> > > > always CPU-local and never broadcast.
> > > > 
> > > > So, basically, unless you *must* sleep while the mapping is in place,
> > > > kmap_atomic() is preferred.
> > > 
> > > But kmap_atomic() disables preemption, so the _ideal_ interface would map
> > > it only locally, then on preemption make it global.  I don't even know
> > > if that _can_ be done.  But this email makes it seem like kmap_atomic()
> > > has no downsides.
> > 
> > And that is IIUC what Thomas was trying to solve.
> > 
> > Also, Linus brought up that kmap_atomic() has quirks in nesting.[1]
> > 
> > From what I can see all of these discussions support the need to have
> > something
> > between kmap() and kmap_atomic().
> > 
> > However, the reason behind converting call sites to kmap_thread() are 
> > different
> > between Thomas' patch set and mine.  Both require more kmap granularity.
> > However, they do so with different reasons and underlying implementations 
> > but
> > with the _same_ resulting semantics; a thread local mapping which is
> > preemptable.[2]  Therefore they each focus on changing different call sites.
> > 
> > While this patch set is huge I think it serves a valuable purpose to 
> > identify a
> > large number of call sites which are candidates for this new semantic.
> 
> Yes, I agree.  My problem with this patch-set is that it ties it to
> some Intel feature that almost nobody cares about.

I humbly disagree.  At this level, the only thing this is tied to is the idea
that there are additional memory protections available which can be enabled
quickly on a per-thread basis.  PKS on Intel is but one implementation of that.

Even the kmap code only has knowledge that there is something which needs to be
done special on a devm page.

>
> Maybe we should
> care about it, but you didn't try very hard to make anyone care about
> it in the cover letter.

Ok, my bad.  We have customers who care very much about restricting access to
the PMEM pages to prevent bugs in the kernel from causing permanent damage to
their data/file systems.  I'll reword the cover letter to make that clearer.

> 
> For a future patch-set, I'd like to see you just introduce the new
> API.  Then you can optimise the Intel implementation of it afterwards.
> Those patch-sets have entirely different reviewers.

I considered doing this.  But this seemed more logical because the feature is
being driven by PMEM, which is behind the kmap interface, not by the users of
the API.

I can introduce a patch set with a kmap_thread() call which does nothing if
that is more palatable, but it seems wrong to me to do so.

Ira



Re: [PATCH RFC PKS/PMEM 33/58] fs/cramfs: Utilize new kmap_thread()

2020-10-13 Thread Ira Weiny
On Tue, Oct 13, 2020 at 08:36:43PM +0100, Matthew Wilcox wrote:
> On Tue, Oct 13, 2020 at 11:44:29AM -0700, Dan Williams wrote:
> > On Fri, Oct 9, 2020 at 12:52 PM  wrote:
> > >
> > > From: Ira Weiny 
> > >
> > > The kmap() calls in this FS are localized to a single thread.  To avoid
> > > the over head of global PKRS updates use the new kmap_thread() call.
> > >
> > > Cc: Nicolas Pitre 
> > > Signed-off-by: Ira Weiny 
> > > ---
> > >  fs/cramfs/inode.c | 10 +-
> > >  1 file changed, 5 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
> > > index 912308600d39..003c014a42ed 100644
> > > --- a/fs/cramfs/inode.c
> > > +++ b/fs/cramfs/inode.c
> > > @@ -247,8 +247,8 @@ static void *cramfs_blkdev_read(struct super_block 
> > > *sb, unsigned int offset,
> > > struct page *page = pages[i];
> > >
> > > if (page) {
> > > -   memcpy(data, kmap(page), PAGE_SIZE);
> > > -   kunmap(page);
> > > +   memcpy(data, kmap_thread(page), PAGE_SIZE);
> > > +   kunmap_thread(page);
> > 
> > Why does this need a sleepable kmap? This looks like a textbook
> > kmap_atomic() use case.
> 
> There's a lot of code of this form.  Could we perhaps have:
> 
> static inline void copy_to_highpage(struct page *to, void *vfrom, unsigned 
> int size)
> {
>   char *vto = kmap_atomic(to);
> 
>   memcpy(vto, vfrom, size);
>   kunmap_atomic(vto);
> }
> 
> in linux/highmem.h ?

Christoph had the same idea.  I'll work on it.

Ira




Re: [PATCH RFC PKS/PMEM 33/58] fs/cramfs: Utilize new kmap_thread()

2020-10-13 Thread Ira Weiny
On Tue, Oct 13, 2020 at 09:01:49PM +0100, Al Viro wrote:
> On Tue, Oct 13, 2020 at 08:36:43PM +0100, Matthew Wilcox wrote:
> 
> > static inline void copy_to_highpage(struct page *to, void *vfrom, unsigned 
> > int size)
> > {
> > char *vto = kmap_atomic(to);
> > 
> > memcpy(vto, vfrom, size);
> > kunmap_atomic(vto);
> > }
> > 
> > in linux/highmem.h ?
> 
> You mean, like
> static void memcpy_from_page(char *to, struct page *page, size_t offset, 
> size_t len)
> {
> char *from = kmap_atomic(page);
> memcpy(to, from + offset, len);
> kunmap_atomic(from);
> }
> 
> static void memcpy_to_page(struct page *page, size_t offset, const char 
> *from, size_t len)
> {
> char *to = kmap_atomic(page);
> memcpy(to + offset, from, len);
> kunmap_atomic(to);
> }
> 
> static void memzero_page(struct page *page, size_t offset, size_t len)
> {
> char *addr = kmap_atomic(page);
> memset(addr + offset, 0, len);
> kunmap_atomic(addr);
> }
> 
> in lib/iov_iter.c?  FWIW, I don't like that "highpage" in the name and
> highmem.h as location - these make perfect sense regardless of highmem;
> they are normal memory operations with page + offset used instead of
> a pointer...

I was thinking along those lines as well, especially given the direction this
patch set takes kmap().

Thanks for pointing these out to me.  How about I lift them to a common
header?  But if not highmem.h, where?

Ira



Re: [PATCH RFC PKS/PMEM 24/58] fs/freevxfs: Utilize new kmap_thread()

2020-10-13 Thread Ira Weiny
On Tue, Oct 13, 2020 at 12:25:44PM +0100, Christoph Hellwig wrote:
> > -   kaddr = kmap(pp);
> > +   kaddr = kmap_thread(pp);
> > memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_SIZE);
> > -   kunmap(pp);
> > +   kunmap_thread(pp);
> 
> You only Cced me on this particular patch, which means I have absolutely
> no idea what kmap_thread and kunmap_thread actually do, and thus can't
> provide an informed review.

Sorry, the list was so big that I struggled with who to CC and on which patches.

> 
> That being said I think your life would be a lot easier if you add
> helpers for the above code sequence and its counterpart that copies
> to a potential hughmem page first, as that hides the implementation
> details from most users.

Matthew Wilcox and Al Viro have suggested similar ideas.

https://lore.kernel.org/lkml/20201013205012.gi2046...@iweiny-desk2.sc.intel.com/

Ira



Re: [PATCH RFC PKS/PMEM 05/58] kmap: Introduce k[un]map_thread

2020-11-09 Thread Ira Weiny
On Tue, Nov 10, 2020 at 02:13:56AM +0100, Thomas Gleixner wrote:
> Ira,
> 
> On Fri, Oct 09 2020 at 12:49, ira weiny wrote:
> > From: Ira Weiny 
> >
> > To correctly support the semantics of kmap() with Kernel protection keys
> > (PKS), kmap() may be required to set the protections on multiple
> > processors (globally).  Enabling PKS globally can be very expensive
> > depending on the requested operation.  Furthermore, enabling a domain
> > globally reduces the protection afforded by PKS.
> >
> > Most kmap() (Aprox 209 of 229) callers use the map within a single thread 
> > and
> > have no need for the protection domain to be enabled globally.  However, the
> > remaining callers do not follow this pattern and, as best I can tell, expect
> > the mapping to be 'global' and available to any thread who may access the
> > mapping.[1]
> >
> > We don't anticipate global mappings to pmem, however in general there is a
> > danger in changing the semantics of kmap().  Effectively, this would cause 
> > an
> > unresolved page fault with little to no information about why the failure
> > occurred.
> >
> > To resolve this a number of options were considered.
> >
> > 1) Attempt to change all the thread local kmap() calls to kmap_atomic()[2]
> > 2) Introduce a flags parameter to kmap() to indicate if the mapping should 
> > be
> >global or not
> > 3) Change ~20 call sites to 'kmap_global()' to indicate that they require a
> >global enablement of the pages.
> > 4) Change ~209 call sites to 'kmap_thread()' to indicate that the mapping 
> > is to
> >be used within that thread of execution only
> >
> > Option 1 is simply not feasible.  Option 2 would require all of the call 
> > sites
> > of kmap() to change.  Option 3 seems like a good minimal change but there 
> > is a
> > danger that new code may miss the semantic change of kmap() and not get the
> > behavior the developer intended.  Therefore, #4 was chosen.
> 
> There is Option #5:

There is now, yes.  :-D

> 
> Convert the thread local kmap() invocations to the proposed kmap_local()
> interface which is coming along [1].

I've been trying to follow that thread.

> 
> That solves a couple of issues:
> 
>  1) It relieves the current kmap_atomic() usage sites from the implicit
> pagefault/preempt disable semantics which apply even when
> CONFIG_HIGHMEM is disabled. kmap_local() can still be invoked from
> atomic context.
> 
>  2) Due to #1, it allows replacing the conditional usage of kmap() and
> kmap_atomic() for purely thread-local mappings.
> 
>  3) It puts the burden on the HIGHMEM inflicted systems
> 
>  4) It is actually more efficient for most of the pure thread local use
> cases on HIGHMEM inflicted systems because it avoids the overhead of
> the global lock and the potential kmap slot exhaustion. A potential
> preemption will be more expensive, but that's not really the case we
> want to optimize for.
> 
>  5) It solves the RT issue vs. kmap_atomic()
> 
> So instead of creating yet another variety of kmap() which is just
> scratching the particular PKRS itch, can we please consolidate all of
> that on the wider reaching kmap_local() approach?

Yes, I agree.  We absolutely don't want more kmap*() variants, and I was hoping
to dovetail into your kmap_local() work.[2]

I've pivoted away from this work a bit to first clean up all the
kmap()/memcpy*()/kunmap() sequences discussed elsewhere in the thread.[3]  I
was hoping your work would land so that I could then do
s/kmap_thread()/kmap_local()/ across all of these patches.

Also, we can convert the new memcpy_*_page() calls to kmap_local() as well.
[For now my patch just uses kmap_atomic().]
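
The substitution itself should be mechanical; roughly (names taken from the
in-flight kmap_local() series, where the unmap takes the returned address
rather than the page):

static void copy_into_page(struct page *page, const void *src)
{
	void *addr = kmap_local_page(page);	/* was: kmap_thread(page) */

	memcpy(addr, src, PAGE_SIZE);
	kunmap_local(addr);			/* was: kunmap_thread(page) */
}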

I've not looked at all of the patches in your latest version.  Have you
included converting any of the kmap() call sites?  I thought you were more
focused on converting the kmap_atomic() call sites to kmap_local()?

Ira

> 
> Thanks,
> 
> tglx
>  
> [1] https://lore.kernel.org/lkml/20201103092712.714480...@linutronix.de/

[2] https://lore.kernel.org/lkml/20201012195354.gc2046...@iweiny-desk2.sc.intel.com/
[3] https://lore.kernel.org/lkml/20201009213434.GA839@sol.localdomain/
    https://lore.kernel.org/lkml/20201013200149.gi3576...@zeniv.linux.org.uk/




Re: [PATCH v4 1/2] memremap: rename MEMORY_DEVICE_DEVDAX to MEMORY_DEVICE_GENERIC

2020-08-31 Thread Ira Weiny
On Mon, Aug 31, 2020 at 12:19:07PM +0200, Roger Pau Monné wrote:
> On Thu, Aug 20, 2020 at 01:37:41PM +0200, Roger Pau Monné wrote:
> > On Tue, Aug 11, 2020 at 11:07:36PM +0200, David Hildenbrand wrote:
> > > On 11.08.20 11:44, Roger Pau Monne wrote:
> > > > This is in preparation for the logic behind MEMORY_DEVICE_DEVDAX also
> > > > being used by non DAX devices.
> > > > 
> > > > No functional change intended.
> > > > 
> > > > Signed-off-by: Roger Pau Monné 

Dan is out on leave, so I'll chime in.

I can't really justify keeping this as DEVDAX if there is another user who
needs the same type of mapping.

Sorry for the delay.

Reviewed-by: Ira Weiny 
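
FWIW the usage a non-DAX consumer needs is the same shape as device-dax.
A sketch, assuming the current dev_pagemap layout with an embedded
struct resource:

static int generic_pgmap_attach(struct device *dev,
				struct dev_pagemap *pgmap,
				struct resource *res)
{
	void *addr;

	pgmap->type = MEMORY_DEVICE_GENERIC;	/* the renamed type */
	pgmap->res = *res;	/* physical range to get struct pages for */

	addr = devm_memremap_pages(dev, pgmap);
	return PTR_ERR_OR_ZERO(addr);
}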

> > > > ---
> > > > Cc: Dan Williams 
> > > > Cc: Vishal Verma 
> > > > Cc: Dave Jiang 
> > > > Cc: Andrew Morton 
> > > > Cc: Jason Gunthorpe 
> > > > Cc: Ira Weiny 
> > > > Cc: "Aneesh Kumar K.V" 
> > > > Cc: Johannes Thumshirn 
> > > > Cc: Logan Gunthorpe 
> > > > Cc: linux-nvd...@lists.01.org
> > > > Cc: xen-devel@lists.xenproject.org
> > > > Cc: linux...@kvack.org
> > > > ---
> > > >  drivers/dax/device.c | 2 +-
> > > >  include/linux/memremap.h | 9 -
> > > >  mm/memremap.c| 2 +-
> > > >  3 files changed, 6 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> > > > index 4c0af2eb7e19..1e89513f3c59 100644
> > > > --- a/drivers/dax/device.c
> > > > +++ b/drivers/dax/device.c
> > > > @@ -429,7 +429,7 @@ int dev_dax_probe(struct device *dev)
> > > > return -EBUSY;
> > > > }
> > > >  
> > > > -   dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
> > > > +   dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC;
> > > > addr = devm_memremap_pages(dev, &dev_dax->pgmap);
> > > > if (IS_ERR(addr))
> > > > return PTR_ERR(addr);
> > > > diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> > > > index 5f5b2df06e61..e5862746751b 100644
> > > > --- a/include/linux/memremap.h
> > > > +++ b/include/linux/memremap.h
> > > > @@ -46,11 +46,10 @@ struct vmem_altmap {
> > > >   * wakeup is used to coordinate physical address space management (ex:
> > > >   * fs truncate/hole punch) vs pinned pages (ex: device dma).
> > > >   *
> > > > - * MEMORY_DEVICE_DEVDAX:
> > > > + * MEMORY_DEVICE_GENERIC:
> > > >   * Host memory that has similar access semantics as System RAM i.e. DMA
> > > > - * coherent and supports page pinning. In contrast to
> > > > - * MEMORY_DEVICE_FS_DAX, this memory is access via a device-dax
> > > > - * character device.
> > > > > + * coherent and supports page pinning. This is for example used by DAX devices
> > > > > + * that expose memory using a character device.
> > > >   *
> > > >   * MEMORY_DEVICE_PCI_P2PDMA:
> > > >   * Device memory residing in a PCI BAR intended for use with 
> > > > Peer-to-Peer
> > > > @@ -60,7 +59,7 @@ enum memory_type {
> > > > /* 0 is reserved to catch uninitialized type fields */
> > > > MEMORY_DEVICE_PRIVATE = 1,
> > > > MEMORY_DEVICE_FS_DAX,
> > > > -   MEMORY_DEVICE_DEVDAX,
> > > > +   MEMORY_DEVICE_GENERIC,
> > > > MEMORY_DEVICE_PCI_P2PDMA,
> > > >  };
> > > >  
> > > > diff --git a/mm/memremap.c b/mm/memremap.c
> > > > index 03e38b7a38f1..006dace60b1a 100644
> > > > --- a/mm/memremap.c
> > > > +++ b/mm/memremap.c
> > > > @@ -216,7 +216,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int 
> > > > nid)
> > > > return ERR_PTR(-EINVAL);
> > > > }
> > > > break;
> > > > -   case MEMORY_DEVICE_DEVDAX:
> > > > +   case MEMORY_DEVICE_GENERIC:
> > > > need_devmap_managed = false;
> > > > break;
> > > > case MEMORY_DEVICE_PCI_P2PDMA:
> > > > 
> > > 
> > > No strong opinion (@Dan?). I do wonder if a separate type would make sense.
> > 
> > Gentle ping.
> 
> Sorry to ping again, but I would rather get this out of my queue if
> possible, seeing as the other patch is OK to go in but depends on this
> one going in first.
> 
> Thanks, Roger.



Re: [PATCH v2] xen: Convert kmap() to kmap_local_page()

2022-04-25 Thread Ira Weiny
On Wed, Apr 20, 2022 at 04:07:36PM +0200, Fabio M. De Francesco wrote:
> On mercoledì 20 aprile 2022 15:57:14 CEST Julia Lawall wrote:
> > 
> > On Wed, 20 Apr 2022, Fabio M. De Francesco wrote:
> > 
> > > On mercoledì 20 aprile 2022 15:40:10 CEST Julia Lawall wrote:
> > > >
> > > > On Wed, 20 Apr 2022, Fabio M. De Francesco wrote:
> > > >
> > > > > On mercoledì 20 aprile 2022 08:03:05 CEST Julia Lawall wrote:
> > > > > >
> > > > > > On Wed, 20 Apr 2022, Alaa Mohamed wrote:
> > > > > >
> > > > > > > kmap() is being deprecated and these usages are all local to
> > > > > > > the thread
> > > > > > > so there is no reason kmap_local_page() can't be used.
> > > > > > >
> > > > > > > Replace kmap() calls with kmap_local_page().
> > > > > >
> > > > > > OK, so from a Coccinelle point of view, could we do
> > > > > >
> > > > > > @@
> > > > > > expression e1,e2,x,f;
> > > > > > @@
> > > > > >
> > > > > > e1 =
> > > > > > - kmap
> > > > > > + kmap_local_page
> > > > > > (e2)
> > > > > > ... when != x = e1 // not stored in any location and not passed to another function
> > > > > > when != f(...,e1,...)
> > > > > > when != x = e2
> > > > > > when != f(...,e2,...)
> > > > > > -kunmap(e2)
> > > > > > +kunmap_local(e1)
> > > > > >
> > > > > > julia
> > > > > >
> > > > >
> > > > > I've never spent sufficient time to properly understand the syntax
> > > > > and semantics of Coccinelle expressions. However, thanks Julia, this
> > > > > code looks good and can be very helpful.
> > > > >
> > > > > Only a minor objection... it doesn't say whether 'e2' has been
> > > > > allocated within the same function where the kmap() call is.
> > > > >
> > > > > In the particular case that I cite above, I'd prefer to remove the
> > > > > allocation of the page (say with alloc_page()) and convert
> > > > > kmap() / kunmap() to use kmalloc() / kfree().
> > > > >
> > > > > Fox example, this is done in the following patch:
> > > > >
> > > > > commit 633b0616cfe0 ("x86/sgx: Remove unnecessary kmap() from
> > > > > sgx_ioc_enclave_init()") from Ira Weiny.
> > > > >
> > > > > Can Coccinelle also catch those special cases where a page that is
> > > > > passed to kmap() is allocated within that same function (vs. being
> > > > > passed as an argument to this function) and, if so, propose a
> > > > > replacement with kmalloc()?
> > > >
> > > > It looks complex in this case, because the allocation is in another
> > > > function, and it is passed to another function.
> > >
> > > This is not the special case I was talking about. In this case your
> > > Coccinelle code makes the right proposal, and it is exactly what Alaa
> > > did in her patch (which is good!).
> > >
> > > I'm talking about other special cases like the one I pointed to with
> > > the link I provided. I'm sorry if my bad English made you think that
> > > Alaa's patch was one of those cases where the page is allocated within
> > > the same function where kmap() is.
> > >
> > > I hope that now I've been clearer :)
> > 
> > Ah, sorry for the misunderstanding.  If you have an example, I can take a
> > look and propose something for this special case.
> > 
> > julia
> 
> Yes, I have the example that you are asking for. It's that commit 
> 633b0616cfe0 from Ira Weiny.
> 
> Let me copy and paste it here for your convenience...
> 
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index 90a5caf76939..2e10367ea66c 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -604,7 +604,6 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
>  {
> struct sgx_sigstruct *sigstruct;
> struct sgx_e
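
In outline, the transformation Fabio cites from commit 633b0616cfe0
follows the pattern below, where an alloc_page()/kmap() pair collapses
into a plain kmalloc() (a sketch, not the verbatim commit):

	/* before: a page allocated only so it can be mapped */
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;
	buf = kmap(page);
	/* ... use buf ... */
	kunmap(page);
	__free_page(page);

	/* after: no page and no mapping needed */
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	/* ... use buf ... */
	kfree(buf);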

Re: [PATCH 20/26] block: move the dax flag to queue_limits

2024-08-04 Thread Ira Weiny
On Mon, Jun 17, 2024 at 08:04:47AM +0200, Christoph Hellwig wrote:
> Move the dax flag into the queue_limits feature field so that it can be
> set atomically with the queue frozen.

I've only just bisected this issue, so I'm not at all sure what is going on.

What I do know is that the ndctl dax-ext4.sh and dax-xfs.sh tests are failing
in 6.11.  I bisected to this patch.  A revert fixes ext4 but not xfs.

I plan to continue looking into it in the morning.

Alison, Dave, AFAICS this is not related to the panics you are seeing in other
tests.

Ira
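
For reference, the mechanical shape of the change, condensed from the
quoted diff below (this is only a restatement of the patch, not a claim
about the root cause of the test failures):

	/* before: the DAX capability was toggled on a live queue */
	disk = blk_alloc_disk(&lim, nid);
	blk_queue_flag_set(QUEUE_FLAG_DAX, disk->queue);

	/* after: declared up front in queue_limits, so it is applied
	 * atomically while the queue is set up
	 */
	lim.features |= BLK_FEAT_DAX;
	disk = blk_alloc_disk(&lim, nid);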

> 
> Signed-off-by: Christoph Hellwig 
> Reviewed-by: Damien Le Moal 
> ---
>  block/blk-mq-debugfs.c   | 1 -
>  drivers/md/dm-table.c| 4 ++--
>  drivers/nvdimm/pmem.c| 7 ++-
>  drivers/s390/block/dcssblk.c | 2 +-
>  include/linux/blkdev.h   | 6 --
>  5 files changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 62b132e9a9ce3b..f4fa820251ce83 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -88,7 +88,6 @@ static const char *const blk_queue_flag_name[] = {
>   QUEUE_FLAG_NAME(SAME_FORCE),
>   QUEUE_FLAG_NAME(INIT_DONE),
>   QUEUE_FLAG_NAME(POLL),
> - QUEUE_FLAG_NAME(DAX),
>   QUEUE_FLAG_NAME(STATS),
>   QUEUE_FLAG_NAME(REGISTERED),
>   QUEUE_FLAG_NAME(QUIESCED),
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index 84d636712c7284..e44697037e86f4 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -1834,11 +1834,11 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
>   limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
>  
>   if (dm_table_supports_dax(t, device_not_dax_capable)) {
> - blk_queue_flag_set(QUEUE_FLAG_DAX, q);
> + limits->features |= BLK_FEAT_DAX;
>   if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
>   set_dax_synchronous(t->md->dax_dev);
>   } else
> - blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
> + limits->features &= ~BLK_FEAT_DAX;
>  
>   if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
>   dax_write_cache(t->md->dax_dev, true);
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index b821dcf018f6ae..1dd74c969d5a09 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -465,7 +465,6 @@ static int pmem_attach_disk(struct device *dev,
>   struct dax_device *dax_dev;
>   struct nd_pfn_sb *pfn_sb;
>   struct pmem_device *pmem;
> - struct request_queue *q;
>   struct gendisk *disk;
>   void *addr;
>   int rc;
> @@ -499,6 +498,8 @@ static int pmem_attach_disk(struct device *dev,
>   }
>   if (fua)
>   lim.features |= BLK_FEAT_FUA;
> + if (is_nd_pfn(dev))
> + lim.features |= BLK_FEAT_DAX;
>  
>   if (!devm_request_mem_region(dev, res->start, resource_size(res),
>   dev_name(&ndns->dev))) {
> @@ -509,7 +510,6 @@ static int pmem_attach_disk(struct device *dev,
>   disk = blk_alloc_disk(&lim, nid);
>   if (IS_ERR(disk))
>   return PTR_ERR(disk);
> - q = disk->queue;
>  
>   pmem->disk = disk;
>   pmem->pgmap.owner = pmem;
> @@ -547,9 +547,6 @@ static int pmem_attach_disk(struct device *dev,
>   }
>   pmem->virt_addr = addr;
>  
> - if (pmem->pfn_flags & PFN_MAP)
> - blk_queue_flag_set(QUEUE_FLAG_DAX, q);
> -
>   disk->fops  = &pmem_fops;
>   disk->private_data  = pmem;
>   nvdimm_namespace_disk_name(ndns, disk->disk_name);
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index 6d1689a2717e5f..d5a5d11ae0dcdf 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -548,6 +548,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
>  {
>   struct queue_limits lim = {
>   .logical_block_size = 4096,
> + .features   = BLK_FEAT_DAX,
>   };
>   int rc, i, j, num_of_segments;
>   struct dcssblk_dev_info *dev_info;
> @@ -643,7 +644,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
>   dev_info->gd->fops = &dcssblk_devops;
>   dev_info->gd->private_data = dev_info;
>   dev_info->gd->flags |= GENHD_FL_NO_PART;
> - blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);
>  
>   seg_byte_size = (dev_info->end - dev_info->start + 1);
>   set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index f3d4519d609d95..7022e06a3dd9a3 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -307,6 +307,9 @@ enum {
>  
>   /* supports REQ_NOWAIT */
>   BLK_FEAT_NOWAIT = (1u << 7),