On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> Some functions defined in vfio_iommu_type1.c are common, and
> we want to use them for the FSL IOMMU (PAMU) and iommu-none drivers,
> so some of them are moved to vfio_iommu_common.c.
> 
> I think we can do more of that, but we will take this step by step.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhus...@freescale.com>
> ---
>  drivers/vfio/Makefile            |    4 +-
>  drivers/vfio/vfio_iommu_common.c |  235 ++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/vfio_iommu_common.h |   30 +++++
>  drivers/vfio/vfio_iommu_type1.c  |  206 +---------------------------------
>  4 files changed, 268 insertions(+), 207 deletions(-)
>  create mode 100644 drivers/vfio/vfio_iommu_common.c
>  create mode 100644 drivers/vfio/vfio_iommu_common.h
> 
> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
> index 72bfabc..c5792ec 100644
> --- a/drivers/vfio/Makefile
> +++ b/drivers/vfio/Makefile
> @@ -1,4 +1,4 @@
>  obj-$(CONFIG_VFIO) += vfio.o
> -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
> -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
> +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o
> +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o
>  obj-$(CONFIG_VFIO_PCI) += pci/
> diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c
> new file mode 100644
> index 0000000..8bdc0ea
> --- /dev/null
> +++ b/drivers/vfio/vfio_iommu_common.c
> @@ -0,0 +1,235 @@
> +/*
> + * VFIO: Common code for vfio IOMMU support
> + *
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + *     Author: Alex Williamson <alex.william...@redhat.com>
> + *     Author: Bharat Bhushan <bharat.bhus...@freescale.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * Derived from original vfio:
> + * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
> + * Author: Tom Lyon, p...@cisco.com
> + */
> +
> +#include <linux/compat.h>
> +#include <linux/device.h>
> +#include <linux/fs.h>
> +#include <linux/iommu.h>
> +#include <linux/module.h>
> +#include <linux/mm.h>
> +#include <linux/pci.h>               /* pci_bus_type */
> +#include <linux/rbtree.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/vfio.h>
> +#include <linux/workqueue.h>

Please clean up the includes in both the source and target files.  You
obviously don't need linux/pci.h here, for one.
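
At a glance, something like this would cover what the common code
actually uses (untested, just from skimming the functions below, so
double-check before respinning):

	#include <linux/capability.h>	/* capable() */
	#include <linux/iommu.h>	/* IOMMU_WRITE */
	#include <linux/mm.h>		/* get_user_pages_fast(), pfn helpers */
	#include <linux/module.h>	/* module_param_named() */
	#include <linux/sched.h>	/* current, rlimit(), get_task_mm() */
	#include <linux/slab.h>		/* kmalloc()/kfree() */
	#include <linux/workqueue.h>	/* deferred locked_vm accounting */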

> +
> +static bool disable_hugepages;
> +module_param_named(disable_hugepages,
> +                disable_hugepages, bool, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(disable_hugepages,
> +              "Disable VFIO IOMMU support for IOMMU hugepages.");
> +
> +struct vwork {
> +     struct mm_struct        *mm;
> +     long                    npage;
> +     struct work_struct      work;
> +};
> +
> +/* delayed decrement/increment for locked_vm */
> +void vfio_lock_acct_bg(struct work_struct *work)
> +{
> +     struct vwork *vwork = container_of(work, struct vwork, work);
> +     struct mm_struct *mm;
> +
> +     mm = vwork->mm;
> +     down_write(&mm->mmap_sem);
> +     mm->locked_vm += vwork->npage;
> +     up_write(&mm->mmap_sem);
> +     mmput(mm);
> +     kfree(vwork);
> +}
> +
> +void vfio_lock_acct(long npage)
> +{
> +     struct vwork *vwork;
> +     struct mm_struct *mm;
> +
> +     if (!current->mm || !npage)
> +             return; /* process exited or nothing to do */
> +
> +     if (down_write_trylock(&current->mm->mmap_sem)) {
> +             current->mm->locked_vm += npage;
> +             up_write(&current->mm->mmap_sem);
> +             return;
> +     }
> +
> +     /*
> +      * Couldn't get mmap_sem lock, so must setup to update
> +      * mm->locked_vm later. If locked_vm were atomic, we
> +      * wouldn't need this silliness
> +      */
> +     vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
> +     if (!vwork)
> +             return;
> +     mm = get_task_mm(current);
> +     if (!mm) {
> +             kfree(vwork);
> +             return;
> +     }
> +     INIT_WORK(&vwork->work, vfio_lock_acct_bg);
> +     vwork->mm = mm;
> +     vwork->npage = npage;
> +     schedule_work(&vwork->work);
> +}
> +
> +/*
> + * Some mappings aren't backed by a struct page, for example an mmap'd
> + * MMIO range for our own or another device.  These use a different
> + * pfn conversion and shouldn't be tracked as locked pages.
> + */
> +bool is_invalid_reserved_pfn(unsigned long pfn)
> +{
> +     if (pfn_valid(pfn)) {
> +             bool reserved;
> +             struct page *tail = pfn_to_page(pfn);
> +             struct page *head = compound_trans_head(tail);
> +             reserved = !!(PageReserved(head));
> +             if (head != tail) {
> +                     /*
> +                      * "head" is not a dangling pointer
> +                      * (compound_trans_head takes care of that)
> +                      * but the hugepage may have been split
> +                      * from under us (and we may not hold a
> +                      * reference count on the head page so it can
> +                      * be reused before we run PageReferenced), so
> +                      * we've to check PageTail before returning
> +                      * what we just read.
> +                      */
> +                     smp_rmb();
> +                     if (PageTail(tail))
> +                             return reserved;
> +             }
> +             return PageReserved(tail);
> +     }
> +
> +     return true;
> +}
> +
> +int put_pfn(unsigned long pfn, int prot)
> +{
> +     if (!is_invalid_reserved_pfn(pfn)) {
> +             struct page *page = pfn_to_page(pfn);
> +             if (prot & IOMMU_WRITE)
> +                     SetPageDirty(page);
> +             put_page(page);
> +             return 1;
> +     }
> +     return 0;
> +}
> +
> +static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
> +{
> +     struct page *page[1];
> +     struct vm_area_struct *vma;
> +     int ret = -EFAULT;
> +
> +     if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
> +             *pfn = page_to_pfn(page[0]);
> +             return 0;
> +     }
> +
> +     printk("via vma\n");
> +     down_read(&current->mm->mmap_sem);
> +
> +     vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> +
> +     if (vma && vma->vm_flags & VM_PFNMAP) {
> +             *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> +             if (is_invalid_reserved_pfn(*pfn))
> +                     ret = 0;
> +     }
> +
> +     up_read(&current->mm->mmap_sem);
> +
> +     return ret;
> +}
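
The printk() in vaddr_get_pfn() above looks like leftover debugging;
please drop it (or at least demote it to pr_debug()) before respinning.
The type1 version this code was moved from doesn't have it.
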
> +
> +/*
> + * Attempt to pin pages.  We really don't want to track all the pfns and
> + * the iommu can only map chunks of consecutive pfns anyway, so get the
> + * first page and all consecutive pages with the same locking.
> + */
> +long vfio_pin_pages(unsigned long vaddr, long npage,
> +                        int prot, unsigned long *pfn_base)
> +{
> +     unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +     bool lock_cap = capable(CAP_IPC_LOCK);
> +     long ret, i;
> +
> +     if (!current->mm)
> +             return -ENODEV;
> +
> +     ret = vaddr_get_pfn(vaddr, prot, pfn_base);
> +     if (ret)
> +             return ret;
> +
> +     if (is_invalid_reserved_pfn(*pfn_base))
> +             return 1;
> +
> +     if (!lock_cap && current->mm->locked_vm + 1 > limit) {
> +             put_pfn(*pfn_base, prot);
> +             pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> +                     limit << PAGE_SHIFT);
> +             return -ENOMEM;
> +     }
> +
> +     if (unlikely(disable_hugepages)) {
> +             vfio_lock_acct(1);
> +             return 1;
> +     }
> +
> +     /* Lock all the consecutive pages from pfn_base */
> +     for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
> +             unsigned long pfn = 0;
> +
> +             ret = vaddr_get_pfn(vaddr, prot, &pfn);
> +             if (ret)
> +                     break;
> +
> +             if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
> +                     put_pfn(pfn, prot);
> +                     break;
> +             }
> +
> +             if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
> +                     put_pfn(pfn, prot);
> +                     pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
> +                             __func__, limit << PAGE_SHIFT);
> +                     break;
> +             }
> +     }
> +
> +     vfio_lock_acct(i);
> +
> +     return i;
> +}
> +
> +long vfio_unpin_pages(unsigned long pfn, long npage,
> +                          int prot, bool do_accounting)
> +{
> +     unsigned long unlocked = 0;
> +     long i;
> +
> +     for (i = 0; i < npage; i++)
> +             unlocked += put_pfn(pfn++, prot);
> +
> +     if (do_accounting)
> +             vfio_lock_acct(-unlocked);
> +
> +     return unlocked;
> +}
> diff --git a/drivers/vfio/vfio_iommu_common.h b/drivers/vfio/vfio_iommu_common.h
> new file mode 100644
> index 0000000..4738391
> --- /dev/null
> +++ b/drivers/vfio/vfio_iommu_common.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + * Copyright (C) 2013 Freescale Semiconductor, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
> + */
> +
> +#ifndef _VFIO_IOMMU_COMMON_H
> +#define _VFIO_IOMMU_COMMON_H
> +
> +void vfio_lock_acct_bg(struct work_struct *work);

Does this need to be exposed?
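
If it's only ever queued from vfio_lock_acct(), it can stay static in
vfio_iommu_common.c, i.e. just:

	static void vfio_lock_acct_bg(struct work_struct *work)

in the .c file, with no prototype in the header at all.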

> +void vfio_lock_acct(long npage);
> +bool is_invalid_reserved_pfn(unsigned long pfn);
> +int put_pfn(unsigned long pfn, int prot);

Why are these exposed?  They only seem to be used by functions in the
new common file.
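
Same here; assuming nothing outside vfio_iommu_common.c ends up calling
them, they can simply become:

	static bool is_invalid_reserved_pfn(unsigned long pfn)
	static int put_pfn(unsigned long pfn, int prot)

and drop out of the header.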

> +long vfio_pin_pages(unsigned long vaddr, long npage, int prot,
> +                 unsigned long *pfn_base);
> +long vfio_unpin_pages(unsigned long pfn, long npage,
> +                   int prot, bool do_accounting);

Can we get by with just these two and vfio_lock_acct()?
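
In other words, modulo the license block, the whole header could
presumably shrink to something like:

	#ifndef _VFIO_IOMMU_COMMON_H
	#define _VFIO_IOMMU_COMMON_H

	void vfio_lock_acct(long npage);
	long vfio_pin_pages(unsigned long vaddr, long npage, int prot,
			    unsigned long *pfn_base);
	long vfio_unpin_pages(unsigned long pfn, long npage,
			      int prot, bool do_accounting);

	#endif /* _VFIO_IOMMU_COMMON_H */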

> +#endif
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index a9807de..e9a58fa 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -37,6 +37,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/vfio.h>
>  #include <linux/workqueue.h>
> +#include "vfio_iommu_common.h"
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.william...@redhat.com>"
> @@ -48,12 +49,6 @@ module_param_named(allow_unsafe_interrupts,
>  MODULE_PARM_DESC(allow_unsafe_interrupts,
>                "Enable VFIO IOMMU support for on platforms without interrupt remapping support.");
>  
> -static bool disable_hugepages;
> -module_param_named(disable_hugepages,
> -                disable_hugepages, bool, S_IRUGO | S_IWUSR);
> -MODULE_PARM_DESC(disable_hugepages,
> -              "Disable VFIO IOMMU support for IOMMU hugepages.");
> -
>  struct vfio_iommu {
>       struct iommu_domain     *domain;
>       struct mutex            lock;
> @@ -123,205 +118,6 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
>       rb_erase(&old->node, &iommu->dma_list);
>  }
>  
> -struct vwork {
> -     struct mm_struct        *mm;
> -     long                    npage;
> -     struct work_struct      work;
> -};
> -
> -/* delayed decrement/increment for locked_vm */
> -static void vfio_lock_acct_bg(struct work_struct *work)
> -{
> -     struct vwork *vwork = container_of(work, struct vwork, work);
> -     struct mm_struct *mm;
> -
> -     mm = vwork->mm;
> -     down_write(&mm->mmap_sem);
> -     mm->locked_vm += vwork->npage;
> -     up_write(&mm->mmap_sem);
> -     mmput(mm);
> -     kfree(vwork);
> -}
> -
> -static void vfio_lock_acct(long npage)
> -{
> -     struct vwork *vwork;
> -     struct mm_struct *mm;
> -
> -     if (!current->mm || !npage)
> -             return; /* process exited or nothing to do */
> -
> -     if (down_write_trylock(&current->mm->mmap_sem)) {
> -             current->mm->locked_vm += npage;
> -             up_write(&current->mm->mmap_sem);
> -             return;
> -     }
> -
> -     /*
> -      * Couldn't get mmap_sem lock, so must setup to update
> -      * mm->locked_vm later. If locked_vm were atomic, we
> -      * wouldn't need this silliness
> -      */
> -     vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
> -     if (!vwork)
> -             return;
> -     mm = get_task_mm(current);
> -     if (!mm) {
> -             kfree(vwork);
> -             return;
> -     }
> -     INIT_WORK(&vwork->work, vfio_lock_acct_bg);
> -     vwork->mm = mm;
> -     vwork->npage = npage;
> -     schedule_work(&vwork->work);
> -}
> -
> -/*
> - * Some mappings aren't backed by a struct page, for example an mmap'd
> - * MMIO range for our own or another device.  These use a different
> - * pfn conversion and shouldn't be tracked as locked pages.
> - */
> -static bool is_invalid_reserved_pfn(unsigned long pfn)
> -{
> -     if (pfn_valid(pfn)) {
> -             bool reserved;
> -             struct page *tail = pfn_to_page(pfn);
> -             struct page *head = compound_trans_head(tail);
> -             reserved = !!(PageReserved(head));
> -             if (head != tail) {
> -                     /*
> -                      * "head" is not a dangling pointer
> -                      * (compound_trans_head takes care of that)
> -                      * but the hugepage may have been split
> -                      * from under us (and we may not hold a
> -                      * reference count on the head page so it can
> -                      * be reused before we run PageReferenced), so
> -                      * we've to check PageTail before returning
> -                      * what we just read.
> -                      */
> -                     smp_rmb();
> -                     if (PageTail(tail))
> -                             return reserved;
> -             }
> -             return PageReserved(tail);
> -     }
> -
> -     return true;
> -}
> -
> -static int put_pfn(unsigned long pfn, int prot)
> -{
> -     if (!is_invalid_reserved_pfn(pfn)) {
> -             struct page *page = pfn_to_page(pfn);
> -             if (prot & IOMMU_WRITE)
> -                     SetPageDirty(page);
> -             put_page(page);
> -             return 1;
> -     }
> -     return 0;
> -}
> -
> -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
> -{
> -     struct page *page[1];
> -     struct vm_area_struct *vma;
> -     int ret = -EFAULT;
> -
> -     if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) {
> -             *pfn = page_to_pfn(page[0]);
> -             return 0;
> -     }
> -
> -     down_read(&current->mm->mmap_sem);
> -
> -     vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
> -
> -     if (vma && vma->vm_flags & VM_PFNMAP) {
> -             *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> -             if (is_invalid_reserved_pfn(*pfn))
> -                     ret = 0;
> -     }
> -
> -     up_read(&current->mm->mmap_sem);
> -
> -     return ret;
> -}
> -
> -/*
> - * Attempt to pin pages.  We really don't want to track all the pfns and
> - * the iommu can only map chunks of consecutive pfns anyway, so get the
> - * first page and all consecutive pages with the same locking.
> - */
> -static long vfio_pin_pages(unsigned long vaddr, long npage,
> -                        int prot, unsigned long *pfn_base)
> -{
> -     unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -     bool lock_cap = capable(CAP_IPC_LOCK);
> -     long ret, i;
> -
> -     if (!current->mm)
> -             return -ENODEV;
> -
> -     ret = vaddr_get_pfn(vaddr, prot, pfn_base);
> -     if (ret)
> -             return ret;
> -
> -     if (is_invalid_reserved_pfn(*pfn_base))
> -             return 1;
> -
> -     if (!lock_cap && current->mm->locked_vm + 1 > limit) {
> -             put_pfn(*pfn_base, prot);
> -             pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
> -                     limit << PAGE_SHIFT);
> -             return -ENOMEM;
> -     }
> -
> -     if (unlikely(disable_hugepages)) {
> -             vfio_lock_acct(1);
> -             return 1;
> -     }
> -
> -     /* Lock all the consecutive pages from pfn_base */
> -     for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
> -             unsigned long pfn = 0;
> -
> -             ret = vaddr_get_pfn(vaddr, prot, &pfn);
> -             if (ret)
> -                     break;
> -
> -             if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
> -                     put_pfn(pfn, prot);
> -                     break;
> -             }
> -
> -             if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
> -                     put_pfn(pfn, prot);
> -                     pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
> -                             __func__, limit << PAGE_SHIFT);
> -                     break;
> -             }
> -     }
> -
> -     vfio_lock_acct(i);
> -
> -     return i;
> -}
> -
> -static long vfio_unpin_pages(unsigned long pfn, long npage,
> -                          int prot, bool do_accounting)
> -{
> -     unsigned long unlocked = 0;
> -     long i;
> -
> -     for (i = 0; i < npage; i++)
> -             unlocked += put_pfn(pfn++, prot);
> -
> -     if (do_accounting)
> -             vfio_lock_acct(-unlocked);
> -
> -     return unlocked;
> -}
> -
>  static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
>                           dma_addr_t iova, size_t *size)
>  {


