On Wed, Aug 03, 2016 at 06:40:52PM +1000, Alexey Kardashevskiy wrote:
> In real mode, TCE tables are invalidated using special
> cache-inhibited store instructions which are not available in
> virtual mode.
> 
> This defines and implements the exchange_rm() callback. It does not
> define set_rm/clear_rm/flush_rm callbacks as there are no users for those -
> exchange/exchange_rm are only to be used by KVM for VFIO.
> 
> The exchange_rm callback is defined for IODA1/IODA2 powernv platforms.
> 
> This replaces list_for_each_entry_rcu with its lockless version as
> from now on pnv_pci_ioda2_tce_invalidate() can be called in
> real mode too.
> 
> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
> ---
>  arch/powerpc/include/asm/iommu.h          |  7 +++++++
>  arch/powerpc/kernel/iommu.c               | 23 +++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/pci-ioda.c | 26 +++++++++++++++++++++++++-
>  3 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h 
> b/arch/powerpc/include/asm/iommu.h
> index cd4df44..a13d207 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -64,6 +64,11 @@ struct iommu_table_ops {
>                       long index,
>                       unsigned long *hpa,
>                       enum dma_data_direction *direction);
> +     /* Real mode */
> +     int (*exchange_rm)(struct iommu_table *tbl,
> +                     long index,
> +                     unsigned long *hpa,
> +                     enum dma_data_direction *direction);
>  #endif
>       void (*clear)(struct iommu_table *tbl,
>                       long index, long npages);
> @@ -209,6 +214,8 @@ extern void iommu_del_device(struct device *dev);
>  extern int __init tce_iommu_bus_notifier_init(void);
>  extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
>               unsigned long *hpa, enum dma_data_direction *direction);
> +extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
> +             unsigned long *hpa, enum dma_data_direction *direction);
>  #else
>  static inline void iommu_register_group(struct iommu_table_group 
> *table_group,
>                                       int pci_domain_number,
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index a8f017a..65b2dac 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -1020,6 +1020,29 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned 
> long entry,
>  }
>  EXPORT_SYMBOL_GPL(iommu_tce_xchg);
>  
> +long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
> +             unsigned long *hpa, enum dma_data_direction *direction)
> +{
> +     long ret;
> +
> +     ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> +
> +     if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> +                     (*direction == DMA_BIDIRECTIONAL))) {
> +             struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
> +
> +             if (likely(pg)) {
> +                     SetPageDirty(pg);
> +             } else {

Isn't there a race here, if someone else updates this TCE entry
between your initial exchange and the rollback exchange below?

> +                     tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> +                     ret = -EFAULT;
> +             }
> +     }
> +
> +     return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
> +
>  int iommu_take_ownership(struct iommu_table *tbl)
>  {
>       unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index c04afd2..a0b5ea6 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1827,6 +1827,17 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, 
> long index,
>  
>       return ret;
>  }
> +
> +static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
> +             unsigned long *hpa, enum dma_data_direction *direction)
> +{
> +     long ret = pnv_tce_xchg(tbl, index, hpa, direction);
> +
> +     if (!ret)
> +             pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
> +
> +     return ret;
> +}
>  #endif
>  
>  static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
> @@ -1841,6 +1852,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
>       .set = pnv_ioda1_tce_build,
>  #ifdef CONFIG_IOMMU_API
>       .exchange = pnv_ioda1_tce_xchg,
> +     .exchange_rm = pnv_ioda1_tce_xchg_rm,
>  #endif
>       .clear = pnv_ioda1_tce_free,
>       .get = pnv_tce_get,
> @@ -1915,7 +1927,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct 
> iommu_table *tbl,
>  {
>       struct iommu_table_group_link *tgl;
>  
> -     list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
> +     list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) {

So, IIUC, this previously took a bool rm parameter, but wouldn't
actually have worked in real mode even if it was set?

>               struct pnv_ioda_pe *pe = container_of(tgl->table_group,
>                               struct pnv_ioda_pe, table_group);
>               struct pnv_phb *phb = pe->phb;
> @@ -1973,6 +1985,17 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, 
> long index,
>  
>       return ret;
>  }
> +
> +static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
> +             unsigned long *hpa, enum dma_data_direction *direction)
> +{
> +     long ret = pnv_tce_xchg(tbl, index, hpa, direction);
> +
> +     if (!ret)
> +             pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
> +
> +     return ret;
> +}
>  #endif
>  
>  static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
> @@ -1992,6 +2015,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
>       .set = pnv_ioda2_tce_build,
>  #ifdef CONFIG_IOMMU_API
>       .exchange = pnv_ioda2_tce_xchg,
> +     .exchange_rm = pnv_ioda2_tce_xchg_rm,
>  #endif
>       .clear = pnv_ioda2_tce_free,
>       .get = pnv_tce_get,

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to