On Mon, Oct 15, 2018 at 08:32:58PM +1100, Alexey Kardashevskiy wrote:
> We are going to add a global list of NPUs in the system which is going
> to be yet another static symbol. Let's reorganise the code and put all
> static symbols into one struct for better tracking what is really needed
> for NPU (this might become a driver data some day).
> 
> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>

I'm not entirely convinced this is worthwhile, but maybe it'll become
clearer with the later patches.  There are some nits though.

> ---
>  arch/powerpc/include/asm/pci.h            |  1 +
>  arch/powerpc/platforms/powernv/npu-dma.c  | 77 
> ++++++++++++++++++-------------
>  arch/powerpc/platforms/powernv/pci-ioda.c |  2 +
>  3 files changed, 47 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
> index 2af9ded..1a96075 100644
> --- a/arch/powerpc/include/asm/pci.h
> +++ b/arch/powerpc/include/asm/pci.h
> @@ -129,5 +129,6 @@ extern void pcibios_scan_phb(struct pci_controller *hose);
>  
>  extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
>  extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
> +extern void pnv_npu2_devices_init(void);
>  
>  #endif /* __ASM_POWERPC_PCI_H */
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> b/arch/powerpc/platforms/powernv/npu-dma.c
> index 13e5153..01402f9 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -22,20 +22,6 @@
>  #include "pci.h"
>  
>  /*
> - * spinlock to protect initialisation of an npu_context for a particular
> - * mm_struct.
> - */
> -static DEFINE_SPINLOCK(npu_context_lock);
> -
> -/*
> - * When an address shootdown range exceeds this threshold we invalidate the
> - * entire TLB on the GPU for the given PID rather than each specific address 
> in
> - * the range.
> - */
> -static uint64_t atsd_threshold = 2 * 1024 * 1024;
> -static struct dentry *atsd_threshold_dentry;
> -
> -/*
>   * Other types of TCE cache invalidation are not functional in the
>   * hardware.
>   */
> @@ -392,6 +378,33 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct 
> pnv_ioda_pe *npe)
>  /*
>   * NPU2 ATS
>   */
> +static struct {
> +     /*
> +      * spinlock to protect initialisation of an npu_context for
> +      * a particular mm_struct.
> +      */
> +     spinlock_t context_lock;
> +
> +     /* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
> +     int max_index;
> +
> +     /*
> +      * When an address shootdown range exceeds this threshold we invalidate 
> the
> +      * entire TLB on the GPU for the given PID rather than each specific 
> address in
> +      * the range.
> +      */
> +     uint64_t atsd_threshold;
> +     struct dentry *atsd_threshold_dentry;
> +
> +} npu2_devices;

Even as a structu, it should be possible to statically initialize this.

> +
> +void pnv_npu2_devices_init(void)
> +{
> +     memset(&npu2_devices, 0, sizeof(npu2_devices));

The memset() is unnecessary.  The static structure lives in the BSS,
which means it is already initialized to zeroes.

> +     spin_lock_init(&npu2_devices.context_lock);
> +     npu2_devices.atsd_threshold = 2 * 1024 * 1024;
> +}
> +
>  static struct npu *npdev_to_npu(struct pci_dev *npdev)
>  {
>       struct pnv_phb *nphb;
> @@ -404,9 +417,6 @@ static struct npu *npdev_to_npu(struct pci_dev *npdev)
>  /* Maximum number of nvlinks per npu */
>  #define NV_MAX_LINKS 6
>  
> -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
> -static int max_npu2_index;
> -
>  struct npu_context {
>       struct mm_struct *mm;
>       struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
> @@ -472,7 +482,7 @@ static void mmio_invalidate_pid(struct mmio_atsd_reg 
> mmio_atsd_reg[NV_MAX_NPUS],
>       int i;
>       unsigned long launch;
>  
> -     for (i = 0; i <= max_npu2_index; i++) {
> +     for (i = 0; i <= npu2_devices.max_index; i++) {
>               if (mmio_atsd_reg[i].reg < 0)
>                       continue;
>  
> @@ -503,7 +513,7 @@ static void mmio_invalidate_va(struct mmio_atsd_reg 
> mmio_atsd_reg[NV_MAX_NPUS],
>       int i;
>       unsigned long launch;
>  
> -     for (i = 0; i <= max_npu2_index; i++) {
> +     for (i = 0; i <= npu2_devices.max_index; i++) {
>               if (mmio_atsd_reg[i].reg < 0)
>                       continue;
>  
> @@ -536,7 +546,7 @@ static void mmio_invalidate_wait(
>       int i, reg;
>  
>       /* Wait for all invalidations to complete */
> -     for (i = 0; i <= max_npu2_index; i++) {
> +     for (i = 0; i <= npu2_devices.max_index; i++) {
>               if (mmio_atsd_reg[i].reg < 0)
>                       continue;
>  
> @@ -559,7 +569,7 @@ static void acquire_atsd_reg(struct npu_context 
> *npu_context,
>       struct npu *npu;
>       struct pci_dev *npdev;
>  
> -     for (i = 0; i <= max_npu2_index; i++) {
> +     for (i = 0; i <= npu2_devices.max_index; i++) {
>               mmio_atsd_reg[i].reg = -1;
>               for (j = 0; j < NV_MAX_LINKS; j++) {
>                       /*
> @@ -593,7 +603,7 @@ static void release_atsd_reg(struct mmio_atsd_reg 
> mmio_atsd_reg[NV_MAX_NPUS])
>  {
>       int i;
>  
> -     for (i = 0; i <= max_npu2_index; i++) {
> +     for (i = 0; i <= npu2_devices.max_index; i++) {
>               /*
>                * We can't rely on npu_context->npdev[][] being the same here
>                * as when acquire_atsd_reg() was called, hence we use the
> @@ -683,7 +693,7 @@ static void pnv_npu2_mn_invalidate_range(struct 
> mmu_notifier *mn,
>       struct npu_context *npu_context = mn_to_npu_context(mn);
>       unsigned long address;
>  
> -     if (end - start > atsd_threshold) {
> +     if (end - start > npu2_devices.atsd_threshold) {
>               /*
>                * Just invalidate the entire PID if the address range is too
>                * large.
> @@ -777,12 +787,12 @@ struct npu_context *pnv_npu2_init_context(struct 
> pci_dev *gpdev,
>        * We store the npu pci device so we can more easily get at the
>        * associated npus.
>        */
> -     spin_lock(&npu_context_lock);
> +     spin_lock(&npu2_devices.context_lock);
>       npu_context = mm->context.npu_context;
>       if (npu_context) {
>               if (npu_context->release_cb != cb ||
>                       npu_context->priv != priv) {
> -                     spin_unlock(&npu_context_lock);
> +                     spin_unlock(&npu2_devices.context_lock);
>                       opal_npu_destroy_context(nphb->opal_id, mm->context.id,
>                                               PCI_DEVID(gpdev->bus->number,
>                                                       gpdev->devfn));
> @@ -791,12 +801,12 @@ struct npu_context *pnv_npu2_init_context(struct 
> pci_dev *gpdev,
>  
>               WARN_ON(!kref_get_unless_zero(&npu_context->kref));
>       }
> -     spin_unlock(&npu_context_lock);
> +     spin_unlock(&npu2_devices.context_lock);
>  
>       if (!npu_context) {
>               /*
>                * We can set up these fields without holding the
> -              * npu_context_lock as the npu_context hasn't been returned to
> +              * npu2_devices.context_lock as the npu_context hasn't been 
> returned to
>                * the caller meaning it can't be destroyed. Parallel allocation
>                * is protected against by mmap_sem.
>                */
> @@ -887,9 +897,9 @@ void pnv_npu2_destroy_context(struct npu_context 
> *npu_context,
>       WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
>       opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
>                               PCI_DEVID(gpdev->bus->number, gpdev->devfn));
> -     spin_lock(&npu_context_lock);
> +     spin_lock(&npu2_devices.context_lock);
>       removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
> -     spin_unlock(&npu_context_lock);
> +     spin_unlock(&npu2_devices.context_lock);
>  
>       /*
>        * We need to do this outside of pnv_npu2_release_context so that it is
> @@ -958,9 +968,10 @@ int pnv_npu2_init(struct pnv_phb *phb)
>       static int npu_index;
>       uint64_t rc = 0;
>  
> -     if (!atsd_threshold_dentry) {
> -             atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
> -                                0600, powerpc_debugfs_root, &atsd_threshold);
> +     if (!npu2_devices.atsd_threshold_dentry) {
> +             npu2_devices.atsd_threshold_dentry = debugfs_create_x64(
> +                             "atsd_threshold", 0600, powerpc_debugfs_root,
> +                             &npu2_devices.atsd_threshold);
>       }
>  
>       phb->npu.nmmu_flush =
> @@ -988,7 +999,7 @@ int pnv_npu2_init(struct pnv_phb *phb)
>       npu_index++;
>       if (WARN_ON(npu_index >= NV_MAX_NPUS))
>               return -ENOSPC;
> -     max_npu2_index = npu_index;
> +     npu2_devices.max_index = npu_index;
>       phb->npu.index = npu_index;
>  
>       return 0;
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index e37b9cc..0cc81c0 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1279,6 +1279,8 @@ static void pnv_pci_ioda_setup_PEs(void)
>       struct pci_bus *bus;
>       struct pci_dev *pdev;
>  
> +     pnv_npu2_devices_init();
> +
>       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>               phb = hose->private_data;
>               if (phb->type == PNV_PHB_NPU_NVLINK) {

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to