On Thu, Nov 10, 2016 at 08:23:42PM +0200, Mikko Perttunen wrote:
> Add support for the Host1x unit to be located behind
> an IOMMU. This is required when gather buffers may be
> allocated non-contiguously in physical memory, as can
> be the case when TegraDRM is also using the IOMMU.
> 
> Signed-off-by: Mikko Perttunen <mperttunen at nvidia.com>
> ---
>  drivers/gpu/host1x/cdma.c       | 65 +++++++++++++++++++++++++++++------
>  drivers/gpu/host1x/cdma.h       |  4 ++-
>  drivers/gpu/host1x/dev.c        | 39 +++++++++++++++++++--
>  drivers/gpu/host1x/dev.h        |  5 +++
>  drivers/gpu/host1x/hw/cdma_hw.c | 10 +++---
>  drivers/gpu/host1x/job.c        | 76 
> +++++++++++++++++++++++++++++++++++------
>  drivers/gpu/host1x/job.h        |  1 +
>  7 files changed, 171 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
> index c5d82a8..14692446e 100644
> --- a/drivers/gpu/host1x/cdma.c
> +++ b/drivers/gpu/host1x/cdma.c
> @@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer 
> *pb)
>       struct host1x_cdma *cdma = pb_to_cdma(pb);
>       struct host1x *host1x = cdma_to_host1x(cdma);
>  
> -     if (pb->phys != 0)
> -             dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
> -                         pb->phys);
> +     if (!pb->phys)
> +             return;
> +
> +     if (host1x->domain) {
> +             iommu_unmap(host1x->domain, pb->dma, pb->alloc_size_bytes);
> +             free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
> +     }
> +
> +     dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
>  
>       pb->mapped = NULL;
>       pb->phys = 0;
> @@ -66,28 +72,65 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
>  {
>       struct host1x_cdma *cdma = pb_to_cdma(pb);
>       struct host1x *host1x = cdma_to_host1x(cdma);
> +     struct iova *alloc;
> +     int err;
>  
>       pb->mapped = NULL;
>       pb->phys = 0;
>       pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
> +     pb->alloc_size_bytes = pb->size_bytes + 4;
>  
>       /* initialize buffer pointers */
>       pb->fence = pb->size_bytes - 8;
>       pb->pos = 0;
>  
> -     /* allocate and map pushbuffer memory */
> -     pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
> -                               GFP_KERNEL);
> -     if (!pb->mapped)
> -             goto fail;
> +     if (host1x->domain) {
> +             unsigned long shift;
> +
> +             pb->alloc_size_bytes = iova_align(&host1x->iova,
> +                                               pb->alloc_size_bytes);
> +
> +             pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
> +                                       &pb->phys, GFP_KERNEL);
> +             if (!pb->mapped)
> +                     return -ENOMEM;
> +
> +             shift = iova_shift(&host1x->iova);
> +             alloc = alloc_iova(
> +                     &host1x->iova, pb->alloc_size_bytes >> shift,
> +                     host1x->domain->geometry.aperture_end >> shift, true);

Let's precompute some of these values to make this expression more
compact. For example pb->alloc_size_bytes is used a couple of times, so
it could be stored temporarily in a local variable with a nice and short
name such as "size".

If you store the aperture end, or limit_pfn, as I commented in an
earlier patch, then this also becomes much more compact.

> +             if (!alloc) {
> +                     err = -ENOMEM;
> +                     goto iommu_free_mem;
> +             }
> +
> +             err = iommu_map(host1x->domain,
> +                             iova_dma_addr(&host1x->iova, alloc),
> +                             pb->phys, pb->alloc_size_bytes,
> +                             IOMMU_READ);

Same here.

> +             if (err)
> +                     goto iommu_free_iova;
> +
> +             pb->dma = iova_dma_addr(&host1x->iova, alloc);
> +     } else {
> +             pb->mapped = dma_alloc_wc(host1x->dev, pb->alloc_size_bytes,
> +                                       &pb->phys, GFP_KERNEL);
> +             if (!pb->mapped)
> +                     return -ENOMEM;
> +
> +             pb->dma = pb->phys;
> +     }
>  
>       host1x_hw_pushbuffer_init(host1x, pb);
>  
>       return 0;
>  
> -fail:
> -     host1x_pushbuffer_destroy(pb);
> -     return -ENOMEM;
> +iommu_free_iova:
> +     __free_iova(&host1x->iova, alloc);
> +iommu_free_mem:
> +     dma_free_wc(host1x->dev, pb->alloc_size_bytes, pb->mapped, pb->phys);
> +
> +     return err;
>  }
>  
>  /*
> diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
> index 470087a..4b3645f 100644
> --- a/drivers/gpu/host1x/cdma.h
> +++ b/drivers/gpu/host1x/cdma.h
> @@ -43,10 +43,12 @@ struct host1x_job;
>  
>  struct push_buffer {
>       void *mapped;                   /* mapped pushbuffer memory */
> -     dma_addr_t phys;                /* physical address of pushbuffer */
> +     dma_addr_t dma;                 /* device address of pushbuffer */
> +     phys_addr_t phys;               /* physical address of pushbuffer */
>       u32 fence;                      /* index we've written */
>       u32 pos;                        /* index to write to */
>       u32 size_bytes;
> +     u32 alloc_size_bytes;

Maybe just "alloc_size"?

>  };
>  
>  struct buffer_timeout {
> diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
> index a62317a..ca4527e 100644
> --- a/drivers/gpu/host1x/dev.c
> +++ b/drivers/gpu/host1x/dev.c
> @@ -168,16 +168,36 @@ static int host1x_probe(struct platform_device *pdev)
>               return err;
>       }
>  
> +     if (iommu_present(&platform_bus_type)) {
> +             struct iommu_domain_geometry *geometry;
> +             unsigned long order;
> +
> +             host->domain = iommu_domain_alloc(&platform_bus_type);
> +             if (!host->domain)
> +                     return -ENOMEM;
> +
> +             err = iommu_attach_device(host->domain, &pdev->dev);
> +             if (err)
> +                     goto fail_free_domain;
> +
> +             geometry = &host->domain->geometry;
> +
> +             order = __ffs(host->domain->pgsize_bitmap);
> +             init_iova_domain(&host->iova, 1UL << order,
> +                              geometry->aperture_start >> order,
> +                              geometry->aperture_end >> order);
> +     }
> +
>       err = host1x_channel_list_init(host);
>       if (err) {
>               dev_err(&pdev->dev, "failed to initialize channel list\n");
> -             return err;
> +             goto fail_detach_device;
>       }
>  
>       err = clk_prepare_enable(host->clk);
>       if (err < 0) {
>               dev_err(&pdev->dev, "failed to enable clock\n");
> -             return err;
> +             goto fail_detach_device;
>       }
>  
>       err = host1x_syncpt_init(host);
> @@ -206,6 +226,15 @@ static int host1x_probe(struct platform_device *pdev)
>       host1x_syncpt_deinit(host);
>  fail_unprepare_disable:
>       clk_disable_unprepare(host->clk);
> +fail_detach_device:
> +     if (host->domain) {
> +             put_iova_domain(&host->iova);
> +             iommu_detach_device(host->domain, &pdev->dev);
> +     }
> +fail_free_domain:
> +     if (host->domain)
> +             iommu_domain_free(host->domain);
> +
>       return err;
>  }
>  
> @@ -218,6 +247,12 @@ static int host1x_remove(struct platform_device *pdev)
>       host1x_syncpt_deinit(host);
>       clk_disable_unprepare(host->clk);
>  
> +     if (host->domain) {
> +             put_iova_domain(&host->iova);
> +             iommu_detach_device(host->domain, &pdev->dev);
> +             iommu_domain_free(host->domain);
> +     }
> +
>       return 0;
>  }
>  
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index 5220510..6189825 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -19,6 +19,8 @@
>  
>  #include <linux/platform_device.h>
>  #include <linux/device.h>
> +#include <linux/iommu.h>
> +#include <linux/iova.h>
>  
>  #include "channel.h"
>  #include "syncpt.h"
> @@ -108,6 +110,9 @@ struct host1x {
>       struct device *dev;
>       struct clk *clk;
>  
> +     struct iommu_domain *domain;
> +     struct iova_domain iova;
> +
>       struct mutex intr_mutex;
>       int intr_syncpt_irq;
>  
> diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
> index 659c1bb..b8c0348 100644
> --- a/drivers/gpu/host1x/hw/cdma_hw.c
> +++ b/drivers/gpu/host1x/hw/cdma_hw.c
> @@ -55,7 +55,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, 
> u32 getptr,
>               *(p++) = HOST1X_OPCODE_NOP;
>               *(p++) = HOST1X_OPCODE_NOP;
>               dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
> -                     &pb->phys, getptr);
> +                     &pb->dma, getptr);
>               getptr = (getptr + 8) & (pb->size_bytes - 1);
>       }
>  
> @@ -78,9 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma)
>                        HOST1X_CHANNEL_DMACTRL);
>  
>       /* set base, put and end pointer */
> -     host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
> +     host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
>       host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
> -     host1x_ch_writel(ch, cdma->push_buffer.phys +
> +     host1x_ch_writel(ch, cdma->push_buffer.dma +
>                        cdma->push_buffer.size_bytes + 4,
>                        HOST1X_CHANNEL_DMAEND);
>  
> @@ -115,8 +115,8 @@ static void cdma_timeout_restart(struct host1x_cdma 
> *cdma, u32 getptr)
>                        HOST1X_CHANNEL_DMACTRL);
>  
>       /* set base, end pointer (all of memory) */
> -     host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
> -     host1x_ch_writel(ch, cdma->push_buffer.phys +
> +     host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
> +     host1x_ch_writel(ch, cdma->push_buffer.dma +
>                        cdma->push_buffer.size_bytes,
>                        HOST1X_CHANNEL_DMAEND);
>  
> diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
> index a91b7c4..222d930 100644
> --- a/drivers/gpu/host1x/job.c
> +++ b/drivers/gpu/host1x/job.c
> @@ -174,8 +174,9 @@ static int do_waitchks(struct host1x_job *job, struct 
> host1x *host,
>       return 0;
>  }
>  
> -static unsigned int pin_job(struct host1x_job *job)
> +static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
>  {
> +     int err;
>       unsigned int i;

Maybe use an inverse christmas tree for variables as in other parts of
this driver? That is, add the new int err belowe the existing unsined
int i.

>  
>       job->num_unpins = 0;
> @@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job)
>               dma_addr_t phys_addr;
>  
>               reloc->target.bo = host1x_bo_get(reloc->target.bo);
> -             if (!reloc->target.bo)
> +             if (!reloc->target.bo) {
> +                     err = -EINVAL;
>                       goto unpin;
> +             }
>  
>               phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
> -             if (!phys_addr)
> +             if (!phys_addr) {
> +                     err = -EINVAL;
>                       goto unpin;
> +             }
>  
>               job->addr_phys[job->num_unpins] = phys_addr;
>               job->unpins[job->num_unpins].bo = reloc->target.bo;
> @@ -204,25 +209,68 @@ static unsigned int pin_job(struct host1x_job *job)
>               struct sg_table *sgt;
>               dma_addr_t phys_addr;
>  
> +             size_t gather_size = 0;
> +             struct scatterlist *sg;
> +             struct iova *alloc;
> +             unsigned long shift;
> +             int j;

There should be no blank line between blocks of variable declarations.
Also, make j an unsigned int.

> +
>               g->bo = host1x_bo_get(g->bo);
> -             if (!g->bo)
> +             if (!g->bo) {
> +                     err = -EINVAL;
>                       goto unpin;
> +             }
>  
>               phys_addr = host1x_bo_pin(g->bo, &sgt);
> -             if (!phys_addr)
> +             if (!phys_addr) {
> +                     err = -EINVAL;
>                       goto unpin;
> +             }
> +
> +             if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
> +                     for_each_sg(sgt->sgl, sg, sgt->nents, j) {
> +                             gather_size += sg->length;
> +                     }

No need for curly brackets here.

> +                     gather_size = iova_align(&host->iova, gather_size);
> +
> +                     shift = iova_shift(&host->iova);
> +                     alloc = alloc_iova(
> +                             &host->iova, gather_size >> shift,
> +                             host->domain->geometry.aperture_end >> shift,
> +                             true);

This could be made a little more compact as well.

Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: 
<https://lists.freedesktop.org/archives/dri-devel/attachments/20161205/59910d4a/attachment.sig>

Reply via email to