[AMD Official Use Only - General]

Reviewed-by: David Yat Sin <david.yat...@amd.com>

> -----Original Message-----
> From: Kuehling, Felix <felix.kuehl...@amd.com>
> Sent: Friday, July 28, 2023 4:00 PM
> To: Francis, David <david.fran...@amd.com>; amd-gfx@lists.freedesktop.org;
> Yat Sin, David <david.yat...@amd.com>
> Subject: Re: [PATCH v3] drm/amdgpu: Add EXT_COHERENT memory allocation
> flags
>
> On 2023-07-28 15:39, David Francis wrote:
> > These flags (for GEM and SVM allocations) allocate memory that allows
> > for system-scope atomic semantics.
> >
> > On GFX943 these flags cause caches to be avoided on non-local memory.
> >
> > On all other ASICs they are identical in functionality to the
> > equivalent COHERENT flags.
> >
> > Corresponding Thunk patch is at
> > https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88
> >
> > v3: changed name of flag
> >
> > Signed-off-by: David Francis <david.fran...@amd.com>
>
> I made one comment on the user mode patch regarding the explicit handling
> of invalid combinations of Uncached, Coherent, ExtCoherent flags. I'm not
> sure what we agreed on any more. But I don't think we want to just leave it up
> to chance. Other than that, this patch looks good to me.
>
> Regards,
>    Felix
>
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c      |  1 +
> >   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c           |  1 +
> >   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c           |  1 +
> >   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  5 ++++-
> >   drivers/gpu/drm/amd/amdkfd/kfd_svm.c             | 10 +++++++++-
> >   include/uapi/drm/amdgpu_drm.h                    | 10 +++++++++-
> >   include/uapi/linux/kfd_ioctl.h                   |  3 +++
> >   8 files changed, 30 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index d34c3ef8f3ed..a1ce261f2d06 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -1738,6 +1738,8 @@ int
> amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> >
> >     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
> >             alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
> > +   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
> > +           alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
> >     if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
> >             alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > index 12210598e5b8..76b618735dc0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > @@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device
> *dev,
> > struct dma_buf *dma_buf)
> >
> >             flags |= other->flags &
> (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> >                                      AMDGPU_GEM_CREATE_COHERENT
> |
> > +
> AMDGPU_GEM_CREATE_EXT_COHERENT |
> >
> AMDGPU_GEM_CREATE_UNCACHED);
> >     }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > index 6b430e10d38e..301ffe30824f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> > @@ -632,6 +632,7 @@ static void gmc_v10_0_get_vm_pte(struct
> amdgpu_device *adev,
> >     }
> >
> >     if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
> > +                          AMDGPU_GEM_CREATE_EXT_COHERENT |
> >                            AMDGPU_GEM_CREATE_UNCACHED))
> >             *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
> >                      AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git
> > a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > index a6ee0220db56..846894e212e7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> > @@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct
> amdgpu_device *adev,
> >     }
> >
> >     if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
> > +                          AMDGPU_GEM_CREATE_EXT_COHERENT |
> >                            AMDGPU_GEM_CREATE_UNCACHED))
> >             *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
> >                      AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git
> > a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > index 880460cd3239..92a623e130d9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > @@ -1183,7 +1183,8 @@ static void gmc_v9_0_get_coherence_flags(struct
> amdgpu_device *adev,
> >   {
> >     struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >     bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
> > -   bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
> > +   bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
> AMDGPU_GEM_CREATE_EXT_COHERENT);
> > +   bool ext_coherent = bo->flags &
> AMDGPU_GEM_CREATE_EXT_COHERENT;
> >     bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
> >     struct amdgpu_vm *vm = mapping->bo_va->base.vm;
> >     unsigned int mtype_local, mtype;
> > @@ -1251,6 +1252,8 @@ static void gmc_v9_0_get_coherence_flags(struct
> amdgpu_device *adev,
> >             snoop = true;
> >             if (uncached) {
> >                     mtype = MTYPE_UC;
> > +           } else if (ext_coherent) {
> > +                   mtype = is_local ? MTYPE_CC : MTYPE_UC;
> >             } else if (adev->flags & AMD_IS_APU) {
> >                     mtype = is_local ? mtype_local : MTYPE_NC;
> >             } else {
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > index 1b50eae051a4..e9ffcfc5dedc 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > @@ -1155,7 +1155,8 @@ svm_range_get_pte_flags(struct kfd_node
> *node,
> >     uint32_t mapping_flags = 0;
> >     uint64_t pte_flags;
> >     bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
> > -   bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
> > +   bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT |
> KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
> > +   bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
> >     bool uncached = false; /*flags &
> KFD_IOCTL_SVM_FLAG_UNCACHED;*/
> >     unsigned int mtype_local;
> >
> > @@ -1203,6 +1204,13 @@ svm_range_get_pte_flags(struct kfd_node
> *node,
> >             snoop = true;
> >             if (uncached) {
> >                     mapping_flags |= AMDGPU_VM_MTYPE_UC;
> > +           } else if (ext_coherent) {
> > +                   /* local HBM region close to partition */
> > +                   if (bo_node->adev == node->adev &&
> > +                       (!bo_node->xcp || !node->xcp || bo_node->xcp-
> >mem_id == node->xcp->mem_id))
> > +                           mapping_flags |= AMDGPU_VM_MTYPE_CC;
> > +                   else
> > +                           mapping_flags |= AMDGPU_VM_MTYPE_UC;
> >             } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
> >                     /* local HBM region close to partition */
> >                     if (bo_node->adev == node->adev && diff --git
> > a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index
> > 79b14828d542..629860dbc9ec 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -145,7 +145,7 @@ extern "C" {
> >    */
> >   #define AMDGPU_GEM_CREATE_DISCARDABLE             (1 << 12)
> >   /* Flag that BO is shared coherently between multiple devices or CPU
> threads.
> > - * May depend on GPU instructions to flush caches explicitly
> > + * May depend on GPU instructions to flush caches to system scope
> explicitly.
> >    *
> >    * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs
> and
> >    * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
> > @@ -158,6 +158,14 @@ extern "C" {
> >    * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
> >    */
> >   #define AMDGPU_GEM_CREATE_UNCACHED                (1 << 14)
> > +/* Flag that BO should be coherent across devices when using
> > +device-level
> > + * atomics. May depend on GPU instructions to flush caches to device
> > +scope
> > + * explicitly, promoting them to system scope automatically.
> > + *
> > + * This influences the choice of MTYPE in the PTEs on GFXv9 and later
> > +GPUs and
> > + * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
> > + */
> > +#define AMDGPU_GEM_CREATE_EXT_COHERENT             (1 << 15)
> >
> >   struct drm_amdgpu_gem_create_in  {
> >     /** the requested memory size */
> > diff --git a/include/uapi/linux/kfd_ioctl.h
> > b/include/uapi/linux/kfd_ioctl.h index eeb2fdcbdcb7..f0ed68974c54
> > 100644
> > --- a/include/uapi/linux/kfd_ioctl.h
> > +++ b/include/uapi/linux/kfd_ioctl.h
> > @@ -405,6 +405,7 @@ struct kfd_ioctl_acquire_vm_args {
> >   #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM     (1 << 27)
> >   #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT  (1 << 26)
> >   #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED  (1 << 25)
> > +#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT       (1 << 24)
> >
> >   /* Allocate memory for later SVM (shared virtual memory) mapping.
> >    *
> > @@ -659,6 +660,8 @@ enum kfd_mmio_remap {
> >   #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
> >   /* Keep GPU memory mapping always valid as if XNACK is disable */
> >   #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x00000040
> > +/* Fine grained coherency between all devices using device-scope atomics
> */
> > +#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT        0x00000080
> >
> >   /**
> >    * kfd_ioctl_svm_op - SVM ioctl operations

Reply via email to