On 2022-05-05 16:04, Alex Deucher wrote:
> From: Likun Gao <[email protected]>
> 
> Support memory to memory linear copy in PIO mode for LSDMA.
> 
> Signed-off-by: Likun Gao <[email protected]>
> Reviewed-by: Christian König <[email protected]>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c | 26 ++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h |  5 +++
>  drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c   | 44 +++++++++++++++++++++++
>  3 files changed, 75 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
> index af00a66f8282..3f1c674afe41 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
> @@ -23,3 +23,29 @@
>  
>  #include "amdgpu.h"
>  #include "amdgpu_lsdma.h"
> +
> +#define AMDGPU_LSDMA_MAX_SIZE        0x2000000ULL
> +
> +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev,
> +                       uint64_t src_addr,
> +                       uint64_t dst_addr,
> +                       uint64_t mem_size)
> +{
> +     int ret;
> +
> +     if (mem_size == 0)
> +             return -EINVAL;
> +
> +     while(mem_size > 0) {

Kernel style requires a space after the "while" and before the "(".

> +             uint64_t current_copy_size = min(mem_size, 
> AMDGPU_LSDMA_MAX_SIZE);
> +
> +             ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, 
> current_copy_size);
> +             if (ret)
> +                     return ret;
> +             src_addr += current_copy_size;
> +             dst_addr += current_copy_size;
> +             mem_size -= current_copy_size;
> +     }
> +
> +     return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
> index 4df4da423181..be397666e4c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
> @@ -30,6 +30,11 @@ struct amdgpu_lsdma {
>  
>  struct amdgpu_lsdma_funcs
>  {
> +     int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr,
> +                    uint64_t dst_addr, uint64_t size);
>  };
>  
> +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
> +                          uint64_t dst_addr, uint64_t mem_size);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
> index b611ade53be2..0d2bdd04bd76 100644
> --- a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
> @@ -29,5 +29,49 @@
>  #include "lsdma/lsdma_6_0_0_offset.h"
>  #include "lsdma/lsdma_6_0_0_sh_mask.h"
>  
> +static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev,
> +                            uint64_t src_addr,
> +                            uint64_t dst_addr,
> +                            uint64_t size)
> +{
> +     uint32_t usec_timeout = 5000;  /* wait for 5ms */
> +     uint32_t tmp, expect_val;
> +     int i;
> +
> +     WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, 
> lower_32_bits(src_addr));
> +     WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, 
> upper_32_bits(src_addr));
> +
> +     WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, 
> lower_32_bits(dst_addr));
> +     WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, 
> upper_32_bits(dst_addr));
> +
> +     WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
> +
> +     tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
> +     tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
> +     WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
> +
> +     expect_val = LSDMA_PIO_STATUS__PIO_IDLE_MASK | 
> LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK;
> +     for (i = 0; i < usec_timeout; i++) {
> +             tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_STATUS);
> +             if ((tmp & expect_val) == expect_val)
> +                     break;
> +             udelay(1);
> +     }

Does the hardware specify a minimum delay after the write to the 
LSDMA_PIO_COMMAND,
and before we check the LSDMA_PIO_STATUS?

At the moment the code above checks the status immediately after writing to
LSDMA_PIO_COMMAND, and the copy wouldn't be completed.

If the poll timeout is 1 us, as the loop shows us, maybe the command completion
is larger than that (the time after writing to LSDMA_PIO_COMMAND and before
checking LSDMA_PIO_STATUS)?

> +
> +     if (i >= usec_timeout) {
> +             dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
> +             return -ETIMEDOUT;
> +     }
> +
> +     return 0;
> +}
> +
>  const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = {
> +     .copy_mem = lsdma_v6_0_copy_mem
>  };

Regards,
-- 
Luben

Reply via email to