Inlined:

On 2022-10-10 02:07, jiadong....@amd.com wrote:
> From: "Jiadong.Zhu" <jiadong....@amd.com>
> 
> Set ring functions with software ring callbacks on gfx9.
> 
> The software ring could be tested by debugfs_test_ib case.
> 
> v2: Set sw_ring 2 to enable software ring by default.
> v3: Remove the parameter for software ring enablement.
> v4: Use amdgpu_ring_init/fini for software rings.
> v5: Update for code format. Fix conflict.
> v6: Remove unnecessary checks and enable software ring on gfx9 by default.
> v7: Use static array for software ring names and priorities.
> v8: Stop creating software rings if no gfx ring existed.
> 
> Acked-by: Luben Tuikov <luben.tui...@amd.com>

Tags appear in chronological order, and thus Acked-by and Reviewed-by tags
appear after your Signed-off-by tag. Please move the Acked-by tag after
your Signed-off-by and thenafter add any subsequent/new tags below it.

I don't see any glaring problems with this patch, but it should be heavily 
tested.

Regards,
Luben

> Cc: Christian Koenig <christian.koe...@amd.com>
> Cc: Luben Tuikov <luben.tui...@amd.com>
> Cc: Andrey Grodzovsky <andrey.grodzov...@amd.com>
> Cc: Michel Dänzer <mic...@daenzer.net>
> Cc: Likun Gao <likun....@amd.com>
> Signed-off-by: Jiadong.Zhu <jiadong....@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h      |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c        | 113 ++++++++++++++++++-
>  5 files changed, 136 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 9996dadb39f7..4fdfc3ec134a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -348,6 +348,7 @@ struct amdgpu_gfx {
>  
>       bool                            is_poweron;
>  
> +     struct amdgpu_ring              sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
>       struct amdgpu_ring_mux          muxer;
>  };
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 40b1277b4f0c..f08ee1ac281c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -39,6 +39,7 @@ struct amdgpu_vm;
>  #define AMDGPU_MAX_RINGS             28
>  #define AMDGPU_MAX_HWIP_RINGS                8
>  #define AMDGPU_MAX_GFX_RINGS         2
> +#define AMDGPU_MAX_SW_GFX_RINGS         2
>  #define AMDGPU_MAX_COMPUTE_RINGS     8
>  #define AMDGPU_MAX_VCE_RINGS         3
>  #define AMDGPU_MAX_UVD_ENC_RINGS     2
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> index 43cab8a37754..2e64ffccc030 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
> @@ -29,6 +29,14 @@
>  
>  #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
>  
> +static const struct ring_info {
> +     unsigned int hw_pio;
> +     const char *ring_name;
> +} sw_ring_info[] = {
> +     { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
> +     { AMDGPU_RING_PRIO_2, "gfx_high"},
> +};
> +
>  int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring 
> *ring,
>                        unsigned int entry_size)
>  {
> @@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
> uint32_t count)
>  {
>       WARN_ON(!ring->is_sw_ring);
>  }
> +
> +const char *amdgpu_sw_ring_name(int idx)
> +{
> +     return idx < ARRAY_SIZE(sw_ring_info) ?
> +             sw_ring_info[idx].ring_name : NULL;
> +}
> +
> +unsigned int amdgpu_sw_ring_priority(int idx)
> +{
> +     return idx < ARRAY_SIZE(sw_ring_info) ?
> +             sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index d91629589577..28399f4b0e5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
> uint32_t count);
>  void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
>  void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
>  
> +const char *amdgpu_sw_ring_name(int idx);
> +unsigned int amdgpu_sw_ring_priority(int idx);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 6b609f33261f..8d4fbc9e3fc0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -47,6 +47,7 @@
>  
>  #include "amdgpu_ras.h"
>  
> +#include "amdgpu_ring_mux.h"
>  #include "gfx_v9_4.h"
>  #include "gfx_v9_0.h"
>  #include "gfx_v9_4_2.h"
> @@ -56,6 +57,7 @@
>  #include "asic_reg/gc/gc_9_0_default.h"
>  
>  #define GFX9_NUM_GFX_RINGS     1
> +#define GFX9_NUM_SW_GFX_RINGS  2
>  #define GFX9_MEC_HPD_SIZE 4096
>  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
>  #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
> @@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
>       struct amdgpu_ring *ring;
>       struct amdgpu_kiq *kiq;
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +     unsigned int hw_prio;
>  
>       switch (adev->ip_versions[GC_HWIP][0]) {
>       case IP_VERSION(9, 0, 1):
> @@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
>                       sprintf(ring->name, "gfx_%d", i);
>               ring->use_doorbell = true;
>               ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
> +
> +             /* disable scheduler on the real ring */
> +             ring->no_scheduler = true;
>               r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
>                                    AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
>                                    AMDGPU_RING_PRIO_DEFAULT, NULL);
> @@ -2363,6 +2369,41 @@ static int gfx_v9_0_sw_init(void *handle)
>                       return r;
>       }
>  
> +     /* set up the software rings */
> +     if (adev->gfx.num_gfx_rings) {
> +             for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
> +                     ring = &adev->gfx.sw_gfx_ring[i];
> +                     ring->ring_obj = NULL;
> +                     sprintf(ring->name, amdgpu_sw_ring_name(i));
> +                     ring->use_doorbell = true;
> +                     ring->doorbell_index = adev->doorbell_index.gfx_ring0 
> << 1;
> +                     ring->is_sw_ring = true;
> +                     hw_prio = amdgpu_sw_ring_priority(i);
> +                     r = amdgpu_ring_init(adev, ring, 1024, 
> &adev->gfx.eop_irq,
> +                                          AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 
> hw_prio,
> +                                          NULL);
> +                     if (r)
> +                             return r;
> +                     ring->wptr = 0;
> +             }
> +
> +             /* init the muxer and add software rings */
> +             r = amdgpu_ring_mux_init(&adev->gfx.muxer, 
> &adev->gfx.gfx_ring[0],
> +                                      GFX9_NUM_SW_GFX_RINGS);
> +             if (r) {
> +                     DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
> +                     return r;
> +             }
> +             for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
> +                     r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
> +                                                     
> &adev->gfx.sw_gfx_ring[i]);
> +                     if (r) {
> +                             DRM_ERROR("amdgpu_ring_mux_add_sw_ring 
> failed(%d)\n", r);
> +                             return r;
> +                     }
> +             }
> +     }
> +
>       /* set up the compute queues - allocate horizontally across pipes */
>       ring_id = 0;
>       for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
> @@ -2413,6 +2454,12 @@ static int gfx_v9_0_sw_fini(void *handle)
>       int i;
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
> +     if (adev->gfx.num_gfx_rings) {
> +             for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
> +                     amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
> +             amdgpu_ring_mux_fini(&adev->gfx.muxer);
> +     }
> +
>       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>       for (i = 0; i < adev->gfx.num_compute_rings; i++)
> @@ -5877,7 +5924,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
>  
>       switch (me_id) {
>       case 0:
> -             amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
> +             /* Fence signals are handled on the software rings*/
> +             if (adev->gfx.num_gfx_rings) {
> +                     for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
> +                             amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
> +             }
>               break;
>       case 1:
>       case 2:
> @@ -6882,6 +6933,61 @@ static const struct amdgpu_ring_funcs 
> gfx_v9_0_ring_funcs_gfx = {
>       .emit_mem_sync = gfx_v9_0_emit_mem_sync,
>  };
>  
> +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
> +     .type = AMDGPU_RING_TYPE_GFX,
> +     .align_mask = 0xff,
> +     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> +     .support_64bit_ptrs = true,
> +     .secure_submission_supported = true,
> +     .vmhub = AMDGPU_GFXHUB_0,
> +     .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
> +     .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
> +     .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
> +     .emit_frame_size = /* totally 242 maximum if 16 IBs */
> +             5 +  /* COND_EXEC */
> +             7 +  /* PIPELINE_SYNC */
> +             SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> +             2 + /* VM_FLUSH */
> +             8 +  /* FENCE for VM_FLUSH */
> +             20 + /* GDS switch */
> +             4 + /* double SWITCH_BUFFER,
> +                  * the first COND_EXEC jump to the place just
> +                  * prior to this double SWITCH_BUFFER
> +                  */
> +             5 + /* COND_EXEC */
> +             7 +      /*     HDP_flush */
> +             4 +      /*     VGT_flush */
> +             14 + /* CE_META */
> +             31 + /* DE_META */
> +             3 + /* CNTX_CTRL */
> +             5 + /* HDP_INVL */
> +             8 + 8 + /* FENCE x2 */
> +             2 + /* SWITCH_BUFFER */
> +             7, /* gfx_v9_0_emit_mem_sync */
> +     .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
> +     .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> +     .emit_fence = gfx_v9_0_ring_emit_fence,
> +     .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> +     .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> +     .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> +     .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> +     .test_ring = gfx_v9_0_ring_test_ring,
> +     .test_ib = gfx_v9_0_ring_test_ib,
> +     .insert_nop = amdgpu_sw_ring_insert_nop,
> +     .pad_ib = amdgpu_ring_generic_pad_ib,
> +     .emit_switch_buffer = gfx_v9_ring_emit_sb,
> +     .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> +     .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> +     .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> +     .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> +     .emit_wreg = gfx_v9_0_ring_emit_wreg,
> +     .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> +     .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> +     .soft_recovery = gfx_v9_0_ring_soft_recovery,
> +     .emit_mem_sync = gfx_v9_0_emit_mem_sync,
> +};
> +
>  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>       .type = AMDGPU_RING_TYPE_COMPUTE,
>       .align_mask = 0xff,
> @@ -6959,6 +7065,11 @@ static void gfx_v9_0_set_ring_funcs(struct 
> amdgpu_device *adev)
>       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>               adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
>  
> +     if (adev->gfx.num_gfx_rings) {
> +             for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
> +                     adev->gfx.sw_gfx_ring[i].funcs = 
> &gfx_v9_0_sw_ring_funcs_gfx;
> +     }
> +
>       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>               adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
>  }

Reply via email to