On 9/12/2024 5:29 PM, Asad Kamal wrote:
> Use metrics v1_6 to report activities per partition
> 
> v2: Use separate per-instance checks for different platforms; shared
> VCN is handled by a separate fix
> 
> Signed-off-by: Asad Kamal <asad.ka...@amd.com>

Series is -

        Reviewed-by: Lijo Lazar <lijo.la...@amd.com>

Thanks,
Lijo
> ---
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 78 ++++++++++++++-----
>  1 file changed, 60 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index ee178914ca53..cd739f627df0 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -358,7 +358,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
>               return -ENOMEM;
>       smu_table->metrics_time = 0;
>  
> -     smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5);
> +     smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_6);
>       smu_table->gpu_metrics_table =
>               kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>       if (!smu_table->gpu_metrics_table) {
> @@ -2302,15 +2302,18 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
>  
>  static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table)
>  {
> +     bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst;
>       struct smu_table_context *smu_table = &smu->smu_table;
> -     struct gpu_metrics_v1_5 *gpu_metrics =
> -             (struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table;
> +     struct gpu_metrics_v1_6 *gpu_metrics =
> +             (struct gpu_metrics_v1_6 *)smu_table->gpu_metrics_table;
>       bool flag = smu_v13_0_6_is_unified_metrics(smu);
> +     int ret = 0, xcc_id, inst, i, j, k, idx;
>       struct amdgpu_device *adev = smu->adev;
> -     int ret = 0, xcc_id, inst, i, j;
>       MetricsTableX_t *metrics_x;
>       MetricsTableA_t *metrics_a;
> +     struct amdgpu_xcp *xcp;
>       u16 link_width_level;
> +     u32 inst_mask;
>  
>       metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL);
>       ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true);
> @@ -2321,7 +2324,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>  
>       metrics_a = (MetricsTableA_t *)metrics_x;
>  
> -     smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5);
> +     smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 6);
>  
>       gpu_metrics->temperature_hotspot =
>               SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag));
> @@ -2363,8 +2366,15 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>  
>       gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, flag));
>  
> -     /* Throttle status is not reported through metrics now */
> -     gpu_metrics->throttle_status = 0;
> +     /* Total accumulated cycle counter */
> +     gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, flag);
> +
> +     /* Accumulated throttler residencies */
> +     gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, flag);
> +     gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, flag);
> +     gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, flag);
> +     gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, flag);
> +     gpu_metrics->hbm_thm_residency_acc = GET_METRIC_FIELD(HbmThmResidencyAcc, flag);
>  
>       /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */
>       gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak, flag) >> GET_INST(GC, 0);
> @@ -2419,19 +2429,51 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>                       SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]);
>       }
>  
> -     for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
> -             inst = GET_INST(JPEG, i);
> -             for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
> -             gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] =
> -                             SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy, flag)
> -                             [(inst * adev->jpeg.num_jpeg_rings) + j]);
> +     gpu_metrics->num_partition = adev->xcp_mgr->num_xcps;
> +
> +     apu_per_inst = (adev->flags & AMD_IS_APU) && (smu->smc_fw_version >= 0x04556A00);
> +     smu_13_0_6_per_inst = !(adev->flags & AMD_IS_APU) &&
> +                             (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
> +                              == IP_VERSION(13, 0, 6)) &&
> +                             (smu->smc_fw_version >= 0x556F00);
> +     smu_13_0_14_per_inst = !(adev->flags & AMD_IS_APU) &&
> +                             (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
> +                              == IP_VERSION(13, 0, 14)) &&
> +                             (smu->smc_fw_version >= 0x05550B00);
> +
> +     per_inst = apu_per_inst || smu_13_0_6_per_inst || smu_13_0_14_per_inst;
> +
> +     for_each_xcp(adev->xcp_mgr, xcp, i) {
> +             amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
> +             idx = 0;
> +             for_each_inst(k, inst_mask) {
> +                     /* Both JPEG and VCN has same instances */
> +                     inst = GET_INST(VCN, k);
> +
> +                     for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
> +                             gpu_metrics->xcp_stats[i].jpeg_busy
> +                                     [(idx * adev->jpeg.num_jpeg_rings) + j] =
> +                                     SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy, flag)
> +                                                     [(inst * adev->jpeg.num_jpeg_rings) + j]);
> +                     }
> +                     gpu_metrics->xcp_stats[i].vcn_busy[idx] =
> +                            SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, flag)[inst]);
> +                     idx++;
> +
>               }
> -     }
>  
> -     for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> -             inst = GET_INST(VCN, i);
> -             gpu_metrics->vcn_activity[i] =
> -                     SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, flag)[inst]);
> +             if (per_inst) {
> +                     amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
> +                     idx = 0;
> +                     for_each_inst(k, inst_mask) {
> +                             inst = GET_INST(GC, k);
> +                             gpu_metrics->xcp_stats[i].gfx_busy_inst[idx] =
> +                                     SMUQ10_ROUND(metrics_x->GfxBusy[inst]);
> +                             gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] =
> +                                     SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]);
> +                             idx++;
> +                     }
> +             }
>       }
>  
>       gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth, flag));
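
For reference, the per-instance GFX activity fill in the last hunk is gated on
minimum SMU firmware versions per platform. Below is a minimal standalone
sketch of that predicate; the helper name, the enum stand-in for the kernel's
IP_VERSION() encoding, and the userspace form are illustrative only, while the
version thresholds are the ones used in the patch:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for IP_VERSION(13, 0, 6) / IP_VERSION(13, 0, 14). */
enum mp1_ip { MP1_13_0_6, MP1_13_0_14, MP1_OTHER };

/*
 * Per-instance (per-XCD) GFX busy values are only filled in when the SMU
 * firmware is new enough; otherwise the zero-initialized
 * xcp_stats[].gfx_busy_inst / gfx_busy_acc entries stay at zero.
 */
static bool per_inst_gfx_metrics_supported(bool is_apu, enum mp1_ip ip,
					   uint32_t smc_fw_version)
{
	if (is_apu)			/* APU path */
		return smc_fw_version >= 0x04556A00;
	if (ip == MP1_13_0_6)		/* SMU v13.0.6 dGPU */
		return smc_fw_version >= 0x556F00;
	if (ip == MP1_13_0_14)		/* SMU v13.0.14 dGPU */
		return smc_fw_version >= 0x05550B00;
	return false;
}

Note that the per-partition VCN/JPEG busy values are reported unconditionally
in the loop above; only the GFX per-instance arrays depend on this check.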
