From: "Ma,Li" <li...@amd.com>

Add a new gpu_metrics_v2_3 structure to expose average temperature info from
the SMU metrics table. The average temperatures need to be reported through
the gpu_metrics interface, but gpu_metrics_v2_2 only has members for the
current temperatures.
---
v1:
        Only add average_temperature_gfx in gpu_metrics_v2_3.
v2:
        Add average temp members for soc, core and l3 to gpu_metrics_v2_3 and
        put these new members at the end of the structure. Add code to read
        the average temp info from the metrics table.
v3:
        Merge v1 and v2 and rename the patch.
v4:
        Merge v3. Add a firmware version check in
        vangogh_common_get_gpu_metrics to maintain backward compatibility and
        rename the patch. Use "return ret" on the error path when
        smu_cmn_get_smc_version fails.

Signed-off-by: Li Ma <li...@amd.com>
---
 .../gpu/drm/amd/include/kgd_pp_interface.h    |  58 +++++++
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 161 ++++++++++++++++--
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        |   3 +
 3 files changed, 210 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 7e3231c2191c..a40ead44778a 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -824,4 +824,62 @@ struct gpu_metrics_v2_2 {
        uint64_t                        indep_throttle_status;
 };
 
+struct gpu_metrics_v2_3 {
+       struct metrics_table_header     common_header;
+
+       /* Temperature */
+       uint16_t                        temperature_gfx; // gfx temperature on 
APUs
+       uint16_t                        temperature_soc; // soc temperature on 
APUs
+       uint16_t                        temperature_core[8]; // CPU core 
temperature on APUs
+       uint16_t                        temperature_l3[2];
+
+       /* Utilization */
+       uint16_t                        average_gfx_activity;
+       uint16_t                        average_mm_activity; // UVD or VCN
+
+       /* Driver attached timestamp (in ns) */
+       uint64_t                        system_clock_counter;
+
+       /* Power/Energy */
+       uint16_t                        average_socket_power; // dGPU + APU 
power on A + A platform
+       uint16_t                        average_cpu_power;
+       uint16_t                        average_soc_power;
+       uint16_t                        average_gfx_power;
+       uint16_t                        average_core_power[8]; // CPU core 
power on APUs
+
+       /* Average clocks */
+       uint16_t                        average_gfxclk_frequency;
+       uint16_t                        average_socclk_frequency;
+       uint16_t                        average_uclk_frequency;
+       uint16_t                        average_fclk_frequency;
+       uint16_t                        average_vclk_frequency;
+       uint16_t                        average_dclk_frequency;
+
+       /* Current clocks */
+       uint16_t                        current_gfxclk;
+       uint16_t                        current_socclk;
+       uint16_t                        current_uclk;
+       uint16_t                        current_fclk;
+       uint16_t                        current_vclk;
+       uint16_t                        current_dclk;
+       uint16_t                        current_coreclk[8]; // CPU core clocks
+       uint16_t                        current_l3clk[2];
+
+       /* Throttle status (ASIC dependent) */
+       uint32_t                        throttle_status;
+
+       /* Fans */
+       uint16_t                        fan_pwm;
+
+       uint16_t                        padding[3];
+
+       /* Throttle status (ASIC independent) */
+       uint64_t                        indep_throttle_status;
+
+       /* Average Temperature */
+       uint16_t                        average_temperature_gfx; // average gfx 
temperature on APUs
+       uint16_t                        average_temperature_soc; // average soc 
temperature on APUs
+       uint16_t                        average_temperature_core[8]; // average 
CPU core temperature on APUs
+       uint16_t                        average_temperature_l3[2];
+};
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 847990145dcd..64235f123405 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -223,14 +223,13 @@ static int vangogh_tables_init(struct smu_context *smu)
 {
        struct smu_table_context *smu_table = &smu->smu_table;
        struct smu_table *tables = smu_table->tables;
-       struct amdgpu_device *adev = smu->adev;
        uint32_t if_version;
+       uint32_t smu_version;
        uint32_t ret = 0;
 
-       ret = smu_cmn_get_smc_version(smu, &if_version, NULL);
+       ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
        if (ret) {
-               dev_err(adev->dev, "Failed to get smu if version!\n");
-               goto err0_out;
+               return ret;
        }
 
        SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
@@ -255,7 +254,10 @@ static int vangogh_tables_init(struct smu_context *smu)
                goto err0_out;
        smu_table->metrics_time = 0;
 
-       smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
+       if(smu_version >= 0x043F3E00)
+               smu_table->gpu_metrics_table_size = sizeof(struct 
gpu_metrics_v2_3);
+       else
+               smu_table->gpu_metrics_table_size = sizeof(struct 
gpu_metrics_v2_2);
        smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
        if (!smu_table->gpu_metrics_table)
                goto err1_out;
@@ -1648,6 +1650,63 @@ static int vangogh_set_watermarks_table(struct smu_context *smu,
        return 0;
 }
 
+static ssize_t vangogh_get_legacy_gpu_metrics_v2_3(struct smu_context *smu,
+                                     void **table)
+{
+       struct smu_table_context *smu_table = &smu->smu_table;
+       struct gpu_metrics_v2_3 *gpu_metrics =
+               (struct gpu_metrics_v2_3 *)smu_table->gpu_metrics_table;
+       SmuMetrics_legacy_t metrics;
+       int ret = 0;
+
+       ret = smu_cmn_get_metrics_table(smu, &metrics, true);
+       if (ret)
+               return ret;
+
+       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 3);
+
+       gpu_metrics->temperature_gfx = metrics.GfxTemperature;
+       gpu_metrics->temperature_soc = metrics.SocTemperature;
+       memcpy(&gpu_metrics->temperature_core[0],
+               &metrics.CoreTemperature[0],
+               sizeof(uint16_t) * 4);
+       gpu_metrics->temperature_l3[0] = metrics.L3Temperature[0];
+
+       gpu_metrics->average_gfx_activity = metrics.GfxActivity;
+       gpu_metrics->average_mm_activity = metrics.UvdActivity;
+
+       gpu_metrics->average_socket_power = metrics.CurrentSocketPower;
+       gpu_metrics->average_cpu_power = metrics.Power[0];
+       gpu_metrics->average_soc_power = metrics.Power[1];
+       gpu_metrics->average_gfx_power = metrics.Power[2];
+       memcpy(&gpu_metrics->average_core_power[0],
+               &metrics.CorePower[0],
+               sizeof(uint16_t) * 4);
+
+       gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency;
+       gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency;
+       gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency;
+       gpu_metrics->average_fclk_frequency = metrics.MemclkFrequency;
+       gpu_metrics->average_vclk_frequency = metrics.VclkFrequency;
+       gpu_metrics->average_dclk_frequency = metrics.DclkFrequency;
+
+       memcpy(&gpu_metrics->current_coreclk[0],
+               &metrics.CoreFrequency[0],
+               sizeof(uint16_t) * 4);
+       gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0];
+
+       gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+       gpu_metrics->indep_throttle_status =
+                       
smu_cmn_get_indep_throttler_status(metrics.ThrottlerStatus,
+                                                          
vangogh_throttler_map);
+
+       gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
+
+       *table = (void *)gpu_metrics;
+
+       return sizeof(struct gpu_metrics_v2_3);
+}
+
 static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu,
                                      void **table)
 {
@@ -1705,6 +1764,77 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu,
        return sizeof(struct gpu_metrics_v2_2);
 }
 
+static ssize_t vangogh_get_gpu_metrics_v2_3(struct smu_context *smu,
+                                     void **table)
+{
+       struct smu_table_context *smu_table = &smu->smu_table;
+       struct gpu_metrics_v2_3 *gpu_metrics =
+               (struct gpu_metrics_v2_3 *)smu_table->gpu_metrics_table;
+       SmuMetrics_t metrics;
+       int ret = 0;
+
+       ret = smu_cmn_get_metrics_table(smu, &metrics, true);
+       if (ret)
+               return ret;
+
+       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 3);
+
+       gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature;
+       gpu_metrics->temperature_soc = metrics.Current.SocTemperature;
+       memcpy(&gpu_metrics->temperature_core[0],
+               &metrics.Current.CoreTemperature[0],
+               sizeof(uint16_t) * 4);
+       gpu_metrics->temperature_l3[0] = metrics.Current.L3Temperature[0];
+
+       gpu_metrics->average_temperature_gfx = metrics.Average.GfxTemperature;
+       gpu_metrics->average_temperature_soc = metrics.Average.SocTemperature;
+       memcpy(&gpu_metrics->average_temperature_core[0],
+               &metrics.Average.CoreTemperature[0],
+               sizeof(uint16_t) * 4);
+       gpu_metrics->average_temperature_l3[0] = 
metrics.Average.L3Temperature[0];
+
+       gpu_metrics->average_gfx_activity = metrics.Current.GfxActivity;
+       gpu_metrics->average_mm_activity = metrics.Current.UvdActivity;
+
+       gpu_metrics->average_socket_power = metrics.Current.CurrentSocketPower;
+       gpu_metrics->average_cpu_power = metrics.Current.Power[0];
+       gpu_metrics->average_soc_power = metrics.Current.Power[1];
+       gpu_metrics->average_gfx_power = metrics.Current.Power[2];
+       memcpy(&gpu_metrics->average_core_power[0],
+               &metrics.Average.CorePower[0],
+               sizeof(uint16_t) * 4);
+
+       gpu_metrics->average_gfxclk_frequency = metrics.Average.GfxclkFrequency;
+       gpu_metrics->average_socclk_frequency = metrics.Average.SocclkFrequency;
+       gpu_metrics->average_uclk_frequency = metrics.Average.MemclkFrequency;
+       gpu_metrics->average_fclk_frequency = metrics.Average.MemclkFrequency;
+       gpu_metrics->average_vclk_frequency = metrics.Average.VclkFrequency;
+       gpu_metrics->average_dclk_frequency = metrics.Average.DclkFrequency;
+
+       gpu_metrics->current_gfxclk = metrics.Current.GfxclkFrequency;
+       gpu_metrics->current_socclk = metrics.Current.SocclkFrequency;
+       gpu_metrics->current_uclk = metrics.Current.MemclkFrequency;
+       gpu_metrics->current_fclk = metrics.Current.MemclkFrequency;
+       gpu_metrics->current_vclk = metrics.Current.VclkFrequency;
+       gpu_metrics->current_dclk = metrics.Current.DclkFrequency;
+
+       memcpy(&gpu_metrics->current_coreclk[0],
+               &metrics.Current.CoreFrequency[0],
+               sizeof(uint16_t) * 4);
+       gpu_metrics->current_l3clk[0] = metrics.Current.L3Frequency[0];
+
+       gpu_metrics->throttle_status = metrics.Current.ThrottlerStatus;
+       gpu_metrics->indep_throttle_status =
+                       
smu_cmn_get_indep_throttler_status(metrics.Current.ThrottlerStatus,
+                                                          
vangogh_throttler_map);
+
+       gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
+
+       *table = (void *)gpu_metrics;
+
+       return sizeof(struct gpu_metrics_v2_3);
+}
+
 static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu,
                                      void **table)
 {
@@ -1772,20 +1902,27 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu,
 static ssize_t vangogh_common_get_gpu_metrics(struct smu_context *smu,
                                      void **table)
 {
-       struct amdgpu_device *adev = smu->adev;
        uint32_t if_version;
+       uint32_t smu_version;
        int ret = 0;
 
-       ret = smu_cmn_get_smc_version(smu, &if_version, NULL);
+       ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
        if (ret) {
-               dev_err(adev->dev, "Failed to get smu if version!\n");
                return ret;
        }
 
-       if (if_version < 0x3)
-               ret = vangogh_get_legacy_gpu_metrics(smu, table);
-       else
-               ret = vangogh_get_gpu_metrics(smu, table);
+       if(smu_version >= 0x043F3E00){
+               if (if_version < 0x3)
+                       ret = vangogh_get_legacy_gpu_metrics_v2_3(smu, table);
+               else
+                       ret = vangogh_get_gpu_metrics_v2_3(smu, table);
+       }
+       else{
+               if (if_version < 0x3)
+                       ret = vangogh_get_legacy_gpu_metrics(smu, table);
+               else
+                       ret = vangogh_get_gpu_metrics(smu, table);
+       }       
 
        return ret;
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 15e4298c7cc8..e4f8f90ac5aa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -969,6 +969,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
        case METRICS_VERSION(2, 2):
                structure_size = sizeof(struct gpu_metrics_v2_2);
                break;
+       case METRICS_VERSION(2, 3):
+               structure_size = sizeof(struct gpu_metrics_v2_3);
+               break;
        default:
                return;
        }
-- 
2.25.1

Reply via email to