[PATCH RFC v3 3/7] drm/display: dp: use new DPCD access helpers

2025-03-06 Thread Dmitry Baryshkov
From: Dmitry Baryshkov 

Switch drm_dp_helper.c to use new set of DPCD read / write helpers.

Reviewed-by: Lyude Paul 
Acked-by: Jani Nikula 
Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/display/drm_dp_helper.c | 296 +---
 1 file changed, 116 insertions(+), 180 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_helper.c 
b/drivers/gpu/drm/display/drm_dp_helper.c
index 
410be0be233ad94702af423262a7d98e21afbfeb..e2439c8a7fefe116b04aaa689b557e2387b05540
 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -327,7 +327,7 @@ static int __read_delay(struct drm_dp_aux *aux, const u8 
dpcd[DP_RECEIVER_CAP_SI
if (offset < DP_RECEIVER_CAP_SIZE) {
rd_interval = dpcd[offset];
} else {
-   if (drm_dp_dpcd_readb(aux, offset, &rd_interval) != 1) {
+   if (drm_dp_dpcd_read_byte(aux, offset, &rd_interval) < 0) {
drm_dbg_kms(aux->drm_dev, "%s: failed rd interval 
read\n",
aux->name);
/* arbitrary default delay */
@@ -358,7 +358,7 @@ int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux 
*aux)
int unit;
u8 val;
 
-   if (drm_dp_dpcd_readb(aux, DP_128B132B_TRAINING_AUX_RD_INTERVAL, &val) 
!= 1) {
+   if (drm_dp_dpcd_read_byte(aux, DP_128B132B_TRAINING_AUX_RD_INTERVAL, 
&val) < 0) {
drm_err(aux->drm_dev, "%s: failed rd interval read\n",
aux->name);
/* default to max */
@@ -807,30 +807,20 @@ int drm_dp_dpcd_read_phy_link_status(struct drm_dp_aux 
*aux,
 {
int ret;
 
-   if (dp_phy == DP_PHY_DPRX) {
-   ret = drm_dp_dpcd_read(aux,
-  DP_LANE0_1_STATUS,
-  link_status,
-  DP_LINK_STATUS_SIZE);
-
-   if (ret < 0)
-   return ret;
+   if (dp_phy == DP_PHY_DPRX)
+   return drm_dp_dpcd_read_data(aux,
+DP_LANE0_1_STATUS,
+link_status,
+DP_LINK_STATUS_SIZE);
 
-   WARN_ON(ret != DP_LINK_STATUS_SIZE);
-
-   return 0;
-   }
-
-   ret = drm_dp_dpcd_read(aux,
-  DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy),
-  link_status,
-  DP_LINK_STATUS_SIZE - 1);
+   ret = drm_dp_dpcd_read_data(aux,
+   DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy),
+   link_status,
+   DP_LINK_STATUS_SIZE - 1);
 
if (ret < 0)
return ret;
 
-   WARN_ON(ret != DP_LINK_STATUS_SIZE - 1);
-
/* Convert the LTTPR to the sink PHY link status layout */
memmove(&link_status[DP_SINK_STATUS - DP_LANE0_1_STATUS + 1],
&link_status[DP_SINK_STATUS - DP_LANE0_1_STATUS],
@@ -846,7 +836,7 @@ static int read_payload_update_status(struct drm_dp_aux 
*aux)
int ret;
u8 status;
 
-   ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status);
+   ret = drm_dp_dpcd_read_byte(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, 
&status);
if (ret < 0)
return ret;
 
@@ -873,21 +863,21 @@ int drm_dp_dpcd_write_payload(struct drm_dp_aux *aux,
int ret;
int retries = 0;
 
-   drm_dp_dpcd_writeb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS,
-  DP_PAYLOAD_TABLE_UPDATED);
+   drm_dp_dpcd_write_byte(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS,
+  DP_PAYLOAD_TABLE_UPDATED);
 
payload_alloc[0] = vcpid;
payload_alloc[1] = start_time_slot;
payload_alloc[2] = time_slot_count;
 
-   ret = drm_dp_dpcd_write(aux, DP_PAYLOAD_ALLOCATE_SET, payload_alloc, 3);
-   if (ret != 3) {
+   ret = drm_dp_dpcd_write_data(aux, DP_PAYLOAD_ALLOCATE_SET, 
payload_alloc, 3);
+   if (ret < 0) {
drm_dbg_kms(aux->drm_dev, "failed to write payload allocation 
%d\n", ret);
goto fail;
}
 
 retry:
-   ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status);
+   ret = drm_dp_dpcd_read_byte(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, 
&status);
if (ret < 0) {
drm_dbg_kms(aux->drm_dev, "failed to read payload table status 
%d\n", ret);
goto fail;
@@ -1043,15 +1033,15 @@ bool drm_dp_send_real_edid_checksum(struct drm_dp_aux 
*aux,
 {
u8 link_edid_read = 0, auto_test_req = 0, test_resp = 0;
 
-   if (drm_dp_dpcd_read(aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
-&auto_test_req, 1) < 1) {
+   if (drm_dp_dpcd_read_byte(aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
+ &auto_test_re

[PATCH] drm/amdgpu: format old RAS eeprom data into V3 version

2025-03-06 Thread Tao Zhou
Clear old data and save it in V3 format.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |  5 
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c| 26 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h|  1 +
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 837f33698b38..266f24002e07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3465,6 +3465,11 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device 
*adev)
adev, control->bad_channel_bitmap);
con->update_channel_flag = false;
}
+
+   if (control->tbl_hdr.version < RAS_TABLE_VER_V3)
+   if (!amdgpu_ras_eeprom_reset_table(control))
+   if (amdgpu_ras_save_bad_pages(adev, NULL))
+   dev_warn(adev->dev, "Failed to save 
EEPROM data in V3 format!\n");
}
 
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 09a6f8bc1a5a..71dddb8983ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -413,9 +413,11 @@ static void amdgpu_ras_set_eeprom_table_version(struct 
amdgpu_ras_eeprom_control
 
switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(8, 10, 0):
-   case IP_VERSION(12, 0, 0):
hdr->version = RAS_TABLE_VER_V2_1;
return;
+   case IP_VERSION(12, 0, 0):
+   hdr->version = RAS_TABLE_VER_V3;
+   return;
default:
hdr->version = RAS_TABLE_VER_V1;
return;
@@ -443,7 +445,7 @@ int amdgpu_ras_eeprom_reset_table(struct 
amdgpu_ras_eeprom_control *control)
hdr->header = RAS_TABLE_HDR_VAL;
amdgpu_ras_set_eeprom_table_version(control);
 
-   if (hdr->version == RAS_TABLE_VER_V2_1) {
+   if (hdr->version >= RAS_TABLE_VER_V2_1) {
hdr->first_rec_offset = RAS_RECORD_START_V2_1;
hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE;
@@ -461,7 +463,7 @@ int amdgpu_ras_eeprom_reset_table(struct 
amdgpu_ras_eeprom_control *control)
}
 
csum = __calc_hdr_byte_sum(control);
-   if (hdr->version == RAS_TABLE_VER_V2_1)
+   if (hdr->version >= RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
csum = -csum;
hdr->checksum = csum;
@@ -752,7 +754,7 @@ amdgpu_ras_eeprom_update_header(struct 
amdgpu_ras_eeprom_control *control)
"Saved bad pages %d reaches threshold value %d\n",
control->ras_num_bad_pages, 
ras->bad_page_cnt_threshold);
control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
-   if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
+   if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
control->tbl_rai.rma_status = 
GPU_RETIRED__ECC_REACH_THRESHOLD;
control->tbl_rai.health_percent = 0;
}
@@ -765,7 +767,7 @@ amdgpu_ras_eeprom_update_header(struct 
amdgpu_ras_eeprom_control *control)
amdgpu_dpm_send_rma_reason(adev);
}
 
-   if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+   if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * 
RAS_TABLE_RECORD_SIZE;
@@ -805,7 +807,7 @@ amdgpu_ras_eeprom_update_header(struct 
amdgpu_ras_eeprom_control *control)
 * now calculate gpu health percent
 */
if (amdgpu_bad_page_threshold != 0 &&
-   control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
+   control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold 
-
   control->ras_num_bad_pages) 
* 100) /
@@ -818,7 +820,7 @@ amdgpu_ras_eeprom_update_header(struct 
amdgpu_ras_eeprom_control *control)
csum += *pp;
 
csum += __calc_hdr_byte_sum(control);
-   if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+   if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
/* avoid sign extension when assigning to "checksum" */
csum = -csum;
@@ -1035,7 +1037,7 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct 
amdgpu_ras_eeprom_control *co
/* get available eeprom table version first

Re: [PATCH v3 4/4] drm/amdgpu: fix warning and errors caused by duplicated defines in sid.h

2025-03-06 Thread Alexandre Demers
On Thu, Mar 6, 2025 at 10:17 AM Alex Deucher  wrote:
>
> On Wed, Mar 5, 2025 at 8:57 PM Alexandre Demers
>  wrote:
> >
> > Let's finish the cleanup in sid.h to calm down things after wiring it into
> > dce_v6_0.c.
> >
> > This is a bigger cleanup.
> > Many defines found under sid.h have already been properly moved
> > into the different "_d.h" and "_sh_mask.h", so they should have been
> > already removed from sid.h and properly linked in where needed.
> >
> > Signed-off-by: Alexandre Demers 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c |  12 +-
> >  drivers/gpu/drm/amd/amdgpu/si.c   |  68 +++---
> >  drivers/gpu/drm/amd/amdgpu/si_enums.h |   2 -
> >  drivers/gpu/drm/amd/amdgpu/sid.h  | 336 +-
> >  4 files changed, 44 insertions(+), 374 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > index 2ccb450b35a6..7bb11916a619 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> > @@ -32,7 +32,6 @@
> >  #include "amdgpu.h"
> >  #include "amdgpu_pm.h"
> >  #include "amdgpu_i2c.h"
> > -#include "sid.h"
> >  #include "atom.h"
> >  #include "amdgpu_atombios.h"
> >  #include "atombios_crtc.h"
> > @@ -41,18 +40,25 @@
> >  #include "amdgpu_connectors.h"
> >  #include "amdgpu_display.h"
> >
> > +#include "dce_v6_0.h"
> > +#include "sid.h"
> > +
> >  #include "bif/bif_3_0_d.h"
> >  #include "bif/bif_3_0_sh_mask.h"
> > +
> >  #include "oss/oss_1_0_d.h"
> >  #include "oss/oss_1_0_sh_mask.h"
> > +
> >  #include "gca/gfx_6_0_d.h"
> >  #include "gca/gfx_6_0_sh_mask.h"
> > +#include "gca/gfx_7_2_enum.h"
> > +
> >  #include "gmc/gmc_6_0_d.h"
> >  #include "gmc/gmc_6_0_sh_mask.h"
> > +
> >  #include "dce/dce_6_0_d.h"
> >  #include "dce/dce_6_0_sh_mask.h"
> > -#include "gca/gfx_7_2_enum.h"
> > -#include "dce_v6_0.h"
> > +
> >  #include "si_enums.h"
> >
> >  static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/si.c 
> > b/drivers/gpu/drm/amd/amdgpu/si.c
> > index 79307ae3e477..a18b3ece635b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/si.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> > @@ -1124,41 +1124,41 @@ static struct amdgpu_allowed_register_entry 
> > si_allowed_read_registers[] = {
> > {mmCP_STALLED_STAT3},
> > {GB_ADDR_CONFIG},
> > {MC_ARB_RAMCFG},
> > -   {GB_TILE_MODE0},
> > -   {GB_TILE_MODE1},
> > -   {GB_TILE_MODE2},
> > -   {GB_TILE_MODE3},
> > -   {GB_TILE_MODE4},
> > -   {GB_TILE_MODE5},
> > -   {GB_TILE_MODE6},
> > -   {GB_TILE_MODE7},
> > -   {GB_TILE_MODE8},
> > -   {GB_TILE_MODE9},
> > -   {GB_TILE_MODE10},
> > -   {GB_TILE_MODE11},
> > -   {GB_TILE_MODE12},
> > -   {GB_TILE_MODE13},
> > -   {GB_TILE_MODE14},
> > -   {GB_TILE_MODE15},
> > -   {GB_TILE_MODE16},
> > -   {GB_TILE_MODE17},
> > -   {GB_TILE_MODE18},
> > -   {GB_TILE_MODE19},
> > -   {GB_TILE_MODE20},
> > -   {GB_TILE_MODE21},
> > -   {GB_TILE_MODE22},
> > -   {GB_TILE_MODE23},
> > -   {GB_TILE_MODE24},
> > -   {GB_TILE_MODE25},
> > -   {GB_TILE_MODE26},
> > -   {GB_TILE_MODE27},
> > -   {GB_TILE_MODE28},
> > -   {GB_TILE_MODE29},
> > -   {GB_TILE_MODE30},
> > -   {GB_TILE_MODE31},
> > +   {mmGB_TILE_MODE0},
> > +   {mmGB_TILE_MODE1},
> > +   {mmGB_TILE_MODE2},
> > +   {mmGB_TILE_MODE3},
> > +   {mmGB_TILE_MODE4},
> > +   {mmGB_TILE_MODE5},
> > +   {mmGB_TILE_MODE6},
> > +   {mmGB_TILE_MODE7},
> > +   {mmGB_TILE_MODE8},
> > +   {mmGB_TILE_MODE9},
> > +   {mmGB_TILE_MODE10},
> > +   {mmGB_TILE_MODE11},
> > +   {mmGB_TILE_MODE12},
> > +   {mmGB_TILE_MODE13},
> > +   {mmGB_TILE_MODE14},
> > +   {mmGB_TILE_MODE15},
> > +   {mmGB_TILE_MODE16},
> > +   {mmGB_TILE_MODE17},
> > +   {mmGB_TILE_MODE18},
> > +   {mmGB_TILE_MODE19},
> > +   {mmGB_TILE_MODE20},
> > +   {mmGB_TILE_MODE21},
> > +   {mmGB_TILE_MODE22},
> > +   {mmGB_TILE_MODE23},
> > +   {mmGB_TILE_MODE24},
> > +   {mmGB_TILE_MODE25},
> > +   {mmGB_TILE_MODE26},
> > +   {mmGB_TILE_MODE27},
> > +   {mmGB_TILE_MODE28},
> > +   {mmGB_TILE_MODE29},
> > +   {mmGB_TILE_MODE30},
> > +   {mmGB_TILE_MODE31},
> > {CC_RB_BACKEND_DISABLE, true},
>
> Why not replace this with mmCC_RB_BACKEND_DISABLE as well for consistency?
>
> Alex

It will be coming in the next series, which will be major. I'm working
on it so it is as readable
and modular as possible. In the current series, I just took care of
the warnings and errors generated
by the addition of sid.h either by removing redefinitions or using
updated defines.

Alexandre
>
> > -   {GC_USER_RB_BACKEND_DISABLE, true},
> > -   {PA_SC_RASTER_CONFIG, true},
> > +   {mmGC_USER_RB_BACKEND_DISABLE, true},
> > +   {mmPA_SC_RASTER_CONFIG, true},
> >  };
> >
> > 

[PATCH RFC v3 2/7] drm/display: dp: change drm_dp_dpcd_read_link_status() return value

2025-03-06 Thread Dmitry Baryshkov
From: Dmitry Baryshkov 

drm_dp_dpcd_read_link_status() follows the "return error code or number
of bytes read" protocol, with the code returning fewer bytes than
requested in case of some errors. However most of the drivers
interpreted that as "return error code in case of any error". Switch
drm_dp_dpcd_read_link_status() to drm_dp_dpcd_read_data() and make it
follow that protocol too.

Acked-by: Jani Nikula 
Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/amd/amdgpu/atombios_dp.c   |  8 
 .../gpu/drm/bridge/cadence/cdns-mhdp8546-core.c|  2 +-
 drivers/gpu/drm/display/drm_dp_helper.c|  7 +++
 drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c   |  4 ++--
 drivers/gpu/drm/msm/dp/dp_ctrl.c   | 24 +-
 drivers/gpu/drm/msm/dp/dp_link.c   | 18 
 drivers/gpu/drm/radeon/atombios_dp.c   |  8 
 7 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c 
b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 
521b9faab18059ed92ebb1dc9a9847e8426e7403..492813ab1b54197ba842075bc2909984c39bd5c1
 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -458,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct 
amdgpu_connector *amdgpu_connect
u8 link_status[DP_LINK_STATUS_SIZE];
struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
 
-   if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, 
link_status)
-   <= 0)
+   if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux,
+link_status) < 0)
return false;
if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
return false;
@@ -616,7 +616,7 @@ amdgpu_atombios_dp_link_train_cr(struct 
amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_clock_recovery_delay(dp_info->aux, 
dp_info->dpcd);
 
if (drm_dp_dpcd_read_link_status(dp_info->aux,
-dp_info->link_status) <= 0) {
+dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
@@ -681,7 +681,7 @@ amdgpu_atombios_dp_link_train_ce(struct 
amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
 
if (drm_dp_dpcd_read_link_status(dp_info->aux,
-dp_info->link_status) <= 0) {
+dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c 
b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
index 
81fad14c2cd598045d989c7d51f292bafb92c144..8d5420a5b691180c4d051a450d5d3d869a558d1a
 100644
--- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
+++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
@@ -2305,7 +2305,7 @@ static int cdns_mhdp_update_link_status(struct 
cdns_mhdp_device *mhdp)
 * If everything looks fine, just return, as we don't handle
 * DP IRQs.
 */
-   if (ret > 0 &&
+   if (!ret &&
drm_dp_channel_eq_ok(status, mhdp->link.num_lanes) &&
drm_dp_clock_recovery_ok(status, mhdp->link.num_lanes))
goto out;
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c 
b/drivers/gpu/drm/display/drm_dp_helper.c
index 
e43a8f4a252dae22eeaae1f4ca94da064303033d..410be0be233ad94702af423262a7d98e21afbfeb
 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -778,14 +778,13 @@ EXPORT_SYMBOL(drm_dp_dpcd_write);
  * @aux: DisplayPort AUX channel
  * @status: buffer to store the link status in (must be at least 6 bytes)
  *
- * Returns the number of bytes transferred on success or a negative error
- * code on failure.
+ * Returns a negative error code on failure or 0 on success.
  */
 int drm_dp_dpcd_read_link_status(struct drm_dp_aux *aux,
 u8 status[DP_LINK_STATUS_SIZE])
 {
-   return drm_dp_dpcd_read(aux, DP_LANE0_1_STATUS, status,
-   DP_LINK_STATUS_SIZE);
+   return drm_dp_dpcd_read_data(aux, DP_LANE0_1_STATUS, status,
+DP_LINK_STATUS_SIZE);
 }
 EXPORT_SYMBOL(drm_dp_dpcd_read_link_status);
 
diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c 
b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c
index 
f6355c16cc0ab2e28408ab8a7246f4ca17710456..a3b78b0fd53ef854a54edf40fb333766da88f1c6
 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_li

[PATCH RFC v3 7/7] drm/display: dp-tunnel: use new DPCD access helpers

2025-03-06 Thread Dmitry Baryshkov
From: Dmitry Baryshkov 

Switch drm_dp_tunnel.c to use new set of DPCD read / write helpers.

Reviewed-by: Lyude Paul 
Acked-by: Jani Nikula 
Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/display/drm_dp_tunnel.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c 
b/drivers/gpu/drm/display/drm_dp_tunnel.c
index 
90fe07a89260e21e78f2db7f57a90602be921a11..076edf1610480275c62395334ab0536befa42f15
 100644
--- a/drivers/gpu/drm/display/drm_dp_tunnel.c
+++ b/drivers/gpu/drm/display/drm_dp_tunnel.c
@@ -222,7 +222,7 @@ static int read_tunnel_regs(struct drm_dp_aux *aux, struct 
drm_dp_tunnel_regs *r
while ((len = next_reg_area(&offset))) {
int address = DP_TUNNELING_BASE + offset;
 
-   if (drm_dp_dpcd_read(aux, address, tunnel_reg_ptr(regs, 
address), len) < 0)
+   if (drm_dp_dpcd_read_data(aux, address, tunnel_reg_ptr(regs, 
address), len) < 0)
return -EIO;
 
offset += len;
@@ -913,7 +913,7 @@ static int set_bw_alloc_mode(struct drm_dp_tunnel *tunnel, 
bool enable)
u8 mask = DP_DISPLAY_DRIVER_BW_ALLOCATION_MODE_ENABLE | 
DP_UNMASK_BW_ALLOCATION_IRQ;
u8 val;
 
-   if (drm_dp_dpcd_readb(tunnel->aux, DP_DPTX_BW_ALLOCATION_MODE_CONTROL, 
&val) < 0)
+   if (drm_dp_dpcd_read_byte(tunnel->aux, 
DP_DPTX_BW_ALLOCATION_MODE_CONTROL, &val) < 0)
goto out_err;
 
if (enable)
@@ -921,7 +921,7 @@ static int set_bw_alloc_mode(struct drm_dp_tunnel *tunnel, 
bool enable)
else
val &= ~mask;
 
-   if (drm_dp_dpcd_writeb(tunnel->aux, DP_DPTX_BW_ALLOCATION_MODE_CONTROL, 
val) < 0)
+   if (drm_dp_dpcd_write_byte(tunnel->aux, 
DP_DPTX_BW_ALLOCATION_MODE_CONTROL, val) < 0)
goto out_err;
 
tunnel->bw_alloc_enabled = enable;
@@ -1039,7 +1039,7 @@ static int clear_bw_req_state(struct drm_dp_aux *aux)
 {
u8 bw_req_mask = DP_BW_REQUEST_SUCCEEDED | DP_BW_REQUEST_FAILED;
 
-   if (drm_dp_dpcd_writeb(aux, DP_TUNNELING_STATUS, bw_req_mask) < 0)
+   if (drm_dp_dpcd_write_byte(aux, DP_TUNNELING_STATUS, bw_req_mask) < 0)
return -EIO;
 
return 0;
@@ -1052,7 +1052,7 @@ static int bw_req_complete(struct drm_dp_aux *aux, bool 
*status_changed)
u8 val;
int err;
 
-   if (drm_dp_dpcd_readb(aux, DP_TUNNELING_STATUS, &val) < 0)
+   if (drm_dp_dpcd_read_byte(aux, DP_TUNNELING_STATUS, &val) < 0)
return -EIO;
 
*status_changed = val & status_change_mask;
@@ -1095,7 +1095,7 @@ static int allocate_tunnel_bw(struct drm_dp_tunnel 
*tunnel, int bw)
if (err)
goto out;
 
-   if (drm_dp_dpcd_writeb(tunnel->aux, DP_REQUEST_BW, request_bw) < 0) {
+   if (drm_dp_dpcd_write_byte(tunnel->aux, DP_REQUEST_BW, request_bw) < 0) 
{
err = -EIO;
goto out;
}
@@ -1196,13 +1196,13 @@ static int check_and_clear_status_change(struct 
drm_dp_tunnel *tunnel)
u8 mask = DP_BW_ALLOCATION_CAPABILITY_CHANGED | DP_ESTIMATED_BW_CHANGED;
u8 val;
 
-   if (drm_dp_dpcd_readb(tunnel->aux, DP_TUNNELING_STATUS, &val) < 0)
+   if (drm_dp_dpcd_read_byte(tunnel->aux, DP_TUNNELING_STATUS, &val) < 0)
goto out_err;
 
val &= mask;
 
if (val) {
-   if (drm_dp_dpcd_writeb(tunnel->aux, DP_TUNNELING_STATUS, val) < 
0)
+   if (drm_dp_dpcd_write_byte(tunnel->aux, DP_TUNNELING_STATUS, 
val) < 0)
goto out_err;
 
return 1;
@@ -1215,7 +1215,7 @@ static int check_and_clear_status_change(struct 
drm_dp_tunnel *tunnel)
 * Check for estimated BW changes explicitly to account for lost
 * BW change notifications.
 */
-   if (drm_dp_dpcd_readb(tunnel->aux, DP_ESTIMATED_BW, &val) < 0)
+   if (drm_dp_dpcd_read_byte(tunnel->aux, DP_ESTIMATED_BW, &val) < 0)
goto out_err;
 
if (val * tunnel->bw_granularity != tunnel->estimated_bw)
@@ -1300,7 +1300,7 @@ int drm_dp_tunnel_handle_irq(struct drm_dp_tunnel_mgr 
*mgr, struct drm_dp_aux *a
 {
u8 val;
 
-   if (drm_dp_dpcd_readb(aux, DP_TUNNELING_STATUS, &val) < 0)
+   if (drm_dp_dpcd_read_byte(aux, DP_TUNNELING_STATUS, &val) < 0)
return -EIO;
 
if (val & (DP_BW_REQUEST_SUCCEEDED | DP_BW_REQUEST_FAILED))

-- 
2.39.5



[PATCH RFC v3 4/7] drm/display: dp-aux-dev: use new DPCD access helpers

2025-03-06 Thread Dmitry Baryshkov
From: Dmitry Baryshkov 

Switch drm_dp_aux_dev.c to use new set of DPCD read / write helpers.

Acked-by: Jani Nikula 
Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/display/drm_dp_aux_dev.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_aux_dev.c 
b/drivers/gpu/drm/display/drm_dp_aux_dev.c
index 
29555b9f03c8c42681c17c4a01e74a966cf8611f..a31ab3f41efb71fd5f936c24ba5c3b8ebea68a5e
 100644
--- a/drivers/gpu/drm/display/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/display/drm_dp_aux_dev.c
@@ -163,17 +163,16 @@ static ssize_t auxdev_read_iter(struct kiocb *iocb, 
struct iov_iter *to)
break;
}
 
-   res = drm_dp_dpcd_read(aux_dev->aux, pos, buf, todo);
-
+   res = drm_dp_dpcd_read_data(aux_dev->aux, pos, buf, todo);
if (res <= 0)
break;
 
-   if (copy_to_iter(buf, res, to) != res) {
+   if (copy_to_iter(buf, todo, to) != todo) {
res = -EFAULT;
break;
}
 
-   pos += res;
+   pos += todo;
}
 
if (pos != iocb->ki_pos)
@@ -211,12 +210,11 @@ static ssize_t auxdev_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
break;
}
 
-   res = drm_dp_dpcd_write(aux_dev->aux, pos, buf, todo);
-
+   res = drm_dp_dpcd_write_data(aux_dev->aux, pos, buf, todo);
if (res <= 0)
break;
 
-   pos += res;
+   pos += todo;
}
 
if (pos != iocb->ki_pos)

-- 
2.39.5



[PATCH RFC v3 0/7] drm/display: dp: add new DPCD access functions

2025-03-06 Thread Dmitry Baryshkov
Existing DPCD access functions return an error code or the number of
bytes read / written in case of partial access. However a lot of
drivers either (incorrectly) ignore partial access or mishandle error
codes. In other cases this results in a boilerplate code which compares
returned value with the size.

As suggested by Jani, implement a new set of DPCD access helpers, which
ignore partial access and always return 0 or an error code. Implement
the new helpers using existing functions to ensure backwards compatibility
and to assess the necessity of handling incomplete reads on a global scale.
Currently only one possible place has been identified, dp-aux-dev, which
needs to handle possible holes in DPCD.

This series targets only the DRM helpers code. If the approach is found
to be acceptable, each of the drivers should be converted on its own.

Signed-off-by: Dmitry Baryshkov 
---
Changes in v3:
- Fixed cover letter (Jani)
- Added intel-gfx and intel-xe to get the series CI-tested (Jani)
- Link to v2: 
https://lore.kernel.org/r/20250301-drm-rework-dpcd-access-v2-0-4d92602fc...@linaro.org

Changes in v2:
- Reimplemented new helpers using old ones (Lyude)
- Reworked the drm_dp_dpcd_read_link_status() patch (Lyude)
- Dropped the dp-aux-dev patch (Jani)
- Link to v1: 
https://lore.kernel.org/r/20250117-drm-rework-dpcd-access-v1-0-7fc020e04...@linaro.org

---
Dmitry Baryshkov (7):
  drm/display: dp: implement new access helpers
  drm/display: dp: change drm_dp_dpcd_read_link_status() return value
  drm/display: dp: use new DPCD access helpers
  drm/display: dp-aux-dev: use new DPCD access helpers
  drm/display: dp-cec: use new DPCD access helpers
  drm/display: dp-mst-topology: use new DPCD access helpers
  drm/display: dp-tunnel: use new DPCD access helpers

 drivers/gpu/drm/amd/amdgpu/atombios_dp.c   |   8 +-
 .../gpu/drm/bridge/cadence/cdns-mhdp8546-core.c|   2 +-
 drivers/gpu/drm/display/drm_dp_aux_dev.c   |  12 +-
 drivers/gpu/drm/display/drm_dp_cec.c   |  37 ++-
 drivers/gpu/drm/display/drm_dp_helper.c| 307 +
 drivers/gpu/drm/display/drm_dp_mst_topology.c  | 105 ---
 drivers/gpu/drm/display/drm_dp_tunnel.c|  20 +-
 drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c   |   4 +-
 drivers/gpu/drm/msm/dp/dp_ctrl.c   |  24 +-
 drivers/gpu/drm/msm/dp/dp_link.c   |  18 +-
 drivers/gpu/drm/radeon/atombios_dp.c   |   8 +-
 include/drm/display/drm_dp_helper.h|  92 +-
 12 files changed, 322 insertions(+), 315 deletions(-)
---
base-commit: 565351ae7e0cee80e9b5ed84452a5b13644ffc4d
change-id: 20241231-drm-rework-dpcd-access-b0fc2e47d613

Best regards,
-- 
Dmitry Baryshkov 



[PATCH RFC v3 5/7] drm/display: dp-cec: use new DPCD access helpers

2025-03-06 Thread Dmitry Baryshkov
From: Dmitry Baryshkov 

Switch drm_dp_cec.c to use new set of DPCD read / write helpers.

Reviewed-by: Lyude Paul 
Acked-by: Jani Nikula 
Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/display/drm_dp_cec.c | 37 ++--
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_cec.c 
b/drivers/gpu/drm/display/drm_dp_cec.c
index 
56a4965e518cc237c992a2e31b9f6de05c14766a..ed31471bd0e28826254ecedac48c5c126729d470
 100644
--- a/drivers/gpu/drm/display/drm_dp_cec.c
+++ b/drivers/gpu/drm/display/drm_dp_cec.c
@@ -96,7 +96,7 @@ static int drm_dp_cec_adap_enable(struct cec_adapter *adap, 
bool enable)
u32 val = enable ? DP_CEC_TUNNELING_ENABLE : 0;
ssize_t err = 0;
 
-   err = drm_dp_dpcd_writeb(aux, DP_CEC_TUNNELING_CONTROL, val);
+   err = drm_dp_dpcd_write_byte(aux, DP_CEC_TUNNELING_CONTROL, val);
return (enable && err < 0) ? err : 0;
 }
 
@@ -112,7 +112,7 @@ static int drm_dp_cec_adap_log_addr(struct cec_adapter 
*adap, u8 addr)
la_mask |= adap->log_addrs.log_addr_mask | (1 << addr);
mask[0] = la_mask & 0xff;
mask[1] = la_mask >> 8;
-   err = drm_dp_dpcd_write(aux, DP_CEC_LOGICAL_ADDRESS_MASK, mask, 2);
+   err = drm_dp_dpcd_write_data(aux, DP_CEC_LOGICAL_ADDRESS_MASK, mask, 2);
return (addr != CEC_LOG_ADDR_INVALID && err < 0) ? err : 0;
 }
 
@@ -123,15 +123,14 @@ static int drm_dp_cec_adap_transmit(struct cec_adapter 
*adap, u8 attempts,
unsigned int retries = min(5, attempts - 1);
ssize_t err;
 
-   err = drm_dp_dpcd_write(aux, DP_CEC_TX_MESSAGE_BUFFER,
-   msg->msg, msg->len);
+   err = drm_dp_dpcd_write_data(aux, DP_CEC_TX_MESSAGE_BUFFER,
+msg->msg, msg->len);
if (err < 0)
return err;
 
-   err = drm_dp_dpcd_writeb(aux, DP_CEC_TX_MESSAGE_INFO,
-(msg->len - 1) | (retries << 4) |
-DP_CEC_TX_MESSAGE_SEND);
-   return err < 0 ? err : 0;
+   return drm_dp_dpcd_write_byte(aux, DP_CEC_TX_MESSAGE_INFO,
+ (msg->len - 1) | (retries << 4) |
+ DP_CEC_TX_MESSAGE_SEND);
 }
 
 static int drm_dp_cec_adap_monitor_all_enable(struct cec_adapter *adap,
@@ -144,13 +143,13 @@ static int drm_dp_cec_adap_monitor_all_enable(struct 
cec_adapter *adap,
if (!(adap->capabilities & CEC_CAP_MONITOR_ALL))
return 0;
 
-   err = drm_dp_dpcd_readb(aux, DP_CEC_TUNNELING_CONTROL, &val);
-   if (err >= 0) {
+   err = drm_dp_dpcd_read_byte(aux, DP_CEC_TUNNELING_CONTROL, &val);
+   if (!err) {
if (enable)
val |= DP_CEC_SNOOPING_ENABLE;
else
val &= ~DP_CEC_SNOOPING_ENABLE;
-   err = drm_dp_dpcd_writeb(aux, DP_CEC_TUNNELING_CONTROL, val);
+   err = drm_dp_dpcd_write_byte(aux, DP_CEC_TUNNELING_CONTROL, 
val);
}
return (enable && err < 0) ? err : 0;
 }
@@ -194,7 +193,7 @@ static int drm_dp_cec_received(struct drm_dp_aux *aux)
u8 rx_msg_info;
ssize_t err;
 
-   err = drm_dp_dpcd_readb(aux, DP_CEC_RX_MESSAGE_INFO, &rx_msg_info);
+   err = drm_dp_dpcd_read_byte(aux, DP_CEC_RX_MESSAGE_INFO, &rx_msg_info);
if (err < 0)
return err;
 
@@ -202,7 +201,7 @@ static int drm_dp_cec_received(struct drm_dp_aux *aux)
return 0;
 
msg.len = (rx_msg_info & DP_CEC_RX_MESSAGE_LEN_MASK) + 1;
-   err = drm_dp_dpcd_read(aux, DP_CEC_RX_MESSAGE_BUFFER, msg.msg, msg.len);
+   err = drm_dp_dpcd_read_data(aux, DP_CEC_RX_MESSAGE_BUFFER, msg.msg, 
msg.len);
if (err < 0)
return err;
 
@@ -215,7 +214,7 @@ static void drm_dp_cec_handle_irq(struct drm_dp_aux *aux)
struct cec_adapter *adap = aux->cec.adap;
u8 flags;
 
-   if (drm_dp_dpcd_readb(aux, DP_CEC_TUNNELING_IRQ_FLAGS, &flags) < 0)
+   if (drm_dp_dpcd_read_byte(aux, DP_CEC_TUNNELING_IRQ_FLAGS, &flags) < 0)
return;
 
if (flags & DP_CEC_RX_MESSAGE_INFO_VALID)
@@ -230,7 +229,7 @@ static void drm_dp_cec_handle_irq(struct drm_dp_aux *aux)
 (DP_CEC_TX_ADDRESS_NACK_ERROR | DP_CEC_TX_DATA_NACK_ERROR))
cec_transmit_attempt_done(adap, CEC_TX_STATUS_NACK |
CEC_TX_STATUS_MAX_RETRIES);
-   drm_dp_dpcd_writeb(aux, DP_CEC_TUNNELING_IRQ_FLAGS, flags);
+   drm_dp_dpcd_write_byte(aux, DP_CEC_TUNNELING_IRQ_FLAGS, flags);
 }
 
 /**
@@ -253,13 +252,13 @@ void drm_dp_cec_irq(struct drm_dp_aux *aux)
if (!aux->cec.adap)
goto unlock;
 
-   ret = drm_dp_dpcd_readb(aux, DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1,
-   &cec_irq);
+   ret = drm_dp_dpcd_read_byte(aux, DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1,
+   

[PATCH v3 3/3] drm/amdkfd: Add support for more per-process flag

2025-03-06 Thread Harish Kasiviswanathan
Add support for more per-process flags starting with option to configure
MFMA precision for gfx 9.5

v2: Change flag name to KFD_PROC_FLAG_MFMA_HIGH_PRECISION
Remove unused else condition
v3: Bump the KFD API version

Signed-off-by: Harish Kasiviswanathan 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  6 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c  | 11 +--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c  |  6 --
 include/uapi/linux/kfd_ioctl.h|  8 ++--
 10 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8c2e92378b49..1e9dd00620bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -606,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
-   args->alternate_aperture_size))
+   args->alternate_aperture_size,
+   args->misc_process_flag))
err = -EINVAL;
 
 out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d23c6a358d34..2afcc1b4856a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2596,7 +2596,8 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size)
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties)
 {
bool retval = true;
 
@@ -2611,7 +2612,8 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
default_policy,
alternate_policy,
alternate_aperture_base,
-   alternate_aperture_size);
+   alternate_aperture_size,
+   misc_process_properties);
 
if (retval)
goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7146e227e2c1..122eb745e9c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -174,7 +174,8 @@ struct device_queue_manager_ops {
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size);
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties);
 
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -210,7 +211,8 @@ struct device_queue_manager_asic_ops {
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size);
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties);
void(*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 32bedef912b3..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -40,7 +40,8 @@ static bool set_cache_memory_policy_cik(struct 
device_queue_manage

RE: [PATCH v4] drm/amdgpu: Fix the race condition for draining retry fault

2025-03-06 Thread Deng, Emily
[AMD Official Use Only - AMD Internal Distribution Only]



From: Chen, Xiaogang 
Sent: Friday, March 7, 2025 1:01 AM
To: Deng, Emily ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v4] drm/amdgpu: Fix the race condition for draining retry 
fault


Thanks for catching and fixing this race condition. It looks good to me. One minor 
thing below:
On 3/6/2025 12:03 AM, Emily Deng wrote:

Issue:

In the scenario where svm_range_restore_pages is called, but svm->checkpoint_ts

 has not been set and the retry fault has not been drained, 
svm_range_unmap_from_cpu

is triggered and calls svm_range_free. Meanwhile, svm_range_restore_pages

continues execution and reaches svm_range_from_addr. This results in

a "failed to find prange..." error, causing the page recovery to fail.



How to fix:

Move the timestamp check code under the protection of svm->lock.



v2:

Make sure all right locks are released before go out.



v3:

Directly goto out_unlock_svms, and return -EAGAIN.



v4:

Refine code.



Signed-off-by: Emily Deng 

---

 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 30 +++-

 1 file changed, 16 insertions(+), 14 deletions(-)



diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index d04725583f19..83ac14bf7a7a 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -3008,19 +3008,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,

goto out;

}



-   /* check if this page fault time stamp is before svms->checkpoint_ts */

-   if (svms->checkpoint_ts[gpuidx] != 0) {

-   if (amdgpu_ih_ts_after_or_equal(ts,  
svms->checkpoint_ts[gpuidx])) {

-   pr_debug("draining retry fault, drop fault 0x%llx\n", 
addr);

-   r = 0;

-   goto out;

-   } else

-   /* ts is after svms->checkpoint_ts now, reset 
svms->checkpoint_ts

-* to zero to avoid following ts wrap around give wrong 
comparing

-*/

-svms->checkpoint_ts[gpuidx] = 0;

-   }

-

if (!p->xnack_enabled) {

pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);

r = -EFAULT;

@@ -3040,6 +3027,20 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,

mmap_read_lock(mm);

 retry_write_locked:

mutex_lock(&svms->lock);

+

+   /* check if this page fault time stamp is before svms->checkpoint_ts */

+   if (svms->checkpoint_ts[gpuidx] != 0) {

+   if (amdgpu_ih_ts_after_or_equal(ts,  
svms->checkpoint_ts[gpuidx])) {

+   pr_debug("draining retry fault, drop fault 0x%llx\n", 
addr);

+   r = -EAGAIN;

We drop the page fault because it is stale, not because we mean to handle it again. If 
we return -EAGAIN we do amdgpu_gmc_filter_faults_remove. If, after the unmap, the user maps 
the same range again, we should treat a page fault happening at the same range as a new one.

Regards

Xiaogang

Sorry, I didn't quite catch that. So, you think we shouldn't remove the fault 
from amdgpu_gmc_filter_faults_remove?

Emily Deng
Best Wishes






+   goto out_unlock_svms;

+   } else

+   /* ts is after svms->checkpoint_ts now, reset 
svms->checkpoint_ts

+* to zero to avoid following ts wrap around give wrong 
comparing

+*/

+svms->checkpoint_ts[gpuidx] = 0;

+   }

+

prange = svm_range_from_addr(svms, addr, NULL);

if (!prange) {

pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",

@@ -3165,7 +3166,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,

mutex_unlock(&svms->lock);

mmap_read_unlock(mm);



-   svm_range_count_fault(node, p, gpuidx);

+   if (r != -EAGAIN)

+   svm_range_count_fault(node, p, gpuidx);



mmput(mm);

 out:


[PATCH] drm: amdkfd: Replace (un)register_chrdev() by (unregister/alloc)_chrdev_region()

2025-03-06 Thread Salah Triki
Replace (un)register_chrdev() by (unregister/alloc)_chrdev_region() as
they are deprecated since kernel 2.6. alloc_chrdev_region() generates a
dev_t value, so replace the kfd_char_dev_major int variable by the
kfd_char_dev_id dev_t variable and drop the MKDEV() call. Initialize a
cdev structure and add it to the device driver model as register_chrdev()
used to do and since alloc_chrdev_region() does not do it. Drop the
iminor() call since alloc_chrdev_region() allocates only one minor number.
On error and in the module exit function, remove the cdev structure from
the device driver model as unregister_chrdev() used to do.

Signed-off-by: Salah Triki 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 35 
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 065d87841459..55c74466d2c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -37,6 +37,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_svm.h"
@@ -61,12 +63,14 @@ static const struct file_operations kfd_fops = {
.mmap = kfd_mmap,
 };
 
-static int kfd_char_dev_major = -1;
+static dev_t kfd_char_dev_id;
 struct device *kfd_device;
 static const struct class kfd_class = {
.name = kfd_dev_name,
 };
 
+static struct cdev kfd_cdev;
+
 static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process 
*p, __u32 gpu_id)
 {
struct kfd_process_device *pdd;
@@ -90,17 +94,24 @@ int kfd_chardev_init(void)
 {
int err = 0;
 
-   kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
-   err = kfd_char_dev_major;
+   err = alloc_chrdev_region(&kfd_char_dev_id, 0, 1, kfd_dev_name);
+
if (err < 0)
-   goto err_register_chrdev;
+   goto err_alloc_chrdev_region;
+
+   cdev_init(&kfd_cdev, &kfd_fops);
+   kfd_cdev.owner = THIS_MODULE;
+
+   err = cdev_add(&kfd_cdev, kfd_char_dev_id, 1);
+   if (err)
+   goto err_cdev_add;
 
err = class_register(&kfd_class);
if (err)
goto err_class_create;
 
kfd_device = device_create(&kfd_class, NULL,
-  MKDEV(kfd_char_dev_major, 0),
+  kfd_char_dev_id,
   NULL, kfd_dev_name);
err = PTR_ERR(kfd_device);
if (IS_ERR(kfd_device))
@@ -111,16 +122,19 @@ int kfd_chardev_init(void)
 err_device_create:
class_unregister(&kfd_class);
 err_class_create:
-   unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
-err_register_chrdev:
+   cdev_del(&kfd_cdev);
+err_cdev_add:
+   unregister_chrdev_region(kfd_char_dev_id, 1);
+err_alloc_chrdev_region:
return err;
 }
 
 void kfd_chardev_exit(void)
 {
-   device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
+   device_destroy(&kfd_class, kfd_char_dev_id);
class_unregister(&kfd_class);
-   unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
+   cdev_del(&kfd_cdev);
+   unregister_chrdev_region(kfd_char_dev_id, 1);
kfd_device = NULL;
 }
 
@@ -130,9 +144,6 @@ static int kfd_open(struct inode *inode, struct file *filep)
struct kfd_process *process;
bool is_32bit_user_mode;
 
-   if (iminor(inode) != 0)
-   return -ENODEV;
-
is_32bit_user_mode = in_compat_syscall();
 
if (is_32bit_user_mode) {
-- 
2.34.1



Re: [PATCH 05/11] drm/amdgpu/mes: update hqd masks when disable_kq is set

2025-03-06 Thread Khatri, Sunil

Reviewed-by: Sunil Khatri 

On 3/6/2025 2:17 AM, Alex Deucher wrote:

Make all resources available to user queues.

Suggested-by: Sunil Khatri 
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index afc2ce344df52..71dce64a0ea32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -157,14 +157,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 * Set GFX pipe 0 queue 1-7 for MES scheduling
 * mask =  1110b
 */
-   adev->mes.gfx_hqd_mask[i] = 0xFE;
+   adev->mes.gfx_hqd_mask[i] = 
adev->gfx.disable_kq ? 0xFF : 0xFE;
else
/*
 * GFX pipe 0 queue 0 is being used by Kernel 
queue.
 * Set GFX pipe 0 queue 1 for MES scheduling
 * mask = 10b
 */
-   adev->mes.gfx_hqd_mask[i] = 0x2;
+   adev->mes.gfx_hqd_mask[i] = 
adev->gfx.disable_kq ? 0x3 : 0x2;
}
}
  
@@ -172,7 +172,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)

/* use only 1st MEC pipes */
if (i >= adev->gfx.mec.num_pipe_per_mec)
continue;
-   adev->mes.compute_hqd_mask[i] = 0xc;
+   adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 
0xC;
}
  
  	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {


Re: [PATCH 06/11] drm/amdgpu/mes: make more vmids available when disable_kq=1

2025-03-06 Thread Khatri, Sunil

Reviewed-by: Sunil Khatri 

On 3/6/2025 2:17 AM, Alex Deucher wrote:

If we don't have kernel queues, the vmids can be used by
the MES for user queues.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 2 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c  | 2 +-
  3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 71dce64a0ea32..e2007759f5b4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -142,7 +142,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
  
  	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;

adev->mes.vmid_mask_mmhub = 0xff00;
-   adev->mes.vmid_mask_gfxhub = 0xff00;
+   adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffe : 
0xff00;
  
  	if (adev->gfx.num_gfx_rings) {

for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 95d894a231fcf..19a5f196829f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -900,7 +900,7 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block 
*ip_block)
 * amdgpu graphics/compute will use VMIDs 1-7
 * amdkfd will use VMIDs 8-15
 */
-   adev->vm_manager.first_kfd_vmid = 8;
+   adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
  
  	amdgpu_vm_manager_init(adev);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c

index ea7c32d8380ba..598324e736092 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -837,7 +837,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block 
*ip_block)
 * amdgpu graphics/compute will use VMIDs 1-7
 * amdkfd will use VMIDs 8-15
 */
-   adev->vm_manager.first_kfd_vmid = 8;
+   adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
  
  	amdgpu_vm_manager_init(adev);
  


Re: [PATCH 03/11] drm/amdgpu/gfx: add generic handling for disable_kq

2025-03-06 Thread Khatri, Sunil



On 3/6/2025 6:36 AM, Felix Kuehling wrote:


On 2025-03-05 15:47, Alex Deucher wrote:

Add proper checks for disable_kq functionality in
gfx helper functions.  Add special logic for families
that require the clear state setup.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 92 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 +
  2 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index a194bf3347cbc..af3f8b62f6fd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -371,6 +371,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
  return 0;
  }
  +static bool amdgpu_gfx_disable_gfx_kq(struct amdgpu_device *adev)
+{
+    if (adev->gfx.disable_kq) {
+    /* GFX11 needs the GFX ring for clear buffer */
+    if (amdgpu_ip_version(adev, GC_HWIP, 0) <= IP_VERSION(12, 0, 
0))


Yes, the check has to be < as gfx12 does not need the clear buffer, based 
on our discussions.


Regards
Sunil



Should this be < instead of <=?




Regards,
  Felix


+    return false;
+    else
+    return true;
+    }
+    return false;
+}
+
  /* create MQD for each compute/gfx queue */
  int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 unsigned int mqd_size, int xcc_id)
@@ -379,6 +391,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device 
*adev,

  struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
  struct amdgpu_ring *ring = &kiq->ring;
  u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+    bool disable_kq_gfx = amdgpu_gfx_disable_gfx_kq(adev);


The names of the variable and the function could be kept in sync: disable_gfx_kq and 
amdgpu_gfx_disable_gfx_kq — or change the function name to match the variable.


Another suggestion here: it would be better to have one more variable in the 
gfx struct or ring, read amdgpu_gfx_disable_gfx_kq once, and use 
it in all the places. It does look confusing

with so many similar-sounding names.

Regards
Sunil

    #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
  /* Only enable on gfx10 and 11 for now to avoid changing 
behavior on older chips */
@@ -413,7 +426,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device 
*adev,

  }
  }
  -    if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+    if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring &&
+    !disable_kq_gfx) {
  /* create MQD for each KGQ */
  for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
  ring = &adev->gfx.gfx_ring[i];
@@ -437,25 +451,28 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device 
*adev,

  }
  }
  -    /* create MQD for each KCQ */
-    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-    j = i + xcc_id * adev->gfx.num_compute_rings;
-    ring = &adev->gfx.compute_ring[j];
-    if (!ring->mqd_obj) {
-    r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
-    domain, &ring->mqd_obj,
-    &ring->mqd_gpu_addr, &ring->mqd_ptr);
-    if (r) {
-    dev_warn(adev->dev, "failed to create ring mqd bo 
(%d)", r);

-    return r;
-    }
+    if (!adev->gfx.disable_kq) {


Maybe just set adev->gfx.num_compute_rings to 0 somewhere, then you 
don't need this condition.




+    /* create MQD for each KCQ */
+    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+    j = i + xcc_id * adev->gfx.num_compute_rings;
+    ring = &adev->gfx.compute_ring[j];
+    if (!ring->mqd_obj) {
+    r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+    domain, &ring->mqd_obj,
+    &ring->mqd_gpu_addr, &ring->mqd_ptr);
+    if (r) {
+    dev_warn(adev->dev, "failed to create ring mqd 
bo (%d)", r);

+    return r;
+    }
  -    ring->mqd_size = mqd_size;
-    /* prepare MQD backup */
-    adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, 
GFP_KERNEL);

-    if (!adev->gfx.mec.mqd_backup[j]) {
-    dev_warn(adev->dev, "no memory to create MQD backup 
for ring %s\n", ring->name);

-    return -ENOMEM;
+    ring->mqd_size = mqd_size;
+    /* prepare MQD backup */
+    adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, 
GFP_KERNEL);

+    if (!adev->gfx.mec.mqd_backup[j]) {
+    dev_warn(adev->dev, "no memory to create MQD 
backup for ring %s\n",

+ ring->name);
+    return -ENOMEM;
+    }
  }
  }
  }
@@ -468,8 +485,10 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device 
*adev, int xcc_id)

  struct amdgpu_ring *ring = NULL;
  int i, j;
  struct amdgpu_kiq *kiq = &adev->gfx.

Re: [PATCH] drm/amdgpu: Fix annotation for dce_v6_0_line_buffer_adjust function

2025-03-06 Thread Alex Deucher
On Wed, Mar 5, 2025 at 11:45 AM Srinivasan Shanmugam
 wrote:
>
> Updated description for the 'other_mode' parameter. This parameter is
> used to determine the display mode of another display controller that
> may be sharing the line buffer.
>
> Cc: Ken Wang 
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> index 315c098271af..ac51b7a6e8d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> @@ -1041,6 +1041,8 @@ static void dce_v6_0_program_watermarks(struct 
> amdgpu_device *adev,
>   * @amdgpu_crtc: the selected display controller
>   * @mode: the current display mode on the selected display
>   * controller
> + * @other_mode: the display mode of another display controller
> + *  that may be sharing the line buffer
>   *
>   * Setup up the line buffer allocation for
>   * the selected display controller (CIK).
> --
> 2.34.1
>


Re: [PATCH 11/11] drm/amdgpu/sdma7: add support for disable_kq

2025-03-06 Thread Khatri, Sunil

Reviewed-by: Sunil Khatri 

On 3/6/2025 2:17 AM, Alex Deucher wrote:

When the parameter is set, disable user submissions
to kernel queues.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 92a79296708ae..40d45f738c0a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1316,6 +1316,9 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block 
*ip_block)
struct amdgpu_device *adev = ip_block->adev;
int r;
  
+	if (amdgpu_disable_kq == 1)

+   adev->sdma.no_user_submission = true;
+
r = amdgpu_sdma_init_microcode(adev, 0, true);
if (r) {
DRM_ERROR("Failed to init sdma firmware!\n");
@@ -1351,6 +1354,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block 
*ip_block)
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
+   ring->no_user_submission = adev->sdma.no_user_submission;
  
  		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,

ring->use_doorbell?"true":"false");


Re: [PATCH] drm/amdgpu/vcn: fix idle work handler for VCN 2.5

2025-03-06 Thread Alex Deucher
Ping?

Thanks,

Alex

On Wed, Mar 5, 2025 at 2:42 PM Alex Deucher  wrote:
>
> VCN 2.5 uses the PG callback to enable VCN DPM which is
> a global state.  As such, we need to make sure all instances
> are in the same state.
>
> v2: switch to a ref count (Lijo)
> v3: switch to its own idle work handler
> v4: fix logic in DPG handling
>
> Fixes: 4ce4fe27205c ("drm/amdgpu/vcn: use per instance callbacks for idle 
> work handler")
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 120 +-
>  1 file changed, 116 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> index dff1a88590363..ff03436698a4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> @@ -107,6 +107,115 @@ static int amdgpu_ih_clientid_vcns[] = {
> SOC15_IH_CLIENTID_VCN1
>  };
>
> +static void vcn_v2_5_idle_work_handler(struct work_struct *work)
> +{
> +   struct amdgpu_vcn_inst *vcn_inst =
> +   container_of(work, struct amdgpu_vcn_inst, idle_work.work);
> +   struct amdgpu_device *adev = vcn_inst->adev;
> +   unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
> +   unsigned int i, j;
> +   int r = 0;
> +
> +   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +   struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
> +
> +   if (adev->vcn.harvest_config & (1 << i))
> +   continue;
> +
> +   for (j = 0; j < v->num_enc_rings; ++j)
> +   fence[i] += 
> amdgpu_fence_count_emitted(&v->ring_enc[j]);
> +
> +   /* Only set DPG pause for VCN3 or below, VCN4 and above will 
> be handled by FW */
> +   if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> +   !v->using_unified_queue) {
> +   struct dpg_pause_state new_state;
> +
> +   if (fence[i] ||
> +   unlikely(atomic_read(&v->dpg_enc_submission_cnt)))
> +   new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +   else
> +   new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
> +
> +   v->pause_dpg_mode(v, &new_state);
> +   }
> +
> +   fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
> +   fences += fence[i];
> +
> +   }
> +
> +   if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) 
> {
> +   amdgpu_device_ip_set_powergating_state(adev, 
> AMD_IP_BLOCK_TYPE_VCN,
> +  AMD_PG_STATE_GATE);
> +   r = amdgpu_dpm_switch_power_profile(adev, 
> PP_SMC_POWER_PROFILE_VIDEO,
> +   false);
> +   if (r)
> +   dev_warn(adev->dev, "(%d) failed to disable video 
> power profile mode\n", r);
> +   } else {
> +   schedule_delayed_work(&adev->vcn.inst[0].idle_work, 
> VCN_IDLE_TIMEOUT);
> +   }
> +}
> +
> +static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
> +{
> +   struct amdgpu_device *adev = ring->adev;
> +   struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me];
> +   int r = 0;
> +
> +   atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
> +
> +   if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {
> +   r = amdgpu_dpm_switch_power_profile(adev, 
> PP_SMC_POWER_PROFILE_VIDEO,
> +   true);
> +   if (r)
> +   dev_warn(adev->dev, "(%d) failed to switch to video 
> power profile mode\n", r);
> +   }
> +
> +   mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
> +   amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> +  AMD_PG_STATE_UNGATE);
> +
> +   /* Only set DPG pause for VCN3 or below, VCN4 and above will be 
> handled by FW */
> +   if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> +   !v->using_unified_queue) {
> +   struct dpg_pause_state new_state;
> +
> +   if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
> +   atomic_inc(&v->dpg_enc_submission_cnt);
> +   new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +   } else {
> +   unsigned int fences = 0;
> +   unsigned int i;
> +
> +   for (i = 0; i < v->num_enc_rings; ++i)
> +   fences += 
> amdgpu_fence_count_emitted(&v->ring_enc[i]);
> +
> +   if (fences || atomic_read(&v->dpg_enc_submission_cnt))
> +   new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +   else
> +   new_state.fw_based

Re: [PATCH v3 1/4] drm/amdgpu: add or move defines for DCE6 in sid.h

2025-03-06 Thread Alex Deucher
On Wed, Mar 5, 2025 at 9:08 PM Alexandre Demers
 wrote:
>
> For coherence with DCE8 et DCE10, add or move some values under sid.h.
>
> Signed-off-by: Alexandre Demers 

This change doesn't build.  Please adjust the order of the patches as
needed to make sure they all build.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 63 ++-
>  drivers/gpu/drm/amd/amdgpu/si_enums.h |  7 ---
>  drivers/gpu/drm/amd/amdgpu/sid.h  | 29 +---
>  3 files changed, 55 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> index a72fd7220081..185401d66961 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> @@ -32,6 +32,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_pm.h"
>  #include "amdgpu_i2c.h"
> +#include "sid.h"
>  #include "atom.h"
>  #include "amdgpu_atombios.h"
>  #include "atombios_crtc.h"
> @@ -59,31 +60,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device 
> *adev);
>
>  static const u32 crtc_offsets[6] =
>  {
> -   SI_CRTC0_REGISTER_OFFSET,
> -   SI_CRTC1_REGISTER_OFFSET,
> -   SI_CRTC2_REGISTER_OFFSET,
> -   SI_CRTC3_REGISTER_OFFSET,
> -   SI_CRTC4_REGISTER_OFFSET,
> -   SI_CRTC5_REGISTER_OFFSET
> +   CRTC0_REGISTER_OFFSET,
> +   CRTC1_REGISTER_OFFSET,
> +   CRTC2_REGISTER_OFFSET,
> +   CRTC3_REGISTER_OFFSET,
> +   CRTC4_REGISTER_OFFSET,
> +   CRTC5_REGISTER_OFFSET
>  };
>
>  static const u32 hpd_offsets[] =
>  {
> -   mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS,
> -   mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS,
> -   mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS,
> -   mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS,
> -   mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS,
> -   mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS,
> +   HPD0_REGISTER_OFFSET,
> +   HPD1_REGISTER_OFFSET,
> +   HPD2_REGISTER_OFFSET,
> +   HPD3_REGISTER_OFFSET,
> +   HPD4_REGISTER_OFFSET,
> +   HPD5_REGISTER_OFFSET
>  };
>
>  static const uint32_t dig_offsets[] = {
> -   SI_CRTC0_REGISTER_OFFSET,
> -   SI_CRTC1_REGISTER_OFFSET,
> -   SI_CRTC2_REGISTER_OFFSET,
> -   SI_CRTC3_REGISTER_OFFSET,
> -   SI_CRTC4_REGISTER_OFFSET,
> -   SI_CRTC5_REGISTER_OFFSET,
> +   CRTC0_REGISTER_OFFSET,
> +   CRTC1_REGISTER_OFFSET,
> +   CRTC2_REGISTER_OFFSET,
> +   CRTC3_REGISTER_OFFSET,
> +   CRTC4_REGISTER_OFFSET,
> +   CRTC5_REGISTER_OFFSET,
> (0x13830 - 0x7030) >> 2,
>  };
>
> @@ -1359,13 +1360,13 @@ static void dce_v6_0_audio_enable(struct 
> amdgpu_device *adev,
>
>  static const u32 pin_offsets[7] =
>  {
> -   (0x1780 - 0x1780),
> -   (0x1786 - 0x1780),
> -   (0x178c - 0x1780),
> -   (0x1792 - 0x1780),
> -   (0x1798 - 0x1780),
> -   (0x179d - 0x1780),
> -   (0x17a4 - 0x1780),
> +   AUD0_REGISTER_OFFSET,
> +   AUD1_REGISTER_OFFSET,
> +   AUD2_REGISTER_OFFSET,
> +   AUD3_REGISTER_OFFSET,
> +   AUD4_REGISTER_OFFSET,
> +   AUD5_REGISTER_OFFSET,
> +   AUD6_REGISTER_OFFSET,
>  };
>
>  static int dce_v6_0_audio_init(struct amdgpu_device *adev)
> @@ -2876,22 +2877,22 @@ static void 
> dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
>
> switch (crtc) {
> case 0:
> -   reg_block = SI_CRTC0_REGISTER_OFFSET;
> +   reg_block = CRTC0_REGISTER_OFFSET;
> break;
> case 1:
> -   reg_block = SI_CRTC1_REGISTER_OFFSET;
> +   reg_block = CRTC1_REGISTER_OFFSET;
> break;
> case 2:
> -   reg_block = SI_CRTC2_REGISTER_OFFSET;
> +   reg_block = CRTC2_REGISTER_OFFSET;
> break;
> case 3:
> -   reg_block = SI_CRTC3_REGISTER_OFFSET;
> +   reg_block = CRTC3_REGISTER_OFFSET;
> break;
> case 4:
> -   reg_block = SI_CRTC4_REGISTER_OFFSET;
> +   reg_block = CRTC4_REGISTER_OFFSET;
> break;
> case 5:
> -   reg_block = SI_CRTC5_REGISTER_OFFSET;
> +   reg_block = CRTC5_REGISTER_OFFSET;
> break;
> default:
> DRM_DEBUG("invalid crtc %d\n", crtc);
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h 
> b/drivers/gpu/drm/amd/amdgpu/si_enums.h
> index 4e935baa7b91..cdf8eebaa392 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
> +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
> @@ -121,13 +121,6 @@
>  #define CURSOR_UPDATE_LOCK (1 << 16)
>  #define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
>
> -#define SI_CRTC0_REGISTER_OFFSET0
> -#define SI_CRTC1_REGISTER_OFFSET0x300
> -#define SI_CRTC2_REGISTER_OFFSET0x2600
> -#define SI_CRTC3_REGISTER_OFFSET0x2900
> -#define SI_CRTC4_REGISTER_OFFSET0x2c00
> -#define SI_CRTC5

Re: [PATCH v3 2/4] drm/amdgpu: add defines for pin_offsets in DCE8

2025-03-06 Thread Alex Deucher
Applied.  Thanks.

On Wed, Mar 5, 2025 at 9:08 PM Alexandre Demers
 wrote:
>
> Define pin_offsets values in the same way it is done in DCE8
>
> Signed-off-by: Alexandre Demers 
> ---
>  drivers/gpu/drm/amd/amdgpu/cikd.h |  9 +
>  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 14 +++---
>  2 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h 
> b/drivers/gpu/drm/amd/amdgpu/cikd.h
> index 06088d52d81c..279288365940 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cikd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
> @@ -51,6 +51,15 @@
>  #define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
>  #define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
>
> +/* audio endpt instance offsets */
> +#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
> +#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
> +#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
> +#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
> +#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
> +#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
> +#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
> +
>  #define BONAIRE_GB_ADDR_CONFIG_GOLDEN0x12010001
>  #define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
> index 04b79ff87f75..5e657b43a159 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
> @@ -1395,13 +1395,13 @@ static void dce_v8_0_audio_enable(struct 
> amdgpu_device *adev,
>  }
>
>  static const u32 pin_offsets[7] = {
> -   (0x1780 - 0x1780),
> -   (0x1786 - 0x1780),
> -   (0x178c - 0x1780),
> -   (0x1792 - 0x1780),
> -   (0x1798 - 0x1780),
> -   (0x179d - 0x1780),
> -   (0x17a4 - 0x1780),
> +   AUD0_REGISTER_OFFSET,
> +   AUD1_REGISTER_OFFSET,
> +   AUD2_REGISTER_OFFSET,
> +   AUD3_REGISTER_OFFSET,
> +   AUD4_REGISTER_OFFSET,
> +   AUD5_REGISTER_OFFSET,
> +   AUD6_REGISTER_OFFSET,
>  };
>
>  static int dce_v8_0_audio_init(struct amdgpu_device *adev)
> --
> 2.48.1
>


Re: [PATCH 08/11] drm/amdgpu/gfx12: add support for disable_kq

2025-03-06 Thread Khatri, Sunil

Reviewed-by: Sunil Khatri 

On 3/6/2025 2:17 AM, Alex Deucher wrote:

Plumb in support for disabling kernel queues.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 114 ++---
  1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 8a448a20774a8..8fde7b239fdbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1453,37 +1453,39 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block 
*ip_block)
return r;
}
  
-	/* set up the gfx ring */

-   for (i = 0; i < adev->gfx.me.num_me; i++) {
-   for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
-   for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
-   if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, 
j))
-   continue;
-
-   r = gfx_v12_0_gfx_ring_init(adev, ring_id,
-   i, k, j);
-   if (r)
-   return r;
-   ring_id++;
+   if (!adev->gfx.disable_kq) {
+   /* set up the gfx ring */
+   for (i = 0; i < adev->gfx.me.num_me; i++) {
+   for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.me.num_pipe_per_me; 
k++) {
+   if 
(!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+   continue;
+
+   r = gfx_v12_0_gfx_ring_init(adev, 
ring_id,
+   i, k, j);
+   if (r)
+   return r;
+   ring_id++;
+   }
}
}
-   }
-
-   ring_id = 0;
-   /* set up the compute queues - allocate horizontally across pipes */
-   for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
-   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
-   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
-   if (!amdgpu_gfx_is_mec_queue_enabled(adev,
-   0, i, k, j))
-   continue;
-
-   r = gfx_v12_0_compute_ring_init(adev, ring_id,
-   i, k, j);
-   if (r)
-   return r;
  
-ring_id++;

+   ring_id = 0;
+   /* set up the compute queues - allocate horizontally across 
pipes */
+   for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; 
k++) {
+   if 
(!amdgpu_gfx_is_mec_queue_enabled(adev,
+0, 
i, k, j))
+   continue;
+
+   r = gfx_v12_0_compute_ring_init(adev, 
ring_id,
+   i, k, 
j);
+   if (r)
+   return r;
+
+   ring_id++;
+   }
}
}
}
@@ -1572,10 +1574,12 @@ static int gfx_v12_0_sw_fini(struct amdgpu_ip_block 
*ip_block)
int i;
struct amdgpu_device *adev = ip_block->adev;
  
-	for (i = 0; i < adev->gfx.num_gfx_rings; i++)

-   amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
-   for (i = 0; i < adev->gfx.num_compute_rings; i++)
-   amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+   if (!adev->gfx.disable_kq) {
+   for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+   amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+   for (i = 0; i < adev->gfx.num_compute_rings; i++)
+   amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+   }
  
  	amdgpu_gfx_mqd_sw_fini(adev, 0);
  
@@ -3418,6 +3422,9 @@ static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)

if (!amdgpu_async_gfx_ring)
gfx_v12_0_cp_compute_enable(adev, true);
  
+	if (adev->gfx.disable_kq)

+   return 0;
+
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
  
@@

Re: [PATCH 10/11] drm/amdgpu/sdma6: add support for disable_kq

2025-03-06 Thread Khatri, Sunil

Reviewed-by: Sunil Khatri 

On 3/6/2025 2:17 AM, Alex Deucher wrote:

When the parameter is set, disable user submissions
to kernel queues.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 3aa4fec4d9e4a..bcc72737f8084 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1304,6 +1304,9 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block 
*ip_block)
struct amdgpu_device *adev = ip_block->adev;
int r;
  
+	if (amdgpu_disable_kq == 1)

+   adev->sdma.no_user_submission = true;
+
r = amdgpu_sdma_init_microcode(adev, 0, true);
if (r)
return r;
@@ -1338,6 +1341,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block 
*ip_block)
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
+   ring->no_user_submission = adev->sdma.no_user_submission;
  
  		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,

ring->use_doorbell?"true":"false");


Re: [PATCH 09/11] drm/amdgpu/sdma: add flag for tracking disable_kq

2025-03-06 Thread Khatri, Sunil

Reviewed-by: Sunil Khatri 

On 3/6/2025 2:17 AM, Alex Deucher wrote:

For SDMA, we still need kernel queues for paging so
they need to be initialized, but we do not want to
accept submissions from userspace when disable_kq
is set.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 9651693200655..edc856e10337a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -129,6 +129,7 @@ struct amdgpu_sdma {
/* track guilty state of GFX and PAGE queues */
bool gfx_guilty;
bool page_guilty;
+   boolno_user_submission;
  };
  
  /*


[PATCH] drm/amd/display: fix missing .is_two_pixels_per_container

2025-03-06 Thread Aliaksei Urbanski
Starting from 6.11, AMDGPU driver, while being loaded with amdgpu.dc=1,
due to lack of .is_two_pixels_per_container function in dce60_tg_funcs,
causes a NULL pointer dereference on PCs with old GPUs, such as R9 280X.

So this fix adds missing .is_two_pixels_per_container to dce60_tg_funcs.

Reported-by: Rosen Penev 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3942
Fixes: e6a901a00822 ("drm/amd/display: use even ODM slice width for two pixels 
per container")
Cc:  # 6.11+
Signed-off-by: Aliaksei Urbanski 
---
 drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c 
b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
index e5fb0e8333..e691a1cf33 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
@@ -239,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = 
{
dce60_timing_generator_enable_advanced_request,
.configure_crc = dce60_configure_crc,
.get_crc = dce110_get_crc,
+   .is_two_pixels_per_container = 
dce110_is_two_pixels_per_container,
 };
 
 void dce60_timing_generator_construct(
-- 
2.48.1



Re: [PATCH v4] drm/amdgpu: Fix the race condition for draining retry fault

2025-03-06 Thread Felix Kuehling


On 2025-03-06 1:03, Emily Deng wrote:
> Issue:
> In the scenario where svm_range_restore_pages is called, but 
> svm->checkpoint_ts
>  has not been set and the retry fault has not been drained, 
> svm_range_unmap_from_cpu
> is triggered and calls svm_range_free. Meanwhile, svm_range_restore_pages
> continues execution and reaches svm_range_from_addr. This results in
> a "failed to find prange..." error, causing the page recovery to fail.
>
> How to fix:
> Move the timestamp check code under the protection of svm->lock.
>
> v2:
> Make sure all right locks are released before go out.
>
> v3:
> Directly goto out_unlock_svms, and return -EAGAIN.
>
> v4:
> Refine code.
>
> Signed-off-by: Emily Deng 

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 30 +++-
>  1 file changed, 16 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index d04725583f19..83ac14bf7a7a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -3008,19 +3008,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   goto out;
>   }
>  
> - /* check if this page fault time stamp is before svms->checkpoint_ts */
> - if (svms->checkpoint_ts[gpuidx] != 0) {
> - if (amdgpu_ih_ts_after_or_equal(ts,  
> svms->checkpoint_ts[gpuidx])) {
> - pr_debug("draining retry fault, drop fault 0x%llx\n", 
> addr);
> - r = 0;
> - goto out;
> - } else
> - /* ts is after svms->checkpoint_ts now, reset 
> svms->checkpoint_ts
> -  * to zero to avoid following ts wrap around give wrong 
> comparing
> -  */
> - svms->checkpoint_ts[gpuidx] = 0;
> - }
> -
>   if (!p->xnack_enabled) {
>   pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
>   r = -EFAULT;
> @@ -3040,6 +3027,20 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   mmap_read_lock(mm);
>  retry_write_locked:
>   mutex_lock(&svms->lock);
> +
> + /* check if this page fault time stamp is before svms->checkpoint_ts */
> + if (svms->checkpoint_ts[gpuidx] != 0) {
> + if (amdgpu_ih_ts_after_or_equal(ts,  
> svms->checkpoint_ts[gpuidx])) {
> + pr_debug("draining retry fault, drop fault 0x%llx\n", 
> addr);
> + r = -EAGAIN;
> + goto out_unlock_svms;
> + } else
> + /* ts is after svms->checkpoint_ts now, reset 
> svms->checkpoint_ts
> +  * to zero to avoid following ts wrap around give wrong 
> comparing
> +  */
> + svms->checkpoint_ts[gpuidx] = 0;
> + }
> +
>   prange = svm_range_from_addr(svms, addr, NULL);
>   if (!prange) {
>   pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
> @@ -3165,7 +3166,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
> unsigned int pasid,
>   mutex_unlock(&svms->lock);
>   mmap_read_unlock(mm);
>  
> - svm_range_count_fault(node, p, gpuidx);
> + if (r != -EAGAIN)
> + svm_range_count_fault(node, p, gpuidx);
>  
>   mmput(mm);
>  out:


[PATCH] drm/amdgpu: add initial documentation for debugfs files

2025-03-06 Thread Alex Deucher
Describes what debugfs files are available and what
they are used for.

v2: fix some typos (Mark Glines)
v3: Address comments from Siqueira and Kent

Signed-off-by: Alex Deucher 
---
 Documentation/gpu/amdgpu/debugfs.rst   | 210 +
 Documentation/gpu/amdgpu/debugging.rst |   7 +
 Documentation/gpu/amdgpu/index.rst |   1 +
 3 files changed, 218 insertions(+)
 create mode 100644 Documentation/gpu/amdgpu/debugfs.rst

diff --git a/Documentation/gpu/amdgpu/debugfs.rst 
b/Documentation/gpu/amdgpu/debugfs.rst
new file mode 100644
index 0..fdfc1a8773c72
--- /dev/null
+++ b/Documentation/gpu/amdgpu/debugfs.rst
@@ -0,0 +1,210 @@
+==
+AMDGPU DebugFS
+==
+
+The amdgpu driver provides a number of debugfs files to aid in debugging
+issues in the driver.  These are usually found in
+/sys/kernel/debug/dri/<num>.
+
+DebugFS Files
+=
+
+amdgpu_benchmark
+
+
+Run benchmarks using the DMA engine the driver uses for GPU memory paging.
+Write a number to the file to run the test.  The results are written to the
+kernel log.  VRAM is on device memory (dGPUs) or carve out (APUs) and GTT
+(Graphics Translation Tables) is system memory that is accessible by the GPU.
+The following tests are available:
+
+- 1: simple test, VRAM to GTT and GTT to VRAM
+- 2: simple test, VRAM to VRAM
+- 3: GTT to VRAM, buffer size sweep, powers of 2
+- 4: VRAM to GTT, buffer size sweep, powers of 2
+- 5: VRAM to VRAM, buffer size sweep, powers of 2
+- 6: GTT to VRAM, buffer size sweep, common display sizes
+- 7: VRAM to GTT, buffer size sweep, common display sizes
+- 8: VRAM to VRAM, buffer size sweep, common display sizes
+
+amdgpu_test_ib
+--
+
+Read this file to run simple IB (Indirect Buffer) tests on all kernel managed
+rings.  IBs are command buffers usually generated by userspace applications
+which are submitted to the kernel for execution on a particular GPU engine.
+This just runs the simple IB tests included in the kernel.  These tests
+are engine specific and verify that IB submission works.
+
+amdgpu_discovery
+
+
+Provides raw access to the IP discovery binary provided by the GPU.  Read this
+file to access the raw binary.  This is useful for verifying the contents of
+the IP discovery table.  It is chip specific.
+
+amdgpu_vbios
+
+
+Provides raw access to the ROM binary image from the GPU.  Read this file to
+access the raw binary.  This is useful for verifying the contents of the
+video BIOS ROM.  It is board specific.
+
+amdgpu_evict_gtt
+
+
+Evict all buffers from the GTT memory pool.  Read this file to evict all
+buffers from this pool.
+
+amdgpu_evict_vram
+-
+
+Evict all buffers from the VRAM memory pool.  Read this file to evict all
+buffers from this pool.
+
+amdgpu_gpu_recover
+--
+
+Trigger a GPU reset.  Read this file to trigger reset the entire GPU.
+All work currently running  on the GPU will be lost.
+
+amdgpu_ring_
+--
+
+Provides read access to the kernel managed ring buffers for each ring <name>.
+These are useful for debugging problems on a particular ring.  The ring buffer
+is how the CPU sends commands to the GPU.  The CPU writes commands into the
+buffer and then asks the GPU engine to process it.  This is the raw binary
+contents of the ring buffer.  Use a tool like UMR to decode the rings into 
human
+readable form.
+
+amdgpu_mqd_
+-
+
+Provides read access to the kernel managed MQD (Memory Queue Descriptor) for
+ring <name> managed by the kernel driver.  MQDs define the features of the ring
+and are used to store the ring's state when it is not connected to hardware.
+The driver writes the requested ring features and metadata (GPU addresses of
+the ring itself and associated buffers) to the MQD and the firmware uses the 
MQD
+to populate the hardware when the ring is mapped to a hardware slot.  Only
+available on engines which use MQDs.  This provides access to the raw MQD
+binary.
+
+amdgpu_error_
+---
+
+Provides an interface to set an error code on the dma fences associated with
+ring <name>.  The error code specified is propagated to all fences associated
+with the ring.  Use this to inject a fence error into a ring.
+
+amdgpu_pm_info
+--
+
+Provides human readable information about the power management features
+and state of the GPU.  This includes current GFX clock, Memory clock,
+voltages, average SoC power, temperature, GFX load, Memory load, SMU
+feature mask, VCN power state, clock and power gating features.
+
+amdgpu_firmware_info
+
+
+Lists the firmware versions for all firmwares used by the GPU.  Only
+entries with a non-0 version are valid.  If the version is 0, the firmware
+is not valid for the GPU.
+
+amdgpu_fence_info
+-
+
+Shows the last signalled and emitted fence sequence numbers for each
+kernel driver managed ring.  Fences are associated with s

[PATCH 3/3] drm/amdkfd: Add support for more per-process flag

2025-03-06 Thread Amber Lin
From: Harish Kasiviswanathan 

Add support for more per-process flags starting with option to configure
MFMA precision for gfx 9.5

v2: Change flag name to KFD_PROC_FLAG_MFMA_HIGH_PRECISION
Remove unused else condition

Signed-off-by: Harish Kasiviswanathan 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  6 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c  | 11 +--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c  |  6 --
 include/uapi/linux/kfd_ioctl.h|  5 -
 10 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8c2e92378b49..1e9dd00620bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -606,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
-   args->alternate_aperture_size))
+   args->alternate_aperture_size,
+   args->misc_process_flag))
err = -EINVAL;
 
 out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d23c6a358d34..2afcc1b4856a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2596,7 +2596,8 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size)
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties)
 {
bool retval = true;
 
@@ -2611,7 +2612,8 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
default_policy,
alternate_policy,
alternate_aperture_base,
-   alternate_aperture_size);
+   alternate_aperture_size,
+   misc_process_properties);
 
if (retval)
goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7146e227e2c1..122eb745e9c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -174,7 +174,8 @@ struct device_queue_manager_ops {
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size);
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties);
 
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -210,7 +211,8 @@ struct device_queue_manager_asic_ops {
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size);
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties);
void(*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 32bedef912b3..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -40,7 +40,8 @@ static bool set_cache_memory_policy_cik(struct 
device_queue_manager

[PATCH 1/3] drm/amdkfd: Set per-process flags only once cik/vi

2025-03-06 Thread Amber Lin
From: Harish Kasiviswanathan 

Set per-process static sh_mem config only once during process
initialization. Move all static changes from update_qpd() which is
called each time a queue is created to set_cache_memory_policy() which
is called once during process initialization.

set_cache_memory_policy() is currently defined only for cik and vi
family. So this commit only focuses on these two. A separate commit will
address other asics.

Signed-off-by: Harish Kasiviswanathan 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 39 +-
 .../amd/amdkfd/kfd_device_queue_manager_cik.c | 69 --
 .../amd/amdkfd/kfd_device_queue_manager_vi.c  | 71 ---
 3 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f3f2fd6ee65c..d23c6a358d34 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2591,14 +2591,6 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
return retval;
 }
 
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000ULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
   struct qcm_process_device *qpd,
   enum cache_policy default_policy,
@@ -2613,34 +2605,6 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
 
dqm_lock(dqm);
 
-   if (alternate_aperture_size == 0) {
-   /* base > limit disables APE1 */
-   qpd->sh_mem_ape1_base = 1;
-   qpd->sh_mem_ape1_limit = 0;
-   } else {
-   /*
-* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
-*  SH_MEM_APE1_BASE[31:0], 0x0000 }
-* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
-*  SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
-* Verify that the base and size parameters can be
-* represented in this format and convert them.
-* Additionally restrict APE1 to user-mode addresses.
-*/
-
-   uint64_t base = (uintptr_t)alternate_aperture_base;
-   uint64_t limit = base + alternate_aperture_size - 1;
-
-   if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
-  (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
-   retval = false;
-   goto out;
-   }
-
-   qpd->sh_mem_ape1_base = base >> 16;
-   qpd->sh_mem_ape1_limit = limit >> 16;
-   }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
@@ -2649,6 +2613,9 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
alternate_aperture_base,
alternate_aperture_size);
 
+   if (retval)
+   goto out;
+
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index d4d95c7f2e5d..32bedef912b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,6 +27,14 @@
 #include "oss/oss_2_4_sh_mask.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000ULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
 static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
   struct qcm_process_device *qpd,
   enum cache_policy default_policy,
@@ -84,6 +92,36 @@ static bool set_cache_memory_policy_cik(struct 
device_queue_manager *dqm,
 {
uint32_t default_mtype;
uint32_t ape1_mtype;
+   unsigned int temp;
+   bool retval = true;
+
+   if (alternate_aperture_size == 0) {
+   /* base > limit disables APE1 */
+   qpd->sh_mem_ape1_base = 1;
+   qpd->sh_mem_ape1_limit = 0;
+   } else {
+   /*
+* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+*  SH_MEM_APE1_BASE[31:0], 0x0000 }
+* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+*  SH_MEM_APE1_LIMIT[31:0], 0xFFFF }

[PATCH 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12

2025-03-06 Thread Amber Lin
From: Harish Kasiviswanathan 

Define set_cache_memory_policy() for these asics and move all static
changes from update_qpd() which is called each time a queue is created
to set_cache_memory_policy() which is called once during process
initialization

Signed-off-by: Harish Kasiviswanathan 
---
 .../amd/amdkfd/kfd_device_queue_manager_v10.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v11.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v12.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 36 +++-
 4 files changed, 107 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..b5f5f141353b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
 static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
 
 void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
 {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct 
kfd_process_device *pdd)
private_base;
 }
 
-static int update_qpd_v10(struct device_queue_manager *dqm,
-struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size)
 {
-   struct kfd_process_device *pdd;
-
-   pdd = qpd_to_pdd(qpd);
-
-   /* check if sh_mem_config register already configured */
-   if (qpd->sh_mem_config == 0) {
-   qpd->sh_mem_config =
-   (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-   SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
-   (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-   qpd->sh_mem_ape1_limit = 0;
-   qpd->sh_mem_ape1_base = 0;
-   }
-
-   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+   qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << 
SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+   qpd->sh_mem_ape1_limit = 0;
+   qpd->sh_mem_ape1_base = 0;
+   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
 
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+   return true;
+}
 
+static int update_qpd_v10(struct device_queue_manager *dqm,
+struct qcm_process_device *qpd)
+{
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..f436878d0d62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
 static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
 
 void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
 {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
  

Re: [PATCH 02/11] drm/amdgpu: add ring flag for no user submissions

2025-03-06 Thread Alex Deucher
On Thu, Mar 6, 2025 at 2:48 AM Khatri, Sunil  wrote:
>
>
> On 3/6/2025 2:17 AM, Alex Deucher wrote:
> > This would be set by IPs which only accept submissions
> > from the kernel, not userspace, such as when kernel
> > queues are disabled. Don't expose the rings to userspace
> > and reject any submissions in the CS IOCTL.
> >
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 30 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
> >   3 files changed, 25 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 5df21529b3b13..5cc18034b75df 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -349,6 +349,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
> >   ring = amdgpu_job_ring(job);
> >   ib = &job->ibs[job->num_ibs++];
> >
> > + /* submissions to kernel queues are disabled */
> > + if (ring->no_user_submission)
> > + return -EINVAL;
> > +
> Alex, dont we need to disable submission for other type of chunks, from
> function amdgpu_cs_pass2 ? chunk type of bo list and fences are
> submitted by users and we do want to handle that too.
> If that's not needed then lgtm.

That is the only chunk type which directly involves the kernel rings.

Alex


> >   /* MM engine doesn't support user fences */
> >   if (p->uf_bo && ring->funcs->no_user_fence)
> >   return -EINVAL;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index cd6eb7a3bc58a..3b7dfd56ccd0e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -408,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >   case AMDGPU_HW_IP_GFX:
> >   type = AMD_IP_BLOCK_TYPE_GFX;
> >   for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> > - if (adev->gfx.gfx_ring[i].sched.ready)
> > + if (adev->gfx.gfx_ring[i].sched.ready &&
> > + !adev->gfx.gfx_ring[i].no_user_submission)
> >   ++num_rings;
> >   ib_start_alignment = 32;
> >   ib_size_alignment = 32;
> > @@ -416,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >   case AMDGPU_HW_IP_COMPUTE:
> >   type = AMD_IP_BLOCK_TYPE_GFX;
> >   for (i = 0; i < adev->gfx.num_compute_rings; i++)
> > - if (adev->gfx.compute_ring[i].sched.ready)
> > + if (adev->gfx.compute_ring[i].sched.ready &&
> > + !adev->gfx.compute_ring[i].no_user_submission)
> >   ++num_rings;
> >   ib_start_alignment = 32;
> >   ib_size_alignment = 32;
> > @@ -424,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >   case AMDGPU_HW_IP_DMA:
> >   type = AMD_IP_BLOCK_TYPE_SDMA;
> >   for (i = 0; i < adev->sdma.num_instances; i++)
> > - if (adev->sdma.instance[i].ring.sched.ready)
> > + if (adev->sdma.instance[i].ring.sched.ready &&
> > + !adev->gfx.gfx_ring[i].no_user_submission)
> >   ++num_rings;
> >   ib_start_alignment = 256;
> >   ib_size_alignment = 4;
> > @@ -435,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >   if (adev->uvd.harvest_config & (1 << i))
> >   continue;
> >
> > - if (adev->uvd.inst[i].ring.sched.ready)
> > + if (adev->uvd.inst[i].ring.sched.ready &&
> > + !adev->uvd.inst[i].ring.no_user_submission)
> >   ++num_rings;
> >   }
> >   ib_start_alignment = 256;
> > @@ -444,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >   case AMDGPU_HW_IP_VCE:
> >   type = AMD_IP_BLOCK_TYPE_VCE;
> >   for (i = 0; i < adev->vce.num_rings; i++)
> > - if (adev->vce.ring[i].sched.ready)
> > + if (adev->vce.ring[i].sched.ready &&
> > + !adev->vce.ring[i].no_user_submission)
> >   ++num_rings;
> >   ib_start_alignment = 256;
> >   ib_size_alignment = 4;
> > @@ -456,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >   continue;
> >
> >   for (j = 0; j < adev->uvd.num_enc_rings; j++)
> > - if (adev->uvd.inst[i].ring_enc[j].sched.ready)
> > + if (adev->uvd.inst[i].ring_enc

Re: [PATCH v2] drm/amd: Fail initialization earlier when DC is disabled

2025-03-06 Thread Mario Limonciello

On 3/6/2025 14:11, Alex Deucher wrote:

On Thu, Mar 6, 2025 at 2:31 PM Mario Limonciello
 wrote:


On 3/6/2025 13:19, Alex Deucher wrote:

On Thu, Mar 6, 2025 at 1:58 PM Mario Limonciello
 wrote:


Modern APU and dGPU require DC support to be able to light up the
display.  If DC support has been disabled either by kernel config
or by kernel command line the screen will visibly freeze when the
driver finishes early init.

As it's known before early init is done whether DC support is required
detect this during discovery and bail if DC support was disabled
for any reason.  This will ensure that the existing framebuffer
provided by efifb or simpledrm keeps working.


I think there are a couple of corner cases we need to handle:
1. if adev->enable_virtual_display is set.  The user has configured
virtual displays and hence they want to use them rather than the
actual physical displays.  This is useful with GPUs in servers or for
early bring up.

  > 2. If the board supports DCN IP, but all it's been fused off due to>
silicon flaws (e.g., adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK).

In that case, we don't want to fail.


In that case I wonder if it's better to use
amdgpu_device_asic_has_dc_support() instead of
amdgpu_device_has_dc_support() which should cover both of those concerns.


That should work, or maybe just warn once in
amdgpu_device_asic_has_dc_support().  E.g., something like:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1b9b4f8daf531..c986e619dbe99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3988,6 +3988,8 @@ bool amdgpu_device_asic_has_dc_support(enum
amd_asic_type asic_type)
  */
 return amdgpu_dc > 0;
 default:
+   if (amdgpu_dc == 0)
+   DRM_INFO_ONCE("Display Core has been disabled
via kernel parameter, No display!\n");
 return amdgpu_dc != 0;
  #else
 default:



The problem is without a display that message will probably not be seen 
unless someone knows to look for journalctl -k -b-1 or similar.


So my main concern is that people who shoot themselves in the foot at 
least have a display to see the hole in their foot.


I'll have a try with my other idea and follow up with a v3 if I'm happy 
with that.








Alex



Signed-off-by: Mario Limonciello 
---
v2:
   * Update commit message justification
   * Add correct "default" handling
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 46 +--
   1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index a4258127083d..24f532de6322 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -2139,10 +2139,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
  return 0;
  }

-   if (!amdgpu_device_has_dc_support(adev))
-   return 0;
-
-#if defined(CONFIG_DRM_AMD_DC)
  if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
  switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
  case IP_VERSION(1, 0, 0):
@@ -2166,39 +2162,63 @@ static int 
amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
  case IP_VERSION(3, 5, 1):
  case IP_VERSION(3, 6, 0):
  case IP_VERSION(4, 1, 0):
+   if (!amdgpu_device_has_dc_support(adev)) {
+   dev_err(adev->dev,
+   "DC support is required for dm ip 
block(DCE_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCE_HWIP, 0));
+   return -EINVAL;
+   }
+
  /* TODO: Fix IP version. DC code expects version 
4.0.1 */
  if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 
1, 0))
  adev->ip_versions[DCE_HWIP][0] = 
IP_VERSION(4, 0, 1);

+#if defined(CONFIG_DRM_AMD_DC)
  if (amdgpu_sriov_vf(adev))
  amdgpu_discovery_set_sriov_display(adev);
  else
  amdgpu_device_ip_block_add(adev, 
&dm_ip_block);
  break;
+#endif
  default:
-   dev_err(adev->dev,
-   "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
-   amdgpu_ip_version(adev, DCE_HWIP, 0));
-   return -EINVAL;
+   if (amdgpu_device_has_dc_support(adev)) {
+   dev_err(adev->dev,
+   "Failed to add dm ip 
block(DCE_HWIP:0x%x)\n",
+   

Re: [PATCH v2] drm/amd: Fail initialization earlier when DC is disabled

2025-03-06 Thread Alex Deucher
On Thu, Mar 6, 2025 at 2:31 PM Mario Limonciello
 wrote:
>
> On 3/6/2025 13:19, Alex Deucher wrote:
> > On Thu, Mar 6, 2025 at 1:58 PM Mario Limonciello
> >  wrote:
> >>
> >> Modern APU and dGPU require DC support to be able to light up the
> >> display.  If DC support has been disabled either by kernel config
> >> or by kernel command line the screen will visibly freeze when the
> >> driver finishes early init.
> >>
> >> As it's known before early init is done whether DC support is required
> >> detect this during discovery and bail if DC support was disabled
> >> for any reason.  This will ensure that the existing framebuffer
> >> provided by efifb or simpledrm keeps working.
> >
> > I think there are a couple of corner cases we need to handle:
> > 1. if adev->enable_virtual_display is set.  The user has configured
> > virtual displays and hence they want to use them rather than the
> > actual physical displays.  This is useful with GPUs in servers or for
> > early bring up.
> > 2. If the board supports DCN IP, but all of it has been fused off due to
> > silicon flaws (e.g., adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK).
> > In that case, we don't want to fail.
>
> In that case I wonder if it's better to use
> amdgpu_device_asic_has_dc_support() instead of
> amdgpu_device_has_dc_support() which should cover both of those concerns.

That should work, or maybe just warn once in
amdgpu_device_asic_has_dc_support().  E.g., something like:

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1b9b4f8daf531..c986e619dbe99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3988,6 +3988,8 @@ bool amdgpu_device_asic_has_dc_support(enum
amd_asic_type asic_type)
 */
return amdgpu_dc > 0;
default:
+   if (amdgpu_dc == 0)
+   DRM_INFO_ONCE("Display Core has been disabled
via kernel parameter, no display!\n");
return amdgpu_dc != 0;
 #else
default:


>
> >
> > Alex
> >
> >>
> >> Signed-off-by: Mario Limonciello 
> >> ---
> >> v2:
> >>   * Update commit message justification
> >>   * Add correct "default" handling
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 46 +--
> >>   1 file changed, 33 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> >> index a4258127083d..24f532de6322 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> >> @@ -2139,10 +2139,6 @@ static int 
> >> amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
> >>  return 0;
> >>  }
> >>
> >> -   if (!amdgpu_device_has_dc_support(adev))
> >> -   return 0;
> >> -
> >> -#if defined(CONFIG_DRM_AMD_DC)
> >>  if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
> >>  switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
> >>  case IP_VERSION(1, 0, 0):
> >> @@ -2166,39 +2162,63 @@ static int 
> >> amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
> >>  case IP_VERSION(3, 5, 1):
> >>  case IP_VERSION(3, 6, 0):
> >>  case IP_VERSION(4, 1, 0):
> >> +   if (!amdgpu_device_has_dc_support(adev)) {
> >> +   dev_err(adev->dev,
> >> +   "DC support is required for dm ip 
> >> block(DCE_HWIP:0x%x)\n",
> >> +   amdgpu_ip_version(adev, DCE_HWIP, 
> >> 0));
> >> +   return -EINVAL;
> >> +   }
> >> +
> >>  /* TODO: Fix IP version. DC code expects version 
> >> 4.0.1 */
> >>  if (adev->ip_versions[DCE_HWIP][0] == 
> >> IP_VERSION(4, 1, 0))
> >>  adev->ip_versions[DCE_HWIP][0] = 
> >> IP_VERSION(4, 0, 1);
> >>
> >> +#if defined(CONFIG_DRM_AMD_DC)
> >>  if (amdgpu_sriov_vf(adev))
> >>  amdgpu_discovery_set_sriov_display(adev);
> >>  else
> >>  amdgpu_device_ip_block_add(adev, 
> >> &dm_ip_block);
> >>  break;
> >> +#endif
> >>  default:
> >> -   dev_err(adev->dev,
> >> -   "Failed to add dm ip 
> >> block(DCE_HWIP:0x%x)\n",
> >> -   amdgpu_ip_version(adev, DCE_HWIP, 0));
> >> -   return -EINVAL;
> >> +   if (amdgpu_device_has_dc_support(adev)) {
> >> +   dev_err(adev->dev,
> >> +   "Failed to add dm ip 
> >> block(DCE_HWIP:0x%x)\n",
> >> +   

[PATCH 08/11] drm/amdgpu/gfx12: add support for disable_kq

2025-03-06 Thread Alex Deucher
Plumb in support for disabling kernel queues.

v2: use ring counts per Felix' suggestion

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 96 --
 1 file changed, 58 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 34cf187e72d9f..23ee4651cbffb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1421,11 +1421,13 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block 
*ip_block)
break;
}
 
-   /* recalculate compute rings to use based on hardware configuration */
-   num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
-adev->gfx.mec.num_queue_per_pipe) / 2;
-   adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
- num_compute_rings);
+   if (adev->gfx.num_compute_rings) {
+   /* recalculate compute rings to use based on hardware 
configuration */
+   num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+adev->gfx.mec.num_queue_per_pipe) / 2;
+   adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+   }
 
/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
@@ -1471,37 +1473,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block 
*ip_block)
return r;
}
 
-   /* set up the gfx ring */
-   for (i = 0; i < adev->gfx.me.num_me; i++) {
-   for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
-   for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
-   if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, 
j))
-   continue;
-
-   r = gfx_v12_0_gfx_ring_init(adev, ring_id,
-   i, k, j);
-   if (r)
-   return r;
-   ring_id++;
+   if (adev->gfx.num_gfx_rings) {
+   /* set up the gfx ring */
+   for (i = 0; i < adev->gfx.me.num_me; i++) {
+   for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.me.num_pipe_per_me; 
k++) {
+   if 
(!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+   continue;
+
+   r = gfx_v12_0_gfx_ring_init(adev, 
ring_id,
+   i, k, j);
+   if (r)
+   return r;
+   ring_id++;
+   }
}
}
}
 
-   ring_id = 0;
-   /* set up the compute queues - allocate horizontally across pipes */
-   for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
-   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
-   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
-   if (!amdgpu_gfx_is_mec_queue_enabled(adev,
-   0, i, k, j))
-   continue;
+   if (adev->gfx.num_compute_rings) {
+   ring_id = 0;
+   /* set up the compute queues - allocate horizontally across 
pipes */
+   for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; 
k++) {
+   if 
(!amdgpu_gfx_is_mec_queue_enabled(adev,
+0, 
i, k, j))
+   continue;
 
-   r = gfx_v12_0_compute_ring_init(adev, ring_id,
-   i, k, j);
-   if (r)
-   return r;
+   r = gfx_v12_0_compute_ring_init(adev, 
ring_id,
+   i, k, 
j);
+   if (r)
+   return r;
 
-   ring_id++;
+   ring_id++;
+   }
}
}
}
@@ -3495,12 +3501,18 @@ static int gfx_v12_0_cp_resume(struct

Re: [PATCH v4 1/3] drm/amdkfd: Set per-process flags only once cik/vi

2025-03-06 Thread Amber Lin

Reviewed-by: Amber Lin 

Regards,
Amber

On 3/6/25 14:52, Harish Kasiviswanathan wrote:

Set per-process static sh_mem config only once during process
initialization. Move all static changes from update_qpd() which is
called each time a queue is created to set_cache_memory_policy() which
is called once during process initialization.

set_cache_memory_policy() is currently defined only for cik and vi
family. So this commit only focuses on these two. A separate commit will
address other asics.

Signed-off-by: Harish Kasiviswanathan 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 39 +-
  .../amd/amdkfd/kfd_device_queue_manager_cik.c | 69 --
  .../amd/amdkfd/kfd_device_queue_manager_vi.c  | 71 ---
  3 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f3f2fd6ee65c..d23c6a358d34 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2591,14 +2591,6 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
return retval;
  }
  
-/*

- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000ULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
  static bool set_cache_memory_policy(struct device_queue_manager *dqm,
   struct qcm_process_device *qpd,
   enum cache_policy default_policy,
@@ -2613,34 +2605,6 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
  
  	dqm_lock(dqm);
  
-	if (alternate_aperture_size == 0) {

-   /* base > limit disables APE1 */
-   qpd->sh_mem_ape1_base = 1;
-   qpd->sh_mem_ape1_limit = 0;
-   } else {
-   /*
-* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
-*  SH_MEM_APE1_BASE[31:0], 0x0000 }
-* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
-*  SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
-* Verify that the base and size parameters can be
-* represented in this format and convert them.
-* Additionally restrict APE1 to user-mode addresses.
-*/
-
-   uint64_t base = (uintptr_t)alternate_aperture_base;
-   uint64_t limit = base + alternate_aperture_size - 1;
-
-   if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
-  (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
-   retval = false;
-   goto out;
-   }
-
-   qpd->sh_mem_ape1_base = base >> 16;
-   qpd->sh_mem_ape1_limit = limit >> 16;
-   }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
@@ -2649,6 +2613,9 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
alternate_aperture_base,
alternate_aperture_size);
  
+	if (retval)

+   goto out;
+
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c

index d4d95c7f2e5d..32bedef912b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,6 +27,14 @@
  #include "oss/oss_2_4_sh_mask.h"
  #include "gca/gfx_7_2_sh_mask.h"
  
+/*

+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000ULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
  static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
   struct qcm_process_device *qpd,
   enum cache_policy default_policy,
@@ -84,6 +92,36 @@ static bool set_cache_memory_policy_cik(struct 
device_queue_manager *dqm,
  {
uint32_t default_mtype;
uint32_t ape1_mtype;
+   unsigned int temp;
+   bool retval = true;
+
+   if (alternate_aperture_size == 0) {
+   /* base > limit disables APE1 */
+   qpd->sh_mem_ape1_base = 1;
+   qpd->sh_mem_ape1_limit = 0;
+   } else {
+   /*
+* In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+*  SH_MEM_APE1_BASE[31:0], 0x0000 }
+* APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+   

[PATCH v3] drm/amd: Fail initialization earlier when DC is disabled

2025-03-06 Thread Mario Limonciello
Modern APU and dGPU require DC support to be able to light up the
display.  If DC support has been disabled either by kernel config
or by kernel command line the screen will visibly freeze when the
driver finishes early init.

As it's known before early init is done whether DC support is required
detect this during discovery and bail if DC support was disabled
for any reason.  This will ensure that the existing framebuffer
provided by efifb or simpledrm keeps working.

Signed-off-by: Mario Limonciello 
---
v3:
 * Use amdgpu_device_asic_has_dc_support() instead to cover virtual
   displays and bringup
v2:
 * Update commit message justification
 * Add correct "default" handling
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 49 ++-
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index a4258127083d0..ddd10e6345601 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -2134,15 +2134,14 @@ static void amdgpu_discovery_set_sriov_display(struct 
amdgpu_device *adev)
 
 static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
 {
+   bool asic_support;
+
if (adev->enable_virtual_display) {
amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
return 0;
}
 
-   if (!amdgpu_device_has_dc_support(adev))
-   return 0;
-
-#if defined(CONFIG_DRM_AMD_DC)
+   asic_support = amdgpu_device_asic_has_dc_support(adev->asic_type);
if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
@@ -2166,39 +2165,63 @@ static int 
amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 5, 1):
case IP_VERSION(3, 6, 0):
case IP_VERSION(4, 1, 0):
+   if (!asic_support) {
+   dev_err(adev->dev,
+   "DC support is required for dm ip 
block(DCE_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCE_HWIP, 0));
+   return -EINVAL;
+   }
+
/* TODO: Fix IP version. DC code expects version 4.0.1 
*/
if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 
0))
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 
0, 1);
 
+#if defined(CONFIG_DRM_AMD_DC)
if (amdgpu_sriov_vf(adev))
amdgpu_discovery_set_sriov_display(adev);
else
amdgpu_device_ip_block_add(adev, &dm_ip_block);
break;
+#endif
default:
-   dev_err(adev->dev,
-   "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
-   amdgpu_ip_version(adev, DCE_HWIP, 0));
-   return -EINVAL;
+   if (asic_support) {
+   dev_err(adev->dev,
+   "Failed to add dm ip 
block(DCE_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCE_HWIP, 0));
+   return -EINVAL;
+   }
+   return 0;
}
} else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
case IP_VERSION(12, 1, 0):
+
+   if (!asic_support) {
+   dev_err(adev->dev,
+   "DC support is required for dm ip 
block(DCI_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCI_HWIP, 0));
+   return -EINVAL;
+   }
+
+#if defined(CONFIG_DRM_AMD_DC)
if (amdgpu_sriov_vf(adev))
amdgpu_discovery_set_sriov_display(adev);
else
amdgpu_device_ip_block_add(adev, &dm_ip_block);
break;
+#endif
default:
-   dev_err(adev->dev,
-   "Failed to add dm ip block(DCI_HWIP:0x%x)\n",
-   amdgpu_ip_version(adev, DCI_HWIP, 0));
-   return -EINVAL;
+   if (asic_support) {
+   dev_err(adev->dev,
+   "Failed to add dm ip 
block(DCI_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCI_HWIP, 0));
+

Re: [PATCH v4 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12

2025-03-06 Thread Amber Lin

Reviewed-by: Amber Lin 

Regards,
Amber

On 3/6/25 14:52, Harish Kasiviswanathan wrote:

Define set_cache_memory_policy() for these asics and move all static
changes from update_qpd() which is called each time a queue is created
to set_cache_memory_policy() which is called once during process
initialization

Signed-off-by: Harish Kasiviswanathan 
---
  .../amd/amdkfd/kfd_device_queue_manager_v10.c | 41 +++
  .../amd/amdkfd/kfd_device_queue_manager_v11.c | 41 +++
  .../amd/amdkfd/kfd_device_queue_manager_v12.c | 41 +++
  .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 36 +++-
  4 files changed, 107 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..b5f5f141353b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
  static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue 
*q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
  
  void device_queue_manager_init_v10(

struct device_queue_manager_asic_ops *asic_ops)
  {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct 
kfd_process_device *pdd)
private_base;
  }
  
-static int update_qpd_v10(struct device_queue_manager *dqm,

-struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size)
  {
-   struct kfd_process_device *pdd;
-
-   pdd = qpd_to_pdd(qpd);
-
-   /* check if sh_mem_config register already configured */
-   if (qpd->sh_mem_config == 0) {
-   qpd->sh_mem_config =
-   (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-   SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
-   (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-   qpd->sh_mem_ape1_limit = 0;
-   qpd->sh_mem_ape1_base = 0;
-   }
-
-   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+   qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << 
SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+   qpd->sh_mem_ape1_limit = 0;
+   qpd->sh_mem_ape1_base = 0;
+   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
  
  	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);

+   return true;
+}
  
+static int update_qpd_v10(struct device_queue_manager *dqm,

+struct qcm_process_device *qpd)
+{
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c

index 2e129da7acb4..f436878d0d62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
  static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue 
*q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
  
  void device_queue_manager_init_v11(

struct device_queue_manager_asic_ops *asic_ops

Re: [PATCH 0/4] drm/amd/display: move from kzalloc(size * nr, ...) to kcalloc(nr, size, ...)

2025-03-06 Thread Alex Hung

This series LGTM too.

Reviewed-by: Alex Hung 

On 2/27/25 16:16, Ethan Carter Edwards wrote:

We are trying to get rid of all multiplications from allocation
functions to prevent integer overflows. Here the multiplications are
probably safe, but using kcalloc() is more appropriate and improves
readability. It is also safer. This series contains a few patches
with these fixes.

Part of the Kernel Self Protection Project efforts. Links below have
more details.

Link: https://github.com/KSPP/linux/issues/162
Link: 
https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments

Signed-off-by: Ethan Carter Edwards 
---
Ethan Carter Edwards (4):
   drm/amd/display: change kzalloc to kcalloc in dcn30_validate_bandwidth()
   drm/amd/display: change kzalloc to kcalloc in dcn31_validate_bandwidth()
   drm/amd/display: change kzalloc to kcalloc in dcn314_validate_bandwidth()
   drm/amd/display: change kzalloc to kcalloc in dml1_validate()

  drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c   | 3 ++-
  drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c   | 3 ++-
  drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 3 ++-
  drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c   | 3 ++-
  4 files changed, 8 insertions(+), 4 deletions(-)
---
base-commit: be5c7bbb3a64baf884481a1ba0c2f8fb2f93f7c3
change-id: 20250227-amd-display-a8342c55a9a0

Best regards,




[PATCH V2 00/11] Add disable kernel queue support

2025-03-06 Thread Alex Deucher
To better evaluate user queues, add a module parameter
to disable kernel queues.  With this set kernel queues
are disabled and only user queues are available.  This
frees up hardware resources for use in user queues which
would otherwise be used by kernel queues and provides
a way to validate user queues without the presence
of kernel queues.

v2: use num_gfx_rings and num_compute_rings per
Felix suggestion

Alex Deucher (11):
  drm/amdgpu: add parameter to disable kernel queues
  drm/amdgpu: add ring flag for no user submissions
  drm/amdgpu/gfx: add generic handling for disable_kq
  drm/amdgpu/mes: centralize gfx_hqd mask management
  drm/amdgpu/mes: update hqd masks when disable_kq is set
  drm/amdgpu/mes: make more vmids available when disable_kq=1
  drm/amdgpu/gfx11: add support for disable_kq
  drm/amdgpu/gfx12: add support for disable_kq
  drm/amdgpu/sdma: add flag for tracking disable_kq
  drm/amdgpu/sdma6: add support for disable_kq
  drm/amdgpu/sdma7: add support for disable_kq

 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  9 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  |  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 30 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  | 26 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c   | 99 
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c   | 96 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c   | 16 +---
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c   | 15 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   |  4 +
 drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   |  4 +
 17 files changed, 204 insertions(+), 112 deletions(-)

-- 
2.48.1



[PATCH 10/11] drm/amdgpu/sdma6: add support for disable_kq

2025-03-06 Thread Alex Deucher
When the parameter is set, disable user submissions
to kernel queues.

Reviewed-by: Sunil Khatri 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 3aa4fec4d9e4a..bcc72737f8084 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1304,6 +1304,9 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block 
*ip_block)
struct amdgpu_device *adev = ip_block->adev;
int r;
 
+   if (amdgpu_disable_kq == 1)
+   adev->sdma.no_user_submission = true;
+
r = amdgpu_sdma_init_microcode(adev, 0, true);
if (r)
return r;
@@ -1338,6 +1341,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block 
*ip_block)
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
+   ring->no_user_submission = adev->sdma.no_user_submission;
 
DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
-- 
2.48.1



[PATCH 02/11] drm/amdgpu: add ring flag for no user submissions

2025-03-06 Thread Alex Deucher
This would be set by IPs which only accept submissions
from the kernel, not userspace, such as when kernel
queues are disabled. Don't expose the rings to userspace
and reject any submissions in the CS IOCTL.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 30 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5df21529b3b13..5cc18034b75df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -349,6 +349,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
ring = amdgpu_job_ring(job);
ib = &job->ibs[job->num_ibs++];
 
+   /* submissions to kernel queues are disabled */
+   if (ring->no_user_submission)
+   return -EINVAL;
+
/* MM engine doesn't support user fences */
if (p->uf_bo && ring->funcs->no_user_fence)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index cd6eb7a3bc58a..3b7dfd56ccd0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -408,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-   if (adev->gfx.gfx_ring[i].sched.ready)
+   if (adev->gfx.gfx_ring[i].sched.ready &&
+   !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@@ -416,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
-   if (adev->gfx.compute_ring[i].sched.ready)
+   if (adev->gfx.compute_ring[i].sched.ready &&
+   !adev->gfx.compute_ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@@ -424,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
-   if (adev->sdma.instance[i].ring.sched.ready)
+   if (adev->sdma.instance[i].ring.sched.ready &&
+   !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -435,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
 
-   if (adev->uvd.inst[i].ring.sched.ready)
+   if (adev->uvd.inst[i].ring.sched.ready &&
+   !adev->uvd.inst[i].ring.no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -444,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
-   if (adev->vce.ring[i].sched.ready)
+   if (adev->vce.ring[i].sched.ready &&
+   !adev->vce.ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -456,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
 
for (j = 0; j < adev->uvd.num_enc_rings; j++)
-   if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+   if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+   
!adev->uvd.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -468,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
 
-   if (adev->vcn.inst[i].ring_dec.sched.ready)
+   if (adev->vcn.inst[i].ring_dec.sched.ready &&
+   !adev->vcn.inst[i].ring_dec.no_user_submission)
 

[PATCH 06/11] drm/amdgpu/mes: make more vmids available when disable_kq=1

2025-03-06 Thread Alex Deucher
If we don't have kernel queues, the vmids can be used by
the MES for user queues.

Reviewed-by: Sunil Khatri 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index e585e8690edf0..d7cdd2895889a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -142,7 +142,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xff00;
-   adev->mes.vmid_mask_gfxhub = 0xff00;
+   adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffe : 
0xff00;
 
for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
/* use only 1st ME pipe */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 95d894a231fcf..19a5f196829f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -900,7 +900,7 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block 
*ip_block)
 * amdgpu graphics/compute will use VMIDs 1-7
 * amdkfd will use VMIDs 8-15
 */
-   adev->vm_manager.first_kfd_vmid = 8;
+   adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
 
amdgpu_vm_manager_init(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index ea7c32d8380ba..598324e736092 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -837,7 +837,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block 
*ip_block)
 * amdgpu graphics/compute will use VMIDs 1-7
 * amdkfd will use VMIDs 8-15
 */
-   adev->vm_manager.first_kfd_vmid = 8;
+   adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
 
amdgpu_vm_manager_init(adev);
 
-- 
2.48.1



[PATCH 05/11] drm/amdgpu/mes: update hqd masks when disable_kq is set

2025-03-06 Thread Alex Deucher
Make all resources available to user queues.

Suggested-by: Sunil Khatri 
Reviewed-by: Sunil Khatri 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 5913c5ba85ed0..e585e8690edf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -156,21 +156,21 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 * Set GFX pipe 0 queue 1-7 for MES scheduling
 * mask =  1110b
 */
-   adev->mes.gfx_hqd_mask[i] = 0xFE;
+   adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF 
: 0xFE;
else
/*
 * GFX pipe 0 queue 0 is being used by Kernel queue.
 * Set GFX pipe 0 queue 1 for MES scheduling
 * mask = 10b
 */
-   adev->mes.gfx_hqd_mask[i] = 0x2;
+   adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 
: 0x2;
}
 
for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
/* use only 1st MEC pipes */
if (i >= adev->gfx.mec.num_pipe_per_mec)
continue;
-   adev->mes.compute_hqd_mask[i] = 0xc;
+   adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 
0xC;
}
 
for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
-- 
2.48.1



[PATCH 09/11] drm/amdgpu/sdma: add flag for tracking disable_kq

2025-03-06 Thread Alex Deucher
For SDMA, we still need kernel queues for paging so
they need to be initialized, but we do not want to
accept submissions from userspace when disable_kq
is set.

Reviewed-by: Sunil Khatri 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 9651693200655..edc856e10337a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -129,6 +129,7 @@ struct amdgpu_sdma {
/* track guilty state of GFX and PAGE queues */
bool gfx_guilty;
bool page_guilty;
+   boolno_user_submission;
 };
 
 /*
-- 
2.48.1



[PATCH] drm/amd: Keep display off while going into S4

2025-03-06 Thread Mario Limonciello
When userspace invokes S4 the flow is:

1) amdgpu_pmops_prepare()
2) amdgpu_pmops_freeze()
3) Create hibernation image
4) amdgpu_pmops_thaw()
5) Write out image to disk
6) Turn off system

Then on resume amdgpu_pmops_restore() is called.

This flow has a problem that because amdgpu_pmops_thaw() is called
it will call amdgpu_device_resume() which will resume all of the GPU.

This includes turning the display hardware back on and discovering
connectors again.

Having the display hardware turn back on in the middle of the
hibernation sequence is an unexpected experience for the user.
Adjust the flow so that during the S4 sequence display hardware is
not turned back on.

Reported-by: Xaver Hugl 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2038
Cc: Muhammad Usama Anjum 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 11 +--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  5 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b161daa90019..b54c4b2f3f7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2565,7 +2565,6 @@ static int amdgpu_pmops_freeze(struct device *dev)
int r;
 
r = amdgpu_device_suspend(drm_dev, true);
-   adev->in_s4 = false;
if (r)
return r;
 
@@ -2577,8 +2576,13 @@ static int amdgpu_pmops_freeze(struct device *dev)
 static int amdgpu_pmops_thaw(struct device *dev)
 {
struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;
 
-   return amdgpu_device_resume(drm_dev, true);
+   r = amdgpu_device_resume(drm_dev, true);
+   adev->in_s4 = false;
+
+   return r;
 }
 
 static int amdgpu_pmops_poweroff(struct device *dev)
@@ -2591,6 +2595,9 @@ static int amdgpu_pmops_poweroff(struct device *dev)
 static int amdgpu_pmops_restore(struct device *dev)
 {
struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+   adev->in_s4 = false;
 
return amdgpu_device_resume(drm_dev, true);
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6f9331fe91c3..5939796db74c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3431,6 +3431,11 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
 
return 0;
}
+
+   /* leave display off for S4 sequence */
+   if (adev->in_s4)
+   return 0;
+
/* Recreate dc_state - DC invalidates it when setting power state to 
S3. */
dc_state_release(dm_state->context);
dm_state->context = dc_state_create(dm->dc, NULL);
-- 
2.48.1



Re: [PATCH] drm/amd/amdgpu: Add missing GC 11.5.0 register

2025-03-06 Thread Alex Deucher
On Thu, Mar 6, 2025 at 1:20 PM Tom St Denis  wrote:
>
> Adds register needed for debugging purposes.
>
> Signed-off-by: Tom St Denis 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h 
> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h
> index abdb8728156e..d6c02cf815be 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h
> @@ -9478,6 +9478,8 @@
>  #define regRLC_GFX_IMU_CMD_BASE_IDX  
>1
>  #define regGFX_IMU_RLC_STATUS
>0x4054
>  #define regGFX_IMU_RLC_STATUS_BASE_IDX   
>1
> +#define regGFX_IMU_STATUS
>   0x4055
> +#define regGFX_IMU_STATUS_BASE_IDX   
>   1
>  #define regGFX_IMU_SOC_DATA  
>0x4059
>  #define regGFX_IMU_SOC_DATA_BASE_IDX 
>1
>  #define regGFX_IMU_SOC_ADDR  
>0x405a
> --
> 2.45.2
>


[PATCH 01/11] drm/amdgpu: add parameter to disable kernel queues

2025-03-06 Thread Alex Deucher
On chips that support user queues, setting this option
will disable kernel queues so that user queues can be
validated without kernel queues present.

Reviewed-by: Sunil Khatri 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 87062c1adcdf7..45437a8f29d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -270,6 +270,7 @@ extern int amdgpu_user_partt_mode;
 extern int amdgpu_agp;
 
 extern int amdgpu_wbrf;
+extern int amdgpu_disable_kq;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b161daa900198..42a7619592ab9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -237,6 +237,7 @@ int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
 int amdgpu_damage_clips = -1; /* auto */
 int amdgpu_umsch_mm_fwlog;
+int amdgpu_disable_kq = -1;
 
 DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
"DRM_UT_CORE",
@@ -1083,6 +1084,14 @@ MODULE_PARM_DESC(wbrf,
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 
= auto(default)");
 module_param_named(wbrf, amdgpu_wbrf, int, 0444);
 
+/**
+ * DOC: disable_kq (int)
+ * Disable kernel queues on systems that support user queues.
+ * (0 = kernel queues enabled, 1 = kernel queues disabled, -1 = auto (default 
setting))
+ */
+MODULE_PARM_DESC(disable_kq, "Disable kernel queues (-1 = auto (default), 0 = 
enable KQ, 1 = disable KQ)");
+module_param_named(disable_kq, amdgpu_disable_kq, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
-- 
2.48.1



Re: [PATCH v2] drm/amd: Fail initialization earlier when DC is disabled

2025-03-06 Thread Alex Deucher
On Thu, Mar 6, 2025 at 1:58 PM Mario Limonciello
 wrote:
>
> Modern APU and dGPU require DC support to be able to light up the
> display.  If DC support has been disabled either by kernel config
> or by kernel command line the screen will visibly freeze when the
> driver finishes early init.
>
> As it's known before early init is done whether DC support is required
> detect this during discovery and bail if DC support was disabled
> for any reason.  This will ensure that the existing framebuffer
> provided by efifb or simpledrm keeps working.

I think there are a couple of corner cases we need to handle:
1. if adev->enable_virtual_display is set.  The user has configured
virtual displays and hence they want to use them rather than the
actual physical displays.  This is useful with GPUs in servers or for
early bring up.
2. If the board supports DCN IP, but it's all been fused off due to
silicon flaws (e.g., adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK).
In that case, we don't want to fail.

Alex

>
> Signed-off-by: Mario Limonciello 
> ---
> v2:
>  * Update commit message justification
>  * Add correct "default" handling
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 46 +--
>  1 file changed, 33 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index a4258127083d..24f532de6322 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -2139,10 +2139,6 @@ static int 
> amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
> return 0;
> }
>
> -   if (!amdgpu_device_has_dc_support(adev))
> -   return 0;
> -
> -#if defined(CONFIG_DRM_AMD_DC)
> if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
> switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
> case IP_VERSION(1, 0, 0):
> @@ -2166,39 +2162,63 @@ static int 
> amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
> case IP_VERSION(3, 5, 1):
> case IP_VERSION(3, 6, 0):
> case IP_VERSION(4, 1, 0):
> +   if (!amdgpu_device_has_dc_support(adev)) {
> +   dev_err(adev->dev,
> +   "DC support is required for dm ip 
> block(DCE_HWIP:0x%x)\n",
> +   amdgpu_ip_version(adev, DCE_HWIP, 0));
> +   return -EINVAL;
> +   }
> +
> /* TODO: Fix IP version. DC code expects version 
> 4.0.1 */
> if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 
> 1, 0))
> adev->ip_versions[DCE_HWIP][0] = 
> IP_VERSION(4, 0, 1);
>
> +#if defined(CONFIG_DRM_AMD_DC)
> if (amdgpu_sriov_vf(adev))
> amdgpu_discovery_set_sriov_display(adev);
> else
> amdgpu_device_ip_block_add(adev, 
> &dm_ip_block);
> break;
> +#endif
> default:
> -   dev_err(adev->dev,
> -   "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
> -   amdgpu_ip_version(adev, DCE_HWIP, 0));
> -   return -EINVAL;
> +   if (amdgpu_device_has_dc_support(adev)) {
> +   dev_err(adev->dev,
> +   "Failed to add dm ip 
> block(DCE_HWIP:0x%x)\n",
> +   amdgpu_ip_version(adev, DCE_HWIP, 0));
> +   return -EINVAL;
> +   }
> +   return 0;
> }
> } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
> switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
> case IP_VERSION(12, 0, 0):
> case IP_VERSION(12, 0, 1):
> case IP_VERSION(12, 1, 0):
> +
> +   if (!amdgpu_device_has_dc_support(adev)) {
> +   dev_err(adev->dev,
> +   "DC support is required for dm ip 
> block(DCI_HWIP:0x%x)\n",
> +   amdgpu_ip_version(adev, DCI_HWIP, 0));
> +   return -EINVAL;
> +   }
> +
> +#if defined(CONFIG_DRM_AMD_DC)
> if (amdgpu_sriov_vf(adev))
> amdgpu_discovery_set_sriov_display(adev);
> else
> amdgpu_device_ip_block_add(adev, 
> &dm_ip_block);
> break;
> +#endif
> default:
> -   dev_err(adev->dev,
> -   "Failed to add dm ip block(DCI_HWIP:0x%x)\n",
> -   amdgp

Re: [PATCH v2] drm/amd: Fail initialization earlier when DC is disabled

2025-03-06 Thread Mario Limonciello

On 3/6/2025 13:19, Alex Deucher wrote:

On Thu, Mar 6, 2025 at 1:58 PM Mario Limonciello
 wrote:


Modern APU and dGPU require DC support to be able to light up the
display.  If DC support has been disabled either by kernel config
or by kernel command line the screen will visibly freeze when the
driver finishes early init.

As it's known before early init is done whether DC support is required
detect this during discovery and bail if DC support was disabled
for any reason.  This will ensure that the existing framebuffer
provided by efifb or simpledrm keeps working.


I think there are a couple of corner cases we need to handle:
1. if adev->enable_virtual_display is set.  The user has configured
virtual displays and hence they want to use them rather than the
actual physical displays.  This is useful with GPUs in servers or for
early bring up.
2. If the board supports DCN IP, but it's all been fused off due to
silicon flaws (e.g., adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK).

In that case, we don't want to fail.


In that case I wonder if it's better to use 
amdgpu_device_asic_has_dc_support() instead of 
amdgpu_device_has_dc_support() which should cover both of those concerns.




Alex



Signed-off-by: Mario Limonciello 
---
v2:
  * Update commit message justification
  * Add correct "default" handling
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 46 +--
  1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index a4258127083d..24f532de6322 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -2139,10 +2139,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct 
amdgpu_device *adev)
 return 0;
 }

-   if (!amdgpu_device_has_dc_support(adev))
-   return 0;
-
-#if defined(CONFIG_DRM_AMD_DC)
 if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
 switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
 case IP_VERSION(1, 0, 0):
@@ -2166,39 +2162,63 @@ static int 
amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
 case IP_VERSION(3, 5, 1):
 case IP_VERSION(3, 6, 0):
 case IP_VERSION(4, 1, 0):
+   if (!amdgpu_device_has_dc_support(adev)) {
+   dev_err(adev->dev,
+   "DC support is required for dm ip 
block(DCE_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCE_HWIP, 0));
+   return -EINVAL;
+   }
+
 /* TODO: Fix IP version. DC code expects version 4.0.1 
*/
 if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 
0))
 adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 
0, 1);

+#if defined(CONFIG_DRM_AMD_DC)
 if (amdgpu_sriov_vf(adev))
 amdgpu_discovery_set_sriov_display(adev);
 else
 amdgpu_device_ip_block_add(adev, &dm_ip_block);
 break;
+#endif
 default:
-   dev_err(adev->dev,
-   "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
-   amdgpu_ip_version(adev, DCE_HWIP, 0));
-   return -EINVAL;
+   if (amdgpu_device_has_dc_support(adev)) {
+   dev_err(adev->dev,
+   "Failed to add dm ip 
block(DCE_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCE_HWIP, 0));
+   return -EINVAL;
+   }
+   return 0;
 }
 } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
 switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
 case IP_VERSION(12, 0, 0):
 case IP_VERSION(12, 0, 1):
 case IP_VERSION(12, 1, 0):
+
+   if (!amdgpu_device_has_dc_support(adev)) {
+   dev_err(adev->dev,
+   "DC support is required for dm ip 
block(DCI_HWIP:0x%x)\n",
+   amdgpu_ip_version(adev, DCI_HWIP, 0));
+   return -EINVAL;
+   }
+
+#if defined(CONFIG_DRM_AMD_DC)
 if (amdgpu_sriov_vf(adev))
 amdgpu_discovery_set_sriov_display(adev);
 else
 amdgpu_device_ip_block_add(adev, &dm_ip_block);
 break;
+#endif
 default:
-   dev_err(adev->dev,
-   "Failed to add dm ip block(DCI_HWIP

[pull] amdgpu, amdkfd, radeon drm-fixes-6.14

2025-03-06 Thread Alex Deucher
Hi Dave, Simona,

Fixes for 6.14.

The following changes since commit 7eb172143d5508b4da468ed59ee857c6e5e01da6:

  Linux 6.14-rc5 (2025-03-02 11:48:20 -0800)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-6.14-2025-03-06

for you to fetch changes up to da552bda987420e877500fdd90bd0172e3bf412b:

  drm/amd/pm: always allow ih interrupt from fw (2025-03-05 12:34:09 -0500)


amd-drm-fixes-6.14-2025-03-06:

amdgpu:
- Fix NULL check in DC code
- SMU 14 fix

amdkfd:
- Fix NULL check in queue validation

radeon:
- RS400 HyperZ fix


Andrew Martin (1):
  drm/amdkfd: Fix NULL Pointer Dereference in KFD queue

Kenneth Feng (1):
  drm/amd/pm: always allow ih interrupt from fw

Ma Ke (1):
  drm/amd/display: Fix null check for pipe_ctx->plane_state in 
resource_build_scaling_params

Richard Thier (1):
  drm/radeon: Fix rs400_gpu_init for ATI mobility radeon Xpress 200M

 drivers/gpu/drm/amd/amdkfd/kfd_queue.c|  4 ++--
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c| 12 +---
 drivers/gpu/drm/radeon/r300.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_asic.h  |  1 +
 drivers/gpu/drm/radeon/rs400.c| 18 --
 6 files changed, 24 insertions(+), 17 deletions(-)


[PATCH] drm/amdkfd: clear F8_MODE for gfx950

2025-03-06 Thread Amber Lin
From: Alex Sierra 

Default F8_MODE should be OCP format on gfx950.

Signed-off-by: Alex Sierra 
Reviewed-by: Harish Kasiviswanathan 
Signed-off-by: Amber Lin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 67137e674f1d..210bcc048f4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -64,8 +64,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config |= 1 << 
SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
 
if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
-   KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
-   KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
+   KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
qpd->sh_mem_config |=
(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
 
-- 
2.34.1



[PATCH] drm/amd/amdgpu: Add missing GC 11.5.0 register

2025-03-06 Thread Tom St Denis
Adds register needed for debugging purposes.

Signed-off-by: Tom St Denis 
---
 drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h
index abdb8728156e..d6c02cf815be 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h
@@ -9478,6 +9478,8 @@
 #define regRLC_GFX_IMU_CMD_BASE_IDX
 1
 #define regGFX_IMU_RLC_STATUS  
 0x4054
 #define regGFX_IMU_RLC_STATUS_BASE_IDX 
 1
+#define regGFX_IMU_STATUS  
0x4055
+#define regGFX_IMU_STATUS_BASE_IDX 
1
 #define regGFX_IMU_SOC_DATA
 0x4059
 #define regGFX_IMU_SOC_DATA_BASE_IDX   
 1
 #define regGFX_IMU_SOC_ADDR
 0x405a
-- 
2.45.2



[PATCH v3 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12

2025-03-06 Thread Harish Kasiviswanathan
Define set_cache_memory_policy() for these asics and move all static
changes from update_qpd() which is called each time a queue is created
to set_cache_memory_policy() which is called once during process
initialization

Signed-off-by: Harish Kasiviswanathan 
---
 .../amd/amdkfd/kfd_device_queue_manager_v10.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v11.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v12.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 36 +++-
 4 files changed, 107 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..b5f5f141353b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
 static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
 
 void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
 {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct 
kfd_process_device *pdd)
private_base;
 }
 
-static int update_qpd_v10(struct device_queue_manager *dqm,
-struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size)
 {
-   struct kfd_process_device *pdd;
-
-   pdd = qpd_to_pdd(qpd);
-
-   /* check if sh_mem_config register already configured */
-   if (qpd->sh_mem_config == 0) {
-   qpd->sh_mem_config =
-   (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-   SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
-   (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-   qpd->sh_mem_ape1_limit = 0;
-   qpd->sh_mem_ape1_base = 0;
-   }
-
-   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+   qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << 
SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+   qpd->sh_mem_ape1_limit = 0;
+   qpd->sh_mem_ape1_base = 0;
+   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
 
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+   return true;
+}
 
+static int update_qpd_v10(struct device_queue_manager *dqm,
+struct qcm_process_device *qpd)
+{
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..f436878d0d62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
 static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
 
 void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
 {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
asic_ops->update_qpd = up

[PATCH v4 3/3] drm/amdkfd: Add support for more per-process flag

2025-03-06 Thread Harish Kasiviswanathan
Add support for more per-process flags starting with option to configure
MFMA precision for gfx 9.5

v2: Change flag name to KFD_PROC_FLAG_MFMA_HIGH_PRECISION
Remove unused else condition
v3: Bump the KFD API version
v4: Missed SH_MEM_CONFIG__PRECISION_MODE__SHIFT define. Added it.

Signed-off-by: Harish Kasiviswanathan 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  6 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c |  6 --
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c  | 11 +--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c  |  6 --
 .../drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h|  2 ++
 include/uapi/linux/kfd_ioctl.h|  8 ++--
 11 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8c2e92378b49..1e9dd00620bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -606,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
-   args->alternate_aperture_size))
+   args->alternate_aperture_size,
+   args->misc_process_flag))
err = -EINVAL;
 
 out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d23c6a358d34..2afcc1b4856a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2596,7 +2596,8 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size)
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties)
 {
bool retval = true;
 
@@ -2611,7 +2612,8 @@ static bool set_cache_memory_policy(struct 
device_queue_manager *dqm,
default_policy,
alternate_policy,
alternate_aperture_base,
-   alternate_aperture_size);
+   alternate_aperture_size,
+   misc_process_properties);
 
if (retval)
goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7146e227e2c1..122eb745e9c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -174,7 +174,8 @@ struct device_queue_manager_ops {
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size);
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties);
 
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -210,7 +211,8 @@ struct device_queue_manager_asic_ops {
   enum cache_policy default_policy,
   enum cache_policy alternate_policy,
   void __user *alternate_aperture_base,
-  uint64_t alternate_aperture_size);
+  uint64_t alternate_aperture_size,
+  u32 misc_process_properties);
void(*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 32bedef912b3..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/dr

[PATCH v4 2/3] drm/amdkfd: Set per-process flags only once for gfx9/10/11/12

2025-03-06 Thread Harish Kasiviswanathan
Define set_cache_memory_policy() for these asics and move all static
changes from update_qpd() which is called each time a queue is created
to set_cache_memory_policy() which is called once during process
initialization

Signed-off-by: Harish Kasiviswanathan 
---
 .../amd/amdkfd/kfd_device_queue_manager_v10.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v11.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v12.c | 41 +++
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 36 +++-
 4 files changed, 107 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..b5f5f141353b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,17 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
 static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
 
 void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
 {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +56,27 @@ static uint32_t compute_sh_mem_bases_64bit(struct 
kfd_process_device *pdd)
private_base;
 }
 
-static int update_qpd_v10(struct device_queue_manager *dqm,
-struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size)
 {
-   struct kfd_process_device *pdd;
-
-   pdd = qpd_to_pdd(qpd);
-
-   /* check if sh_mem_config register already configured */
-   if (qpd->sh_mem_config == 0) {
-   qpd->sh_mem_config =
-   (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-   SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
-   (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-   qpd->sh_mem_ape1_limit = 0;
-   qpd->sh_mem_ape1_base = 0;
-   }
-
-   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+   qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << 
SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+   qpd->sh_mem_ape1_limit = 0;
+   qpd->sh_mem_ape1_base = 0;
+   qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
 
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+   return true;
+}
 
+static int update_qpd_v10(struct device_queue_manager *dqm,
+struct qcm_process_device *qpd)
+{
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..f436878d0d62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,17 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
 static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+  struct qcm_process_device *qpd,
+  enum cache_policy default_policy,
+  enum cache_policy alternate_policy,
+  void __user *alternate_aperture_base,
+  uint64_t alternate_aperture_size);
 
 void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
 {
+   asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
asic_ops->update_qpd = up