When amdgpu_bad_page_threshold == -1, the driver won't write BADG and RMA; when amdgpu_bad_page_threshold == -2, the driver will write BADG and RMA.
Signed-off-by: ganglxie <gangl...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 26 ++++++++----------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9dfef13babfe..a1b97d516a27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3161,7 +3161,7 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, * which is intended for debugging purpose. * -2: Threshold is determined by a formula * that assumes 1 bad page per 100M of local memory. - * Driver will continue runtime services when threhold is reached. + * Driver will halt runtime services when this custom threshold is reached. * 0 < threshold < max number of bad page records in EEPROM, * A user-defined threshold is set * Driver will halt runtime services when this custom threshold is reached. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 2ddedf476542..50a6e975addb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -568,8 +568,7 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%d exceed threshold:%d", con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); - if ((amdgpu_bad_page_threshold == -1) || - (amdgpu_bad_page_threshold == -2)) { + if (amdgpu_bad_page_threshold == -1) { dev_warn(adev->dev, "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n"); return false; @@ -763,18 +762,16 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", control->ras_num_bad_pages, 
ras->bad_page_cnt_threshold); - control->tbl_hdr.header = RAS_TABLE_HDR_BAD; - if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { - control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; - control->tbl_rai.health_percent = 0; - } - - if ((amdgpu_bad_page_threshold != -1) && - (amdgpu_bad_page_threshold != -2)) + if (amdgpu_bad_page_threshold != -1) { + control->tbl_hdr.header = RAS_TABLE_HDR_BAD; + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { + control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; + control->tbl_rai.health_percent = 0; + } ras->is_rma = true; - - /* ignore the -ENOTSUPP return value */ - amdgpu_dpm_send_rma_reason(adev); + /* ignore the -ENOTSUPP return value */ + amdgpu_dpm_send_rma_reason(adev); + } } if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) @@ -1508,8 +1505,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) dev_warn(adev->dev, "RAS records:%d exceed threshold:%d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); - if ((amdgpu_bad_page_threshold == -1) || - (amdgpu_bad_page_threshold == -2)) { + if ((amdgpu_bad_page_threshold == -1)) { res = 0; dev_warn(adev->dev, "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n"); -- 2.34.1