Move recovery handlers to schedule reset work through the work pool in the reset domain, and delete the individual work items.
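Note for reviewers: the amdgpu_reset_schedule_work() helper used below is introduced earlier in this series and is not part of this patch. A minimal sketch of the assumed interface follows; the item type, the GFP_ATOMIC allocation and the field names are illustrative only (the real helper is expected to draw items from the reset domain's work pool rather than allocating one per call):

#include <linux/slab.h>
#include <linux/workqueue.h>
/* struct amdgpu_device and struct amdgpu_reset_context come from the amdgpu headers. */

/* Hypothetical pooled work item carrying a copy of the caller's reset context. */
struct amdgpu_reset_work_item {
	struct work_struct base;
	struct amdgpu_reset_context context;
	void (*handler)(struct amdgpu_reset_context *);
};

static void amdgpu_reset_work_fn(struct work_struct *work)
{
	struct amdgpu_reset_work_item *item =
		container_of(work, struct amdgpu_reset_work_item, base);

	item->handler(&item->context);
	kfree(item);
}

static bool amdgpu_reset_schedule_work(struct amdgpu_device *adev,
				       struct amdgpu_reset_context *reset_context,
				       void (*handler)(struct amdgpu_reset_context *))
{
	struct amdgpu_reset_work_item *item;

	/* GFP_ATOMIC: some callers run from mailbox interrupt handlers. */
	item = kzalloc(sizeof(*item), GFP_ATOMIC);
	if (!item)
		return false;

	/* The reset context lives on the caller's stack, so copy it. */
	item->context = *reset_context;
	item->handler = handler;
	INIT_WORK(&item->base, amdgpu_reset_work_fn);

	/* Queue on the reset domain's workqueue instead of a per-client work item. */
	return queue_work(adev->reset_domain->wq, &item->base);
}

The debugfs path additionally sets AMDGPU_RESET_SCHEDULE_NOW, which presumably lets the real helper run the handler synchronously (the old code flush_work()ed the queued item); that behaviour is not modelled in the sketch above.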
Signed-off-by: Lijo Lazar <lijo.la...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 +++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 -----
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 40 ++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 71 +++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  1 -
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      | 38 ++++++------
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c      | 44 ++++++--------
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c      | 33 +++++-----
 10 files changed, 118 insertions(+), 159 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2e3c7c15cb8e..4186d8342a15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1096,8 +1096,6 @@ struct amdgpu_device {
 	bool				scpm_enabled;
 	uint32_t			scpm_status;
 
-	struct work_struct		reset_work;
-
 	bool				job_hang;
 	bool				dc_enabled;
 	/* Mask of active clusters */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 629ca1ad75a8..e4c5e8f68843 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -120,21 +120,10 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 	}
 }
 
-
-static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+static void amdgpu_amdkfd_reset_work(struct amdgpu_reset_context *reset_context)
 {
-	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
-						  kfd.reset_work);
-
-	struct amdgpu_reset_context reset_context;
-
-	memset(&reset_context, 0, sizeof(reset_context));
-
-	reset_context.method = AMD_RESET_METHOD_NONE;
-	reset_context.reset_req_dev = adev;
-	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+	amdgpu_device_gpu_recover(reset_context->reset_req_dev, NULL,
+				  reset_context);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
@@ -200,7 +189,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
-		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
 	}
 }
 
@@ -268,9 +256,17 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 
 void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
 {
-	if (amdgpu_device_should_recover_gpu(adev))
-		amdgpu_reset_domain_schedule(adev->reset_domain,
-					     &adev->kfd.reset_work);
+	struct amdgpu_reset_context reset_context;
+
+	if (amdgpu_device_should_recover_gpu(adev)) {
+		memset(&reset_context, 0, sizeof(reset_context));
+		reset_context.method = AMD_RESET_METHOD_NONE;
+		reset_context.reset_req_dev = adev;
+		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+		amdgpu_reset_schedule_work(adev, &reset_context,
+					   amdgpu_amdkfd_reset_work);
+	}
 }
 
 int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b34418e3e006..c36501f9ae0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -102,7 +102,6 @@ struct amdgpu_kfd_dev {
 	int64_t vram_used[MAX_XCP];
 	uint64_t vram_used_aligned[MAX_XCP];
 	bool init_complete;
-	struct work_struct reset_work;
 
 	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
 	struct dev_pagemap pgmap;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9061d79cd387..3e56ccb742bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5152,21 +5152,6 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 
 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
 {
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
-#if defined(CONFIG_DEBUG_FS)
-	if (!amdgpu_sriov_vf(adev))
-		cancel_work(&adev->reset_work);
-#endif
-
-	if (adev->kfd.dev)
-		cancel_work(&adev->kfd.reset_work);
-
-	if (amdgpu_sriov_vf(adev))
-		cancel_work(&adev->virt.flr_work);
-
-	if (con && adev->ras_enabled)
-		cancel_work(&con->recovery_work);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c694b41f6461..40786b135f4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -899,6 +899,14 @@ static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static void
+amdgpu_debugfs_reset_work(struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = reset_context->reset_req_dev;
+
+	amdgpu_device_gpu_recover(adev, NULL, reset_context);
+}
+
 /*
  * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
  *
@@ -908,6 +916,7 @@ static int gpu_recover_get(void *data, u64 *val)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
 	struct drm_device *dev = adev_to_drm(adev);
+	struct amdgpu_reset_context reset_context;
 	int r;
 
 	r = pm_runtime_get_sync(dev->dev);
@@ -916,8 +925,14 @@ static int gpu_recover_get(void *data, u64 *val)
 		return 0;
 	}
 
-	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
-		flush_work(&adev->reset_work);
+	memset(&reset_context, 0, sizeof(reset_context));
+	reset_context.method = AMD_RESET_METHOD_NONE;
+	reset_context.reset_req_dev = adev;
+	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	set_bit(AMDGPU_RESET_SCHEDULE_NOW, &reset_context.flags);
+
+	amdgpu_reset_schedule_work(adev, &reset_context,
+				   amdgpu_debugfs_reset_work);
 
 	*val = atomic_read(&adev->reset_domain->reset_res);
 
@@ -931,22 +946,6 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
 
 DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
 			 "%lld\n");
 
-static void amdgpu_debugfs_reset_work(struct work_struct *work)
-{
-	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
-						  reset_work);
-
-	struct amdgpu_reset_context reset_context;
-
-	memset(&reset_context, 0, sizeof(reset_context));
-
-	reset_context.method = AMD_RESET_METHOD_NONE;
-	reset_context.reset_req_dev = adev;
-	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-}
-
 #endif
 
@@ -958,12 +957,9 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 	debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
 			    &amdgpu_debugfs_fence_info_fops);
 
-	if (!amdgpu_sriov_vf(adev)) {
-
-		INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
+	if (!amdgpu_sriov_vf(adev))
 		debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
 				    &amdgpu_debugfs_gpu_recover_fops);
-	}
 #endif
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7689395e44fd..9e8e904434f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2011,12 +2011,11 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 	return ret;
 }
 
-static void amdgpu_ras_do_recovery(struct work_struct *work)
+static void amdgpu_ras_do_recovery(struct amdgpu_reset_context *reset_context)
 {
-	struct amdgpu_ras *ras =
-		container_of(work, struct amdgpu_ras, recovery_work);
+	struct amdgpu_device *adev = reset_context->reset_req_dev;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 	struct amdgpu_device *remote_adev = NULL;
-	struct amdgpu_device *adev = ras->adev;
 	struct list_head device_list, *device_list_handle = NULL;
 
 	if (!ras->disable_ras_err_cnt_harvest) {
@@ -2040,37 +2039,9 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 		amdgpu_put_xgmi_hive(hive);
 	}
 
-	if (amdgpu_device_should_recover_gpu(ras->adev)) {
-		struct amdgpu_reset_context reset_context;
-		memset(&reset_context, 0, sizeof(reset_context));
-
-		reset_context.method = AMD_RESET_METHOD_NONE;
-		reset_context.reset_req_dev = adev;
-
-		/* Perform full reset in fatal error mode */
-		if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
-			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-		else {
-			clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-			if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
-				ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
-				reset_context.method = AMD_RESET_METHOD_MODE2;
-			}
-
-			/* Fatal error occurs in poison mode, mode1 reset is used to
-			 * recover gpu.
-			 */
-			if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
-				ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
-				set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-				psp_fatal_error_recovery_quirk(&adev->psp);
-			}
-		}
+	if (amdgpu_device_should_recover_gpu(ras->adev))
+		amdgpu_device_gpu_recover(ras->adev, NULL, reset_context);
 
-		amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
-	}
 	atomic_set(&ras->in_recovery, 0);
 }
 
@@ -2313,7 +2284,6 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 	}
 
 	mutex_init(&con->recovery_lock);
-	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
 	atomic_set(&con->in_recovery, 0);
 	con->eeprom_control.bad_channel_bitmap = 0;
 
@@ -3160,9 +3130,38 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct amdgpu_reset_context reset_context;
+
+	memset(&reset_context, 0, sizeof(reset_context));
+
+	reset_context.method = AMD_RESET_METHOD_NONE;
+	reset_context.reset_req_dev = adev;
+
+	/* Perform full reset in fatal error mode */
+	if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) {
+		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	}
+	else {
+		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+		if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+			ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+			reset_context.method = AMD_RESET_METHOD_MODE2;
+		}
+
+		/* Fatal error occurs in poison mode, mode1 reset is used to
+		 * recover gpu.
+		 */
+		if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+			ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+			psp_fatal_error_recovery_quirk(&adev->psp);
+		}
+	}
 
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
-		amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+		amdgpu_reset_schedule_work(ras->adev, &reset_context,
+					   amdgpu_ras_do_recovery);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fabb83e9d9ae..87e0a8b918df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -237,7 +237,6 @@ struct amdgpu_virt {
 	uint32_t			reg_val_offs;
 	struct amdgpu_irq_src		ack_irq;
 	struct amdgpu_irq_src		rcv_irq;
-	struct work_struct		flr_work;
 	struct amdgpu_mm_table		mm_table;
 	const struct amdgpu_virt_ops	*ops;
 	struct amdgpu_vf_error_buffer	vf_errors;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 63725b2ebc03..53fdf6e70ad2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -249,10 +249,9 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_mailbox_flr_work(struct amdgpu_reset_context *reset_context)
 {
-	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
-	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+	struct amdgpu_device *adev = reset_context->reset_req_dev;
 	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
 
 	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
@@ -281,18 +280,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	up_write(&adev->reset_domain->sem);
 
 	/* Trigger recovery for world switch failure if no TDR */
-	if (amdgpu_device_should_recover_gpu(adev)
-	    && (!amdgpu_device_has_job_running(adev) ||
-	    adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
-		struct amdgpu_reset_context reset_context;
-		memset(&reset_context, 0, sizeof(reset_context));
-
-		reset_context.method = AMD_RESET_METHOD_NONE;
-		reset_context.reset_req_dev = adev;
-		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-		amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-	}
+	if (amdgpu_device_should_recover_gpu(adev) &&
+	    (!amdgpu_device_has_job_running(adev) ||
+	     adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
+		amdgpu_device_gpu_recover(adev, NULL, reset_context);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -314,14 +305,21 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 				   struct amdgpu_iv_entry *entry)
 {
 	enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+	struct amdgpu_reset_context reset_context;
 
 	switch (event) {
 	case IDH_FLR_NOTIFICATION:
+		memset(&reset_context, 0, sizeof(reset_context));
+
+		reset_context.method = AMD_RESET_METHOD_NONE;
+		reset_context.reset_req_dev = adev;
+		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
 		if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
-			WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
-								&adev->virt.flr_work),
-				  "Failed to queue work! at %s",
-				  __func__);
+			WARN_ONCE(!amdgpu_reset_schedule_work(
+					  adev, &reset_context,
+					  xgpu_ai_mailbox_flr_work),
+				  "Failed to queue work! at %s", __func__);
 		break;
 	case IDH_QUERY_ALIVE:
 		xgpu_ai_mailbox_send_ack(adev);
@@ -388,8 +386,6 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
 		return r;
 	}
 
-	INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 6a68ee946f1c..171fe3e84ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -271,10 +271,9 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+static void xgpu_nv_mailbox_flr_work(struct amdgpu_reset_context *reset_context)
 {
-	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
-	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+	struct amdgpu_device *adev = reset_context->reset_req_dev;
 	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
 
 	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
@@ -303,21 +302,13 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
 	up_write(&adev->reset_domain->sem);
 
 	/* Trigger recovery for world switch failure if no TDR */
-	if (amdgpu_device_should_recover_gpu(adev)
-	    && (!amdgpu_device_has_job_running(adev) ||
-	    adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
-	    adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
-	    adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
-	    adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
-		struct amdgpu_reset_context reset_context;
-		memset(&reset_context, 0, sizeof(reset_context));
-
-		reset_context.method = AMD_RESET_METHOD_NONE;
-		reset_context.reset_req_dev = adev;
-		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-		amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-	}
+	if (amdgpu_device_should_recover_gpu(adev) &&
+	    (!amdgpu_device_has_job_running(adev) ||
+	     adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+	     adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+	     adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+	     adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
+		amdgpu_device_gpu_recover(adev, NULL, reset_context);
 }
 
 static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -342,14 +333,21 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
 				   struct amdgpu_iv_entry *entry)
 {
 	enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+	struct amdgpu_reset_context reset_context;
 
 	switch (event) {
 	case IDH_FLR_NOTIFICATION:
+		memset(&reset_context, 0, sizeof(reset_context));
+
+		reset_context.method = AMD_RESET_METHOD_NONE;
+		reset_context.reset_req_dev = adev;
+		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
 		if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
-			WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
-								&adev->virt.flr_work),
-				  "Failed to queue work! at %s",
-				  __func__);
+			WARN_ONCE(!amdgpu_reset_schedule_work(
					  adev, &reset_context,
+					  xgpu_nv_mailbox_flr_work),
+				  "Failed to queue work! at %s", __func__);
 		break;
 		/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
 		 * it byfar since that polling thread will handle it,
@@ -413,8 +411,6 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
 		return r;
 	}
 
-	INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 59f53c743362..a39805bc69c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -510,10 +510,9 @@ static int xgpu_vi_set_mailbox_ack_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
+static void xgpu_vi_mailbox_flr_work(struct amdgpu_reset_context *reset_context)
 {
-	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
-	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+	struct amdgpu_device *adev = reset_context->reset_req_dev;
 
 	/* wait until RCV_MSG become 3 */
 	if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
@@ -522,16 +521,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	if (amdgpu_device_should_recover_gpu(adev)) {
-		struct amdgpu_reset_context reset_context;
-		memset(&reset_context, 0, sizeof(reset_context));
-
-		reset_context.method = AMD_RESET_METHOD_NONE;
-		reset_context.reset_req_dev = adev;
-		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
-		amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-	}
+	if (amdgpu_device_should_recover_gpu(adev))
+		amdgpu_device_gpu_recover(adev, NULL, reset_context);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -553,18 +544,24 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
 				   struct amdgpu_iv_entry *entry)
 {
 	int r;
+	struct amdgpu_reset_context reset_context;
 
 	/* trigger gpu-reset by hypervisor only if TDR disabled */
 	if (!amdgpu_gpu_recovery) {
 		/* see what event we get */
 		r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+		memset(&reset_context, 0, sizeof(reset_context));
+
+		reset_context.method = AMD_RESET_METHOD_NONE;
+		reset_context.reset_req_dev = adev;
+		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
 		/* only handle FLR_NOTIFY now */
 		if (!r && !amdgpu_in_reset(adev))
-			WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
-								&adev->virt.flr_work),
-				  "Failed to queue work! at %s",
-				  __func__);
+			WARN_ONCE(!amdgpu_reset_schedule_work(
+					  adev, &reset_context,
+					  xgpu_vi_mailbox_flr_work),
+				  "Failed to queue work! at %s", __func__);
 	}
 
 	return 0;
@@ -618,8 +615,6 @@ int xgpu_vi_mailbox_get_irq(struct amdgpu_device *adev)
 		return r;
 	}
 
-	INIT_WORK(&adev->virt.flr_work, xgpu_vi_mailbox_flr_work);
-
 	return 0;
 }
 
-- 
2.25.1