Hit a warning: WARNING: CPU: 1 PID: 241 at kernel/workqueue.c:1627 __queue_delayed_work+0x6d/0x90 with trace: mod_delayed_work_on+0x59/0x90 nvmet_update_cc+0xee/0x100 [nvmet] nvmet_execute_prop_set+0x72/0x80 [nvmet] nvmet_tcp_try_recv_pdu+0x2f7/0x770 [nvmet_tcp] nvmet_tcp_io_work+0x63f/0xb2d [nvmet_tcp] ...
This can be reproduced easily with a keep-alive timeout of 0: nvme connect -t tcp -n NQN -a ADDR -s PORT --keep-alive-tmo=0 The reason is that an uninitialized work item is started when the initiator connects with zero kato. Although the keep-alive timer is disabled when allocating a ctrl (fix in 0d3b6a8d213a), ka_work still has a chance to run (called by nvmet_start_ctrl to detect a dead host). Initialize ka_work when allocating the ctrl, and set a reasonable kato before scheduling ka_work. Signed-off-by: zhenwei pi <pizhen...@bytedance.com> --- drivers/nvme/target/core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b7b63330b5ef..3c5b2b065476 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -19,6 +19,8 @@ struct workqueue_struct *buffered_io_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); +#define NVMET_DEFAULT_KATO 5 + /* * This read/write semaphore is used to synchronize access to configuration * information on a target system that will result in discovery log page @@ -385,6 +387,11 @@ static void nvmet_keep_alive_timer(struct work_struct *work) if (cmd_seen) { pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", ctrl->cntlid); + + /* run once, trigger from nvmet_start_ctrl to detect dead link */ + if (!ctrl->kato) + return; + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); return; } @@ -403,15 +410,11 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); - INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) { - if (unlikely(ctrl->kato == 0)) - return; - pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid); cancel_delayed_work_sync(&ctrl->ka_work); @@ -1107,6 
+1110,8 @@ static inline u8 nvmet_cc_iocqes(u32 cc) static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) { + u32 kato = ctrl->kato ? ctrl->kato : NVMET_DEFAULT_KATO; + lockdep_assert_held(&ctrl->lock); if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || @@ -1126,7 +1131,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) * in case a host died before it enabled the controller. Hence, simply * reset the keep alive timer when the controller is enabled. */ - mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); + mod_delayed_work(system_wq, &ctrl->ka_work, kato * HZ); } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) @@ -1378,6 +1383,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, /* keep-alive timeout in seconds */ ctrl->kato = DIV_ROUND_UP(kato, 1000); + INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); ctrl->err_counter = 0; spin_lock_init(&ctrl->error_lock); -- 2.11.0