The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=99c14fb99ffc8fd601ad15bbe54a54fac8f4f203
commit 99c14fb99ffc8fd601ad15bbe54a54fac8f4f203 Author: Warner Losh <i...@freebsd.org> AuthorDate: 2024-05-24 14:32:04 +0000 Commit: Warner Losh <i...@freebsd.org> CommitDate: 2024-05-24 14:32:04 +0000 cam: Drop periph lock when completing I/O with ENOMEM status When biofinish calls g_io_deliver with an error of ENOMEM, that kicks off the slowdown protocol, forcing I/O to go through g_down rather than be directly dispatch. One of the side effects is that the I/O is resubmitted, so the start routines get called recursively, leading to a recursive lock panic. Rather than make the periph lock recursive, drop and reacquire the lock around such calls to biofinish. For nda, this happens only when we can't allocate space to construct a TRIM. For ada and da, this is only for certain ZONE operations. Sponsored by: Netflix Reviewed by: gallatin Differential Revision: https://reviews.freebsd.org/D45310 --- sys/cam/ata/ata_da.c | 9 +++++++++ sys/cam/nvme/nvme_da.c | 10 ++++++++++ sys/cam/scsi/scsi_da.c | 11 ++++++++++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index d4a591943307..6e008cfc8d22 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -2518,7 +2518,16 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) error = ada_zone_cmd(periph, start_ccb, bp, &queue_ccb); if ((error != 0) || (queue_ccb == 0)) { + /* + * g_io_deliver will recurisvely call start + * routine for ENOMEM, so drop the periph + * lock to allow that recursion. + */ + if (error == ENOMEM) + cam_periph_unlock(periph); biofinish(bp, NULL, error); + if (error == ENOMEM) + cam_periph_lock(periph); xpt_release_ccb(start_ccb); return; } diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c index 3f6cf8702870..41c552e2780a 100644 --- a/sys/cam/nvme/nvme_da.c +++ b/sys/cam/nvme/nvme_da.c @@ -1077,7 +1077,17 @@ ndastart(struct cam_periph *periph, union ccb *start_ccb) trim = malloc(sizeof(*trim), M_NVMEDA, M_ZERO | M_NOWAIT); if (trim == NULL) { + /* + * We have to drop the periph lock when + * returning ENOMEM. g_io_deliver treats these + * request differently and will recursively call + * the start routine which causes us to get into + * ndastrategy with the periph lock held, + * leading to a panic when its acquired again. + */ + cam_periph_unlock(periph); biofinish(bp, NULL, ENOMEM); + cam_periph_lock(periph); xpt_release_ccb(start_ccb); ndaschedule(periph); return; diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 0daaff9229b0..59745231bca5 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -3455,10 +3455,19 @@ more: queue_ccb = 0; - error = da_zone_cmd(periph, start_ccb, bp,&queue_ccb); + error = da_zone_cmd(periph, start_ccb, bp, &queue_ccb); if ((error != 0) || (queue_ccb == 0)) { + /* + * g_io_deliver will recurisvely call start + * routine for ENOMEM, so drop the periph + * lock to allow that recursion. + */ + if (error == ENOMEM) + cam_periph_unlock(periph); biofinish(bp, NULL, error); + if (error == ENOMEM) + cam_periph_lock(periph); xpt_release_ccb(start_ccb); return; }