From: Klaus Jensen <[email protected]> Allow the controller to release open resources by transitioning implicitly and explicitly opened zones to closed. This is done using a naive "least recently opened" strategy.
Signed-off-by: Klaus Jensen <[email protected]> --- hw/block/nvme-ns.h | 5 ++++ hw/block/nvme-ns.c | 5 ++++ hw/block/nvme.c | 57 ++++++++++++++++++++++++++++++++++++++++--- hw/block/trace-events | 1 + 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index 3d0269eef6f0..5d8523c047d8 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -38,6 +38,8 @@ typedef struct NvmeZone { uint8_t *zde; uint64_t wp_staging; + + QTAILQ_ENTRY(NvmeZone) lru_entry; } NvmeZone; typedef struct NvmeNamespace { @@ -64,6 +66,9 @@ typedef struct NvmeNamespace { struct { uint32_t open; uint32_t active; + + QTAILQ_HEAD(, NvmeZone) lru_open; + QTAILQ_HEAD(, NvmeZone) lru_active; } resources; } zns; } NvmeNamespace; diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index a01cc5eeb445..cb8b44a78450 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -135,6 +135,9 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns) ns->zns.resources.open = ns->params.zns.mor != 0xffffffff ? ns->params.zns.mor + 1 : ns->zns.num_zones; + QTAILQ_INIT(&ns->zns.resources.lru_open); + QTAILQ_INIT(&ns->zns.resources.lru_active); + for (int i = 0; i < ns->zns.num_zones; i++) { NvmeZone *zone = &ns->zns.zones[i]; zone->zd = &ns->zns.zd[i]; @@ -158,6 +161,8 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns) if (ns->zns.resources.active) { ns->zns.resources.active--; + QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, + lru_entry); break; } diff --git a/hw/block/nvme.c b/hw/block/nvme.c index cc637b3a68e9..1fab9d69261c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1105,11 +1105,47 @@ static inline void nvme_zone_reset_wp(NvmeZone *zone) zone->wp_staging = nvme_zslba(zone); } +static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState to); + +static uint16_t nvme_zrm_release_open(NvmeNamespace *ns) +{ + NvmeZone *candidate; + NvmeZoneState zs; + uint16_t status; + + trace_pci_nvme_zrm_release_open(ns->params.nsid); + + QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_open, lru_entry) { + zs = nvme_zs(candidate); + + /* skip explicitly opened zones */ + if (zs == NVME_ZS_ZSEO) { + continue; + } + + /* skip zones that have in-flight writes */ + if (candidate->wp_staging != nvme_wp(candidate)) { + continue; + } + + status = nvme_zrm_transition(ns, candidate, NVME_ZS_ZSC); + if (status) { + return status; + } + + return NVME_SUCCESS; + } + + return NVME_TOO_MANY_OPEN_ZONES; +} + static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, NvmeZoneState to) { NvmeZoneState from = nvme_zs(zone); NvmeZoneDescriptor *zd = zone->zd; + uint16_t status; trace_pci_nvme_zrm_transition(ns->params.nsid, nvme_zslba(zone), from, to); @@ -1131,6 +1167,7 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, } ns->zns.resources.active--; + QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry); break; @@ -1141,11 +1178,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, } if (!ns->zns.resources.open) { - return NVME_TOO_MANY_OPEN_ZONES; + status = nvme_zrm_release_open(ns); + if (status) { + return status; + } } ns->zns.resources.active--; ns->zns.resources.open--; + QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry); break; @@ -1172,11 +1213,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, case NVME_ZS_ZSF: case NVME_ZS_ZSRO: ns->zns.resources.active++; + ns->zns.resources.open++; + QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry); - /* fallthrough */ + break; case NVME_ZS_ZSC: ns->zns.resources.open++; + QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry); + QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry); break; @@ -1201,16 +1246,22 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, case NVME_ZS_ZSF: case NVME_ZS_ZSRO: ns->zns.resources.active++; + QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry); break; case NVME_ZS_ZSIO: case NVME_ZS_ZSEO: if (!ns->zns.resources.open) { - return NVME_TOO_MANY_OPEN_ZONES; + status = nvme_zrm_release_open(ns); + if (status) { + return status; + } } ns->zns.resources.open--; + QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry); + QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry); break; diff --git a/hw/block/trace-events b/hw/block/trace-events index 2363412a9375..0064fedf31ae 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -83,6 +83,7 @@ pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" pci_nvme_zrm_transition(uint32_t nsid, uint64_t zslba, uint8_t from, uint8_t to) "nsid %"PRIu32" zslba 0x%"PRIx64" from 0x%"PRIx8" to 0x%"PRIx8"" +pci_nvme_zrm_release_open(uint32_t nsid) "nsid %"PRIu32"" pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64"" pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64"" pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" -- 2.28.0
