> From: Klaus Jensen <i...@irrelevant.dk>
> Sent: Thursday, October 20, 2022 11:26 PM
> 
> On Okt 20 17:18, clay.may...@kioxia.com wrote:
> > From: Clay Mayers <clay.may...@kioxia.com>
> >
> > Zones marked with ZONE_FINISH_RECOMMENDED are added to the zone
> > descriptor changed log page.  Once read with RAE cleared, they are
> > removed from the list.
> >
> > Zones stay in the list regardless of what other states the zones may
> > go through so applications must be aware of ABA issues where finish
> > may be recommended, the zone freed and re-opened and now the attribute
> > is now clear.
> >
> > Signed-off-by: Clay Mayers <clay.may...@kioxia.com>
> > ---
> >  hw/nvme/ctrl.c       | 50 ++++++++++++++++++++++++++++++++++++++++++++
> >  hw/nvme/ns.c         |  6 ++++++
> >  hw/nvme/nvme.h       |  8 +++++++
> >  hw/nvme/trace-events |  1 +
> >  include/block/nvme.h |  8 +++++++
> >  5 files changed, 73 insertions(+)
> >
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index d7e9fae0b0..3ffd0fb469 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -1516,15 +1516,42 @@ static void nvme_clear_events(NvmeCtrl *n,
> uint8_t event_type)
> >      }
> >  }
> >
> > +static void nvme_zdc_list(NvmeNamespace *ns, NvmeZoneIdList *zlist, bool
> reset)
> > +{
> > +    NvmeZdc *zdc;
> > +    NvmeZdc *next;
> > +    int index = 0;
> > +
> > +    QTAILQ_FOREACH_SAFE(zdc, &ns->zdc_list, entry, next) {
> > +        if (index >= ARRAY_SIZE(zlist->zids)) {
> > +            break;
> > +        }
> 
> I could've sweared that a previous revision of the spec stated that this
> required `nzid` to be set to 0xffff and the log page be zeroed in this
> case.
> 
> However, upon reading it again, that doesnt seem to be the case, so this
> seems to be just fine.

My reading was it only does that if the list can't be returned.

> 
> > +        zlist->zids[index++] = zdc->zone->d.zslba;
> > +        if (reset) {
> > +            QTAILQ_REMOVE(&ns->zdc_list, zdc, entry);
> > +            zdc->zone->zdc_entry = NULL;
> > +            g_free(zdc);
> > +        }
> > +    }
> > +    zlist->nzid = cpu_to_le16(index);
> > +}
> > +
> >  static void nvme_check_finish(NvmeNamespace *ns, NvmeZoneListHead
> *list)
> >  {
> >      int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> >      NvmeZone *zone;
> > +    NvmeZdc  *zdc;
> >
> >      QTAILQ_FOREACH(zone, list, entry) {
> >          if (zone->finish_ms <= now) {
> >              zone->finish_ms = INT64_MAX;
> >              zone->d.za |= NVME_ZA_FINISH_RECOMMENDED;
> > +            if (!zone->zdc_entry) {
> > +                zdc = g_malloc0(sizeof(*zdc));
> > +                zdc->zone = zone;
> > +                zone->zdc_entry = zdc;
> > +                QTAILQ_INSERT_TAIL(&ns->zdc_list, zdc, entry);
> > +            }
> >          } else if (zone->finish_ms != INT64_MAX) {
> >              timer_mod_anticipate(ns->active_timer, zone->finish_ms);
> >          }
> > @@ -4675,6 +4702,27 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n,
> uint8_t csi, uint32_t buf_len,
> >      return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
> >  }
> >
> > +static uint16_t nvme_changed_zones(NvmeCtrl *n, uint8_t rae, uint32_t
> buf_len,
> > +                                    uint64_t off, NvmeRequest *req)
> > +{
> > +    NvmeNamespace *ns;
> > +    NvmeCmd *cmd = &req->cmd;
> > +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> > +    NvmeZoneIdList zlist = { };
> > +    uint32_t trans_len = MIN(sizeof(zlist) - off, buf_len);
> 
> I believe this is unsafe if off >= sizeof(zlist).

Yep, I copied that from how most of the other log pages are
handled, but missed the check for off > sizeof(zlist).

> 
> > +
> > +    nsid = le32_to_cpu(cmd->nsid);
> > +    trace_pci_nvme_changed_zones(nsid);
> > +
> > +    ns = nvme_ns(n, nsid);
> > +    if (!ns) {
> > +        return NVME_INVALID_NSID | NVME_DNR;
> > +    }
> > +    nvme_zdc_list(ns, &zlist, !rae);
> > +
> > +    return nvme_c2h(n, ((uint8_t *)&zlist) + off, trans_len, req);
> > +}
> > +
> >  static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
> >  {
> >      NvmeCmd *cmd = &req->cmd;
> > @@ -4722,6 +4770,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n,
> NvmeRequest *req)
> >          return nvme_changed_nslist(n, rae, len, off, req);
> >      case NVME_LOG_CMD_EFFECTS:
> >          return nvme_cmd_effects(n, csi, len, off, req);
> > +    case NVME_LOG_CHANGED_ZONE:
> > +        return nvme_changed_zones(n, rae, len, off, req);
> >      default:
> >          trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
> >          return NVME_INVALID_FIELD | NVME_DNR;
> > diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> > index b577f2d8e0..25cd490c99 100644
> > --- a/hw/nvme/ns.c
> > +++ b/hw/nvme/ns.c
> > @@ -240,6 +240,7 @@ static void
> nvme_ns_zoned_init_state(NvmeNamespace *ns)
> >      QTAILQ_INIT(&ns->imp_open_zones);
> >      QTAILQ_INIT(&ns->closed_zones);
> >      QTAILQ_INIT(&ns->full_zones);
> > +    QTAILQ_INIT(&ns->zdc_list);
> >
> >      zone = ns->zone_array;
> >      for (i = 0; i < ns->num_zones; i++, zone++) {
> > @@ -526,8 +527,13 @@ void nvme_ns_shutdown(NvmeNamespace *ns)
> >
> >  void nvme_ns_cleanup(NvmeNamespace *ns)
> >  {
> > +    NvmeZdc *zdc;
> > +
> >      if (ns->params.zoned) {
> >          timer_free(ns->active_timer);
> > +        while ((zdc = QTAILQ_FIRST(&ns->zdc_list))) {
> > +            g_free(zdc);
> > +        }
> >          g_free(ns->id_ns_zoned);
> >          g_free(ns->zone_array);
> >          g_free(ns->zd_extensions);
> > diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> > index 9a54dcdb32..ae65226150 100644
> > --- a/hw/nvme/nvme.h
> > +++ b/hw/nvme/nvme.h
> > @@ -32,6 +32,7 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES >
> NVME_NSID_BROADCAST - 1);
> >
> >  typedef struct NvmeCtrl NvmeCtrl;
> >  typedef struct NvmeNamespace NvmeNamespace;
> > +typedef struct NvmeZone NvmeZone;
> >
> >  #define TYPE_NVME_BUS "nvme-bus"
> >  OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
> > @@ -90,10 +91,16 @@ static inline NvmeNamespace
> *nvme_subsys_ns(NvmeSubsystem *subsys,
> >  #define NVME_NS(obj) \
> >      OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
> >
> > +typedef struct NvmeZdc {
> > +    QTAILQ_ENTRY(NvmeZdc) entry;
> > +    NvmeZone *zone;
> > +} NvmeZdc;
> > +
> >  typedef struct NvmeZone {
> >      NvmeZoneDescr   d;
> >      uint64_t        w_ptr;
> >      int64_t         finish_ms;
> > +    NvmeZdc         *zdc_entry;
> >      QTAILQ_ENTRY(NvmeZone) entry;
> >  } NvmeZone;
> >
> > @@ -172,6 +179,7 @@ typedef struct NvmeNamespace {
> >
> >      int64_t         fto_ms;
> >      QEMUTimer       *active_timer;
> > +    QTAILQ_HEAD(, NvmeZdc) zdc_list;
> >
> >      NvmeNamespaceParams params;
> >
> > diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
> > index fccb79f489..337927e607 100644
> > --- a/hw/nvme/trace-events
> > +++ b/hw/nvme/trace-events
> > @@ -64,6 +64,7 @@ pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
> >  pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16",
> csi=0x%"PRIx8""
> >  pci_nvme_identify_cmd_set(void) "identify i/o command set"
> >  pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
> > +pci_nvme_changed_zones(uint32_t ns) "nsid %"PRIu32""
> >  pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, 
> > uint32_t
> len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8"
> len %"PRIu32" off %"PRIu64""
> >  pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel,
> uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel
> 0x%"PRIx8" cdw11 0x%"PRIx32""
> >  pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save,
> uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save
> 0x%"PRIx8" cdw11 0x%"PRIx32""
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index 8027b7126b..c747cc4948 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -1010,6 +1010,7 @@ enum NvmeLogIdentifier {
> >      NVME_LOG_FW_SLOT_INFO   = 0x03,
> >      NVME_LOG_CHANGED_NSLIST = 0x04,
> >      NVME_LOG_CMD_EFFECTS    = 0x05,
> > +    NVME_LOG_CHANGED_ZONE   = 0xbf,
> >  };
> >
> >  typedef struct QEMU_PACKED NvmePSD {
> > @@ -1617,6 +1618,12 @@ typedef enum NvmeVirtualResourceType {
> >      NVME_VIRT_RES_INTERRUPT     = 0x01,
> >  } NvmeVirtualResourceType;
> >
> > +typedef struct QEMU_PACKED NvmeZoneIdList {
> > +    uint16_t nzid;
> > +    uint16_t rsvd2[3];
> > +    uint64_t zids[511];
> > +} NvmeZoneIdList;
> > +
> >  static inline void _nvme_check_size(void)
> >  {
> >      QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
> > @@ -1655,5 +1662,6 @@ static inline void _nvme_check_size(void)
> >      QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
> >      QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
> >      QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
> > +    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneIdList) != 4096);
> >  }
> >  #endif
> > --
> > 2.27.0
> >
> 
> --
> One of us - No more doubt, silence or taboo about mental illness.

Reply via email to