On Jun 18 06:34, Dmitry Fomichev wrote: > Define values and structures that are needed to support Zoned > Namespace Command Set (NVMe TP 4053) in PCI NVMe controller emulator. > > All new protocol definitions are located in include/block/nvme.h > and everything added that is specific to this implementation is kept > in hw/block/nvme.h. > > In order to improve scalability, all open, closed and full zones > are organized in separate linked lists. Consequently, almost all > zone operations don't require scanning of the entire zone array > (which potentially can be quite large) - it is only necessary to > enumerate one or more zone lists. Zone lists are designed to be > position-independent as they can be persisted to the backing file > as a part of zone metadata. NvmeZoneList struct defined in this patch > serves as a head of every zone list. > > NvmeZone structure encapsulates NvmeZoneDescriptor defined in Zoned > Command Set specification and adds a few more fields that are > internal to this implementation. 
> > Signed-off-by: Niklas Cassel <niklas.cas...@wdc.com> > Signed-off-by: Hans Holmberg <hans.holmb...@wdc.com> > Signed-off-by: Ajay Joshi <ajay.jo...@wdc.com> > Signed-off-by: Matias Bjorling <matias.bjorl...@wdc.com> > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com> > Signed-off-by: Alexey Bogoslavsky <alexey.bogoslav...@wdc.com> > Signed-off-by: Dmitry Fomichev <dmitry.fomic...@wdc.com> > --- > hw/block/nvme.h | 130 +++++++++++++++++++++++++++++++++++++++++++ > include/block/nvme.h | 119 ++++++++++++++++++++++++++++++++++++++- > 2 files changed, 248 insertions(+), 1 deletion(-) > > diff --git a/hw/block/nvme.h b/hw/block/nvme.h > index 0d29f75475..2c932b5e29 100644 > --- a/hw/block/nvme.h > +++ b/hw/block/nvme.h > @@ -3,12 +3,22 @@ > > #include "block/nvme.h" > > +#define NVME_DEFAULT_ZONE_SIZE 128 /* MiB */ > +#define NVME_DEFAULT_MAX_ZA_SIZE 128 /* KiB */ > + > typedef struct NvmeParams { > char *serial; > uint32_t num_queues; /* deprecated since 5.1 */ > uint32_t max_ioqpairs; > uint16_t msix_qsize; > uint32_t cmb_size_mb; > + > + bool zoned; > + bool cross_zone_read; > + uint8_t fill_pattern; > + uint32_t zamds_bs;
Rename to zasl. > + uint64_t zone_size; > + uint64_t zone_capacity; > } NvmeParams; > > typedef struct NvmeAsyncEvent { > @@ -17,6 +27,8 @@ typedef struct NvmeAsyncEvent { > > enum NvmeRequestFlags { > NVME_REQ_FLG_HAS_SG = 1 << 0, > + NVME_REQ_FLG_FILL = 1 << 1, > + NVME_REQ_FLG_APPEND = 1 << 2, > }; > > typedef struct NvmeRequest { > @@ -24,6 +36,7 @@ typedef struct NvmeRequest { > BlockAIOCB *aiocb; > uint16_t status; > uint16_t flags; > + uint64_t fill_ofs; > NvmeCqe cqe; > BlockAcctCookie acct; > QEMUSGList qsg; > @@ -61,11 +74,35 @@ typedef struct NvmeCQueue { > QTAILQ_HEAD(, NvmeRequest) req_list; > } NvmeCQueue; > > +typedef struct NvmeZone { > + NvmeZoneDescr d; > + uint64_t tstamp; > + uint32_t next; > + uint32_t prev; > + uint8_t rsvd80[8]; > +} NvmeZone; > + > +#define NVME_ZONE_LIST_NIL UINT_MAX > + > +typedef struct NvmeZoneList { > + uint32_t head; > + uint32_t tail; > + uint32_t size; > + uint8_t rsvd12[4]; > +} NvmeZoneList; > + > typedef struct NvmeNamespace { > NvmeIdNs id_ns; > uint32_t nsid; > uint8_t csi; > QemuUUID uuid; > + > + NvmeIdNsZoned *id_ns_zoned; > + NvmeZone *zone_array; > + NvmeZoneList *exp_open_zones; > + NvmeZoneList *imp_open_zones; > + NvmeZoneList *closed_zones; > + NvmeZoneList *full_zones; > } NvmeNamespace; > > static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) > @@ -100,6 +137,7 @@ typedef struct NvmeCtrl { > uint32_t num_namespaces; > uint32_t max_q_ents; > uint64_t ns_size; > + > uint8_t *cmbuf; > uint32_t irq_status; > uint64_t host_timestamp; /* Timestamp sent by the > host */ > @@ -107,6 +145,12 @@ typedef struct NvmeCtrl { > > HostMemoryBackend *pmrdev; > > + int zone_file_fd; > + uint32_t num_zones; > + uint64_t zone_size_bs; > + uint64_t zone_array_size; > + uint8_t zamds; Rename to zasl. 
> + > NvmeNamespace *namespaces; > NvmeSQueue **sq; > NvmeCQueue **cq; > @@ -121,6 +165,86 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, > NvmeNamespace *ns) > return n->ns_size >> nvme_ns_lbads(ns); > } > > +static inline uint8_t nvme_get_zone_state(NvmeZone *zone) > +{ > + return zone->d.zs >> 4; > +} > + > +static inline void nvme_set_zone_state(NvmeZone *zone, enum NvmeZoneState > state) > +{ > + zone->d.zs = state << 4; > +} > + > +static inline uint64_t nvme_zone_rd_boundary(NvmeCtrl *n, NvmeZone *zone) > +{ > + return zone->d.zslba + n->params.zone_size; > +} > + > +static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) > +{ > + return zone->d.zslba + zone->d.zcap; > +} Everything working on zone->d needs leXX_to_cpu() conversions. > + > +static inline bool nvme_wp_is_valid(NvmeZone *zone) > +{ > + uint8_t st = nvme_get_zone_state(zone); > + > + return st != NVME_ZONE_STATE_FULL && > + st != NVME_ZONE_STATE_READ_ONLY && > + st != NVME_ZONE_STATE_OFFLINE; > +} > + > +/* > + * Initialize a zone list head. > + */ > +static inline void nvme_init_zone_list(NvmeZoneList *zl) > +{ > + zl->head = NVME_ZONE_LIST_NIL; > + zl->tail = NVME_ZONE_LIST_NIL; > + zl->size = 0; > +} > + > +/* > + * Return the number of entries contained in a zone list. > + */ > +static inline uint32_t nvme_zone_list_size(NvmeZoneList *zl) > +{ > + return zl->size; > +} > + > +/* > + * Check if the zone is not currently included in any zone list. > + */ > +static inline bool nvme_zone_not_in_list(NvmeZone *zone) > +{ > + return (bool)(zone->prev == 0 && zone->next == 0); > +} > + > +/* > + * Return the zone at the head of zone list or NULL if the list is empty. > + */ > +static inline NvmeZone *nvme_peek_zone_head(NvmeNamespace *ns, NvmeZoneList > *zl) > +{ > + if (zl->head == NVME_ZONE_LIST_NIL) { > + return NULL; > + } > + return &ns->zone_array[zl->head]; > +} > + > +/* > + * Return the next zone in the list. 
> + */ > +static inline NvmeZone *nvme_next_zone_in_list(NvmeNamespace *ns, NvmeZone > *z, > + NvmeZoneList *zl) > +{ > + assert(!nvme_zone_not_in_list(z)); > + > + if (z->next == NVME_ZONE_LIST_NIL) { > + return NULL; > + } > + return &ns->zone_array[z->next]; > +} > + > static inline int nvme_ilog2(uint64_t i) > { > int log = -1; > @@ -132,4 +256,10 @@ static inline int nvme_ilog2(uint64_t i) > return log; > } > > +static inline void _hw_nvme_check_size(void) > +{ > + QEMU_BUILD_BUG_ON(sizeof(NvmeZoneList) != 16); > + QEMU_BUILD_BUG_ON(sizeof(NvmeZone) != 88); > +} > + > #endif /* HW_NVME_H */ > diff --git a/include/block/nvme.h b/include/block/nvme.h > index 5a1e5e137c..596c39162b 100644 > --- a/include/block/nvme.h > +++ b/include/block/nvme.h > @@ -446,6 +446,9 @@ enum NvmeIoCommands { > NVME_CMD_COMPARE = 0x05, > NVME_CMD_WRITE_ZEROS = 0x08, > NVME_CMD_DSM = 0x09, > + NVME_CMD_ZONE_MGMT_SEND = 0x79, > + NVME_CMD_ZONE_MGMT_RECV = 0x7a, > + NVME_CMD_ZONE_APND = 0x7d, > }; > > typedef struct NvmeDeleteQ { > @@ -539,6 +542,7 @@ enum NvmeNidLength { > > enum NvmeCsi { > NVME_CSI_NVM = 0x00, > + NVME_CSI_ZONED = 0x02, > }; > > #define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi))) > @@ -661,6 +665,7 @@ enum NvmeStatusCodes { > NVME_INVALID_NSID = 0x000b, > NVME_CMD_SEQ_ERROR = 0x000c, > NVME_CMD_SET_CMB_REJECTED = 0x002b, > + NVME_INVALID_CMD_SET = 0x002c, > NVME_LBA_RANGE = 0x0080, > NVME_CAP_EXCEEDED = 0x0081, > NVME_NS_NOT_READY = 0x0082, > @@ -684,6 +689,14 @@ enum NvmeStatusCodes { > NVME_CONFLICTING_ATTRS = 0x0180, > NVME_INVALID_PROT_INFO = 0x0181, > NVME_WRITE_TO_RO = 0x0182, > + NVME_ZONE_BOUNDARY_ERROR = 0x01b8, > + NVME_ZONE_FULL = 0x01b9, > + NVME_ZONE_READ_ONLY = 0x01ba, > + NVME_ZONE_OFFLINE = 0x01bb, > + NVME_ZONE_INVALID_WRITE = 0x01bc, > + NVME_ZONE_TOO_MANY_ACTIVE = 0x01bd, > + NVME_ZONE_TOO_MANY_OPEN = 0x01be, > + NVME_ZONE_INVAL_TRANSITION = 0x01bf, > NVME_WRITE_FAULT = 0x0280, > NVME_UNRECOVERED_READ = 0x0281, > NVME_E2E_GUARD_ERROR = 
0x0282, > @@ -807,7 +820,17 @@ typedef struct NvmeIdCtrl { > uint8_t ieee[3]; > uint8_t cmic; > uint8_t mdts; > - uint8_t rsvd255[178]; > + uint16_t cntlid; > + uint32_t ver; > + uint32_t rtd3r; > + uint32_t rtd3e; > + uint32_t oaes; > + uint32_t ctratt; > + uint8_t rsvd100[28]; > + uint16_t crdt1; > + uint16_t crdt2; > + uint16_t crdt3; > + uint8_t rsvd134[122]; These additions would be better in a separate patch; see my "bump to ..." patches. > uint16_t oacs; > uint8_t acl; > uint8_t aerl; > @@ -832,6 +855,11 @@ typedef struct NvmeIdCtrl { > uint8_t vs[1024]; > } NvmeIdCtrl; > > +typedef struct NvmeIdCtrlZoned { > + uint8_t zamds; Rename to zasl. > + uint8_t rsvd1[4095]; > +} NvmeIdCtrlZoned; > + > enum NvmeIdCtrlOacs { > NVME_OACS_SECURITY = 1 << 0, > NVME_OACS_FORMAT = 1 << 1, > @@ -908,6 +936,12 @@ typedef struct NvmeLBAF { > uint8_t rp; > } NvmeLBAF; > > +typedef struct NvmeLBAFE { > + uint64_t zsze; > + uint8_t zdes; > + uint8_t rsvd9[7]; > +} NvmeLBAFE; > + > typedef struct NvmeIdNs { > uint64_t nsze; > uint64_t ncap; > @@ -930,6 +964,19 @@ typedef struct NvmeIdNs { > uint8_t vs[3712]; > } NvmeIdNs; > > +typedef struct NvmeIdNsZoned { > + uint16_t zoc; > + uint16_t ozcs; > + uint32_t mar; > + uint32_t mor; > + uint32_t rrl; > + uint32_t frl; > + uint8_t rsvd20[2796]; > + NvmeLBAFE lbafe[16]; > + uint8_t rsvd3072[768]; > + uint8_t vs[256]; > +} NvmeIdNsZoned; > + > > /*Deallocate Logical Block Features*/ > #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10) > @@ -962,6 +1009,71 @@ enum NvmeIdNsDps { > DPS_FIRST_EIGHT = 8, > }; > > +enum NvmeZoneAttr { > + NVME_ZA_FINISHED_BY_CTLR = 1 << 0, > + NVME_ZA_FINISH_RECOMMENDED = 1 << 1, > + NVME_ZA_RESET_RECOMMENDED = 1 << 2, > + NVME_ZA_ZD_EXT_VALID = 1 << 7, > +}; > + > +typedef struct NvmeZoneReportHeader { > + uint64_t nr_zones; > + uint8_t rsvd[56]; > +} NvmeZoneReportHeader; > + > +enum NvmeZoneReceiveAction { > + NVME_ZONE_REPORT = 0, > + NVME_ZONE_REPORT_EXTENDED = 1, > +}; > + > +enum NvmeZoneReportType { > + 
NVME_ZONE_REPORT_ALL = 0, > + NVME_ZONE_REPORT_EMPTY = 1, > + NVME_ZONE_REPORT_IMPLICITLY_OPEN = 2, > + NVME_ZONE_REPORT_EXPLICITLY_OPEN = 3, > + NVME_ZONE_REPORT_CLOSED = 4, > + NVME_ZONE_REPORT_FULL = 5, > + NVME_ZONE_REPORT_READ_ONLY = 6, > + NVME_ZONE_REPORT_OFFLINE = 7, > +}; > + > +typedef struct NvmeZoneDescr { > + uint8_t zt; > + uint8_t zs; > + uint8_t za; > + uint8_t rsvd3[5]; > + uint64_t zcap; > + uint64_t zslba; > + uint64_t wp; > + uint8_t rsvd32[32]; > +} NvmeZoneDescr; > + > +enum NvmeZoneState { > + NVME_ZONE_STATE_RESERVED = 0x00, > + NVME_ZONE_STATE_EMPTY = 0x01, > + NVME_ZONE_STATE_IMPLICITLY_OPEN = 0x02, > + NVME_ZONE_STATE_EXPLICITLY_OPEN = 0x03, > + NVME_ZONE_STATE_CLOSED = 0x04, > + NVME_ZONE_STATE_READ_ONLY = 0x0D, > + NVME_ZONE_STATE_FULL = 0x0E, > + NVME_ZONE_STATE_OFFLINE = 0x0F, > +}; > + > +enum NvmeZoneType { > + NVME_ZONE_TYPE_RESERVED = 0x00, > + NVME_ZONE_TYPE_SEQ_WRITE = 0x02, > +}; > + > +enum NvmeZoneSendAction { > + NVME_ZONE_ACTION_RSD = 0x00, > + NVME_ZONE_ACTION_CLOSE = 0x01, > + NVME_ZONE_ACTION_FINISH = 0x02, > + NVME_ZONE_ACTION_OPEN = 0x03, > + NVME_ZONE_ACTION_RESET = 0x04, > + NVME_ZONE_ACTION_OFFLINE = 0x05, > + NVME_ZONE_ACTION_SET_ZD_EXT = 0x10, > +}; > + > static inline void _nvme_check_size(void) > { > QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); > @@ -978,8 +1090,13 @@ static inline void _nvme_check_size(void) > QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); > QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); > QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); > + QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096); > QEMU_BUILD_BUG_ON(sizeof(NvmeNsIdDesc) != 4); > + QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4); > + QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16); > QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); > + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096); > QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096); > + QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64); > } > #endif > -- > 2.21.0 > >