On Tue, 2021-01-19 at 14:54 +0100, Klaus Jensen wrote: > From: Klaus Jensen <k.jen...@samsung.com> > > When a zone append is processed the controller checks that validity of > the write before assigning the LBA to the append command. This causes > the boundary check to be wrong. > > Fix this by checking the write *after* assigning the LBA. Remove the > append special case from the nvme_check_zone_write and open code it in > nvme_do_write, assigning the slba when basic sanity checks have been > performed. Then check the validity of the resulting write like any other > write command. >
Klaus, I tested this series and it works fine. I however think that the problem that Niklas has found can be fixed in the much simpler way by applying the following to the existing code - --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1152,6 +1152,9 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns, trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); status = NVME_INVALID_FIELD; } + if (zone->w_ptr + nlb > nvme_zone_wr_boundary(zone)) { + status = NVME_ZONE_BOUNDARY_ERROR; + } if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) { trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl); status = NVME_INVALID_FIELD; I am going to send a few patches that take this approach, please take a look. In my testing, it works just as well :) > > In the process, also fix a missing endianness conversion for the zone > append ALBA. Great catch! This could be placed to a separate patch though... A few more comments below. Reported-by: Niklas Cassel <niklas.cas...@wdc.com> Cc: Dmitry Fomichev <dmitry.fomic...@wdc.com> Signed-off-by: Klaus Jensen <k.jen...@samsung.com> --- hw/block/nvme.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 309c26db8ff7..f05dea657b01 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1133,7 +1133,7 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone) static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone, uint64_t slba, - uint32_t nlb, bool append) + uint32_t nlb) { uint16_t status; @@ -1147,16 +1147,8 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns, trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status); } else { assert(nvme_wp_is_valid(zone)); - if (append) { - if (unlikely(slba != zone->d.zslba)) { - trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); - status = NVME_INVALID_FIELD; - } - if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) { - trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl); - status = NVME_INVALID_FIELD; - } - } else if (unlikely(slba != zone->w_ptr)) { + + if (unlikely(slba != zone->w_ptr)) { trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr); status = NVME_ZONE_INVALID_WRITE; @@ -1294,10 +1286,9 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req, } } -static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, - uint32_t nlb) +static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, + uint32_t nlb) { - uint64_t result = zone->w_ptr; uint8_t zs; zone->w_ptr += nlb; @@ -1313,8 +1304,6 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); } } - - return result; } static inline bool nvme_is_write(NvmeRequest *req) @@ -1692,7 +1681,24 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (ns->params.zoned) { zone = nvme_get_zone_by_slba(ns, slba); - status = nvme_check_zone_write(n, ns, zone, slba, nlb, append); + if (append) { This is what I see as a drawback of this approach. You have to move the ZA checks that were previously nicely tucked in nvme_check_zone_write() to the spot below and now this validation is done in two different places for appends and regular writes. This can be avoided. + if (unlikely(slba != zone->d.zslba)) { + trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); + status = NVME_INVALID_FIELD; + goto invalid; + } + + if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) { + trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl); + status = NVME_INVALID_FIELD; + goto invalid; + } + + slba = zone->w_ptr; + res->slba = cpu_to_le64(slba); It is a bit premature to set the result here since the nvme_check_zone_write() below can still fail. As I recall, ALBA is only returned by a successful command. Again, good find about endianness! Regards, Dmitry + } + + status = nvme_check_zone_write(n, ns, zone, slba, nlb); if (status != NVME_SUCCESS) { goto invalid; } @@ -1702,11 +1708,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, goto invalid; } - if (append) { - slba = zone->w_ptr; - } - - res->slba = nvme_advance_zone_wp(ns, zone, nlb); + nvme_advance_zone_wp(ns, zone, nlb); } data_offset = nvme_l2b(ns, slba);