Am 03.06.2016 um 19:03 hat Eric Blake geschrieben: > Sector-based limits are awkward to think about; in our on-going > quest to move to byte-based interfaces, convert max_transfer_length > and opt_transfer_length. Rename them (dropping the _length suffix) > so that the compiler will help us catch the change in semantics > across any rebased code. Improve the documentation, and guarantee > that blk_get_max_transfer() returns a non-zero value. Use unsigned > values, so that we don't have to worry about negative values and > so that bit-twiddling is easier; however, we are still constrained > by 2^31 of signed int in most APIs. > > Of note: the iscsi calculations use a 64-bit number internally, > but the final result is guaranteed to fit in 32 bits. NBD is > fixed to advertise the maximum length of 32M that the qemu and > kernel servers enforce, rather than a number of sectors that > would overflow int when converted back to bytes. scsi-generic > now advertises a maximum always, rather than just when the > underlying device advertised a maximum. > > Signed-off-by: Eric Blake <ebl...@redhat.com>
> @@ -1177,7 +1176,7 @@ static int coroutine_fn > bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, > > if (ret == -ENOTSUP) { > /* Fall back to bounce buffer if write zeroes is unsupported */ > - int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, > + int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer, > MAX_WRITE_ZEROES_BOUNCE_BUFFER); You could consider renaming the variable to max_transfer to keep it consistent with the BlockLimits field name and to make it even more obvious that you converted all uses. > BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; > > @@ -1188,7 +1187,7 @@ static int coroutine_fn > bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, > write_flags &= ~BDRV_REQ_FUA; > need_flush = true; > } > - num = MIN(num, max_xfer_len << BDRV_SECTOR_BITS); > + num = MIN(num, max_xfer_len); > iov.iov_len = num; > if (iov.iov_base == NULL) { > iov.iov_base = qemu_try_blockalign(bs, num); > @@ -1205,7 +1204,7 @@ static int coroutine_fn > bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, > /* Keep bounce buffer around if it is big enough for all > * all future requests. > */ > - if (num < max_xfer_len << BDRV_SECTOR_BITS) { > + if (num < max_xfer_len) { > qemu_vfree(iov.iov_base); > iov.iov_base = NULL; > } > diff --git a/block/iscsi.c b/block/iscsi.c > index 7e78ade..c5855be 100644 > --- a/block/iscsi.c > +++ b/block/iscsi.c > @@ -473,9 +473,10 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t > sector_num, int nb_sectors, > return -EINVAL; > } > > - if (bs->bl.max_transfer_length && nb_sectors > > bs->bl.max_transfer_length) { > + if (bs->bl.max_transfer && > + nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) { > error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len " > - "of %d sectors", nb_sectors, > bs->bl.max_transfer_length); > + "of %" PRIu32 " bytes", nb_sectors, > bs->bl.max_transfer); > return -EINVAL; > } > > @@ -650,9 +651,10 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState > *bs, > return -EINVAL; > } > > - if (bs->bl.max_transfer_length && nb_sectors > > bs->bl.max_transfer_length) { > + if (bs->bl.max_transfer && > + nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) { > error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len " > - "of %d sectors", nb_sectors, > bs->bl.max_transfer_length); > + "of %" PRIu32 " bytes", nb_sectors, > bs->bl.max_transfer); > return -EINVAL; > } > > @@ -1706,13 +1708,14 @@ static void iscsi_refresh_limits(BlockDriverState > *bs, Error **errp) > * iscsi_open(): iscsi targets don't change their limits. */ > > IscsiLun *iscsilun = bs->opaque; > - uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff; > + uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff; > > if (iscsilun->bl.max_xfer_len) { > max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len); > } > > - bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, > iscsilun); > + bs->bl.max_transfer = MIN(BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS, > + max_xfer_len * iscsilun->block_size); Why don't you simply use 0 when you defined it as no limit? If we make some drivers put 0 there and others INT_MAX, chances are that we'll introduce places where we fail to handle 0 correctly. > if (iscsilun->lbp.lbpu) { > if (iscsilun->bl.max_unmap < 0xffffffff) { > @@ -1735,8 +1738,7 @@ static void iscsi_refresh_limits(BlockDriverState *bs, > Error **errp) > } else { > bs->bl.pwrite_zeroes_alignment = iscsilun->block_size; > } > - bs->bl.opt_transfer_length = > - sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun); > + bs->bl.opt_transfer = iscsilun->bl.opt_xfer_len * iscsilun->block_size; > } > > /* Note that this will not re-establish a connection with an iSCSI target - > it > diff --git a/block/nbd.c b/block/nbd.c > index 6015e8b..2ce7b4d 100644 > --- a/block/nbd.c > +++ b/block/nbd.c > @@ -363,7 +363,7 @@ static int nbd_co_flush(BlockDriverState *bs) > static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) > { > bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS; > - bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS; > + bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; > } This introduces a semantic difference and should therefore be a separate patch. Here, it should become UINT32_MAX through mechanical conversion. Or actually, it can't because that's not a power of two. So maybe have the NBD patch first... > static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, > diff --git a/block/raw-posix.c b/block/raw-posix.c > index ce2e20f..f3bd5ef 100644 > --- a/block/raw-posix.c > +++ b/block/raw-posix.c > @@ -752,7 +752,7 @@ static void raw_refresh_limits(BlockDriverState *bs, > Error **errp) > if (S_ISBLK(st.st_mode)) { > int ret = hdev_get_max_transfer_length(s->fd); > if (ret >= 0) { > - bs->bl.max_transfer_length = ret; > + bs->bl.max_transfer = ret << BDRV_SECTOR_BITS; I assume that we don't care about overflows here because in practice the values are very low? Should we check (or maybe better just cap) it anyway? > } > } > } > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > index 284e646..fbaf8f5 100644 > --- a/hw/block/virtio-blk.c > +++ b/hw/block/virtio-blk.c > @@ -382,7 +382,7 @@ static int multireq_compare(const void *a, const void *b) > void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) > { > int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; > - int max_xfer_len = 0; > + uint32_t max_xfer_len = 0; > int64_t sector_num = 0; Again, consider renaming the variable. > if (mrb->num_reqs == 1) { > @@ -391,8 +391,9 @@ void virtio_blk_submit_multireq(BlockBackend *blk, > MultiReqBuffer *mrb) > return; > } > > - max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk); > - max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS); > + max_xfer_len = blk_get_max_transfer(mrb->reqs[0]->dev->blk); > + assert(max_xfer_len && > + max_xfer_len >> BDRV_SECTOR_BITS <= BDRV_REQUEST_MAX_SECTORS); Why can we assert this here? The comment for BlockLimits.max_xfer_len doesn't document any maximum. Of course, as I already said above, it might not happen in practice, but INT_MAX + 1 is theoretically valid and would fail the assertion. > qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs), > &multireq_compare); > @@ -408,8 +409,9 @@ void virtio_blk_submit_multireq(BlockBackend *blk, > MultiReqBuffer *mrb) > */ > if (sector_num + nb_sectors != req->sector_num || > niov > blk_get_max_iov(blk) - req->qiov.niov || > - req->qiov.size / BDRV_SECTOR_SIZE > max_xfer_len || > - nb_sectors > max_xfer_len - req->qiov.size / > BDRV_SECTOR_SIZE) { > + req->qiov.size > max_xfer_len || > + nb_sectors > (max_xfer_len - > + req->qiov.size) / BDRV_SECTOR_SIZE) { > submit_requests(blk, mrb, start, num_reqs, niov); > num_reqs = 0; > } > diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c > index 71372a8..c6fef32 100644 > --- a/hw/scsi/scsi-generic.c > +++ b/hw/scsi/scsi-generic.c > @@ -225,13 +225,13 @@ static void scsi_read_complete(void * opaque, int ret) > if (s->type == TYPE_DISK && > r->req.cmd.buf[0] == INQUIRY && > r->req.cmd.buf[2] == 0xb0) { > - uint32_t max_xfer_len = blk_get_max_transfer_length(s->conf.blk); > - if (max_xfer_len) { > - stl_be_p(&r->buf[8], max_xfer_len); > - /* Also take care of the opt xfer len. */ > - if (ldl_be_p(&r->buf[12]) > max_xfer_len) { > - stl_be_p(&r->buf[12], max_xfer_len); > - } > + uint32_t max_xfer_len = > + blk_get_max_transfer(s->conf.blk) >> BDRV_SECTOR_BITS; > + > + stl_be_p(&r->buf[8], max_xfer_len); > + /* Also take care of the opt xfer len. */ > + if (ldl_be_p(&r->buf[12]) > max_xfer_len) { > + stl_be_p(&r->buf[12], max_xfer_len); > } > } This is another hidden semantic change. Can we have a separate scsi-generic patch first that changes the handling for the max_transfer == 0 case and only then make the change in blk_get_max_transfer() as pure refactoring without a change in behaviour? > scsi_req_data(&r->req, len); > diff --git a/qemu-img.c b/qemu-img.c > index 4b56ad3..577df0d 100644 > --- a/qemu-img.c > +++ b/qemu-img.c > @@ -2074,13 +2074,13 @@ static int img_convert(int argc, char **argv) > } > out_bs = blk_bs(out_blk); > > - /* increase bufsectors from the default 4096 (2M) if opt_transfer_length > + /* increase bufsectors from the default 4096 (2M) if opt_transfer > * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB) > * as maximum. */ > bufsectors = MIN(32768, > - MAX(bufsectors, MAX(out_bs->bl.opt_transfer_length, > - out_bs->bl.discard_alignment)) > - ); > + MAX(bufsectors, > + MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS, > + out_bs->bl.discard_alignment))); > > if (skip_create) { > int64_t output_sectors = blk_nb_sectors(out_blk); > -- > 2.5.5 Kevin