Currently the block layer cannot handle a single write whose size is bigger than INT_MAX bytes, so split the write-zeroes request into smaller chunks to meet the block layer's requirement.
This patch fixes one WRITE SAME 16 failure in linux VM side. Cc: Max Reitz <mre...@redhat.com> Signed-off-by: Ming Lei <ming....@canonical.com> --- hw/scsi/scsi-disk.c | 67 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 2f75d7d..a843f9b 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -42,6 +42,9 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) #include <scsi/sg.h> #endif +/* bytes in one single write should be held in one 'int' variable */ +#define SCSI_WRITE_ZERO_MAX (INT_MAX) + #define SCSI_WRITE_SAME_MAX 524288 #define SCSI_DMA_BUF_SIZE 131072 #define SCSI_MAX_INQUIRY_LEN 256 @@ -1618,10 +1621,53 @@ typedef struct WriteSameCBData { SCSIDiskReq *r; int64_t sector; int nb_sectors; + int curr_sectors; + int flags; QEMUIOVector qiov; struct iovec iov; } WriteSameCBData; +static void scsi_write_zero_complete(void *opaque, int ret) +{ + WriteSameCBData *data = opaque; + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + goto done; + } + + if (ret < 0) { + if (scsi_handle_rw_error(r, -ret)) { + goto done; + } + } + + data->nb_sectors -= data->curr_sectors; + data->sector += data->curr_sectors; + data->curr_sectors = MIN(data->nb_sectors, SCSI_WRITE_ZERO_MAX / 512); + if (data->nb_sectors) { + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, + data->curr_sectors * s->qdev.blocksize, + BLOCK_ACCT_WRITE); + r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk, + data->sector, + data->curr_sectors, + data->flags, scsi_write_zero_complete, data); + return; + } + + scsi_req_complete(&r->req, GOOD); + +done: + scsi_req_unref(&r->req); + g_free(data); +} + static void scsi_write_same_complete(void 
*opaque, int ret) { WriteSameCBData *data = opaque; @@ -1686,25 +1732,26 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) return; } + data = g_new0(WriteSameCBData, 1); + data->r = r; + data->sector = r->req.cmd.lba * (s->qdev.blocksize / 512); + data->nb_sectors = nb_sectors * (s->qdev.blocksize / 512); + data->curr_sectors = MIN(data->nb_sectors, SCSI_WRITE_ZERO_MAX / 512); if (buffer_is_zero(inbuf, s->qdev.blocksize)) { - int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0; + data->flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0; /* The request is used as the AIO opaque value, so add a ref. */ scsi_req_ref(&r->req); block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, - nb_sectors * s->qdev.blocksize, - BLOCK_ACCT_WRITE); + data->curr_sectors * s->qdev.blocksize, + BLOCK_ACCT_WRITE); r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk, - r->req.cmd.lba * (s->qdev.blocksize / 512), - nb_sectors * (s->qdev.blocksize / 512), - flags, scsi_aio_complete, r); + data->sector, + data->curr_sectors, + data->flags, scsi_write_zero_complete, data); return; } - data = g_new0(WriteSameCBData, 1); - data->r = r; - data->sector = r->req.cmd.lba * (s->qdev.blocksize / 512); - data->nb_sectors = nb_sectors * (s->qdev.blocksize / 512); data->iov.iov_len = MIN(data->nb_sectors * 512, SCSI_WRITE_SAME_MAX); data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk, data->iov.iov_len); -- 1.7.9.5