Thanks Jeff! All of the review comments have been addressed in v5.
On Mon, Oct 31, 2016 at 2:55 PM, Jeff Cody <jc...@redhat.com> wrote:
> On Fri, Oct 28, 2016 at 11:47:11PM -0700, Ashish Mittal wrote:
>> Source code for the qnio library that this code loads can be downloaded from:
>> https://github.com/MittalAshish/libqnio.git
>>
>> Sample command line using the JSON syntax:
>> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
>> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
>> -msg timestamp=on
>> 'json:{"driver":"vxhs","vdisk_id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
>> "server":{"host":"172.172.17.4","port":"9999"}}'
>>
>> Sample command line using the URI syntax:
>> qemu-img convert -f raw -O raw -n
>> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
>> vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0
>>
>> Signed-off-by: Ashish Mittal <ashish.mit...@veritas.com>
>> ---
>> v4 changelog:
>> (1) Incorporated v3 review comments on QAPI changes.
>> (2) Added refcounting for device open/close.
>> Free library resources on last device close.
>>
>> v3 changelog:
>> (1) Added QAPI schema for the VxHS driver.
>>
>> v2 changelog:
>> (1) Changes done in response to v1 comments.
>>
>> TODO:
>> (1) Add qemu-iotest
>>
>> block/Makefile.objs | 2 +
>> block/trace-events | 21 ++
>> block/vxhs.c | 703
>> +++++++++++++++++++++++++++++++++++++++++++++++++++
>> configure | 41 +++
>> qapi/block-core.json | 20 +-
>> 5 files changed, 785 insertions(+), 2 deletions(-)
>> create mode 100644 block/vxhs.c
>>
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 67a036a..58313a2 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
>> block-obj-$(CONFIG_CURL) += curl.o
>> block-obj-$(CONFIG_RBD) += rbd.o
>> block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>> +block-obj-$(CONFIG_VXHS) += vxhs.o
>> block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
>> block-obj-$(CONFIG_LIBSSH2) += ssh.o
>> block-obj-y += accounting.o dirty-bitmap.o
>> @@ -38,6 +39,7 @@ rbd.o-cflags := $(RBD_CFLAGS)
>> rbd.o-libs := $(RBD_LIBS)
>> gluster.o-cflags := $(GLUSTERFS_CFLAGS)
>> gluster.o-libs := $(GLUSTERFS_LIBS)
>> +vxhs.o-libs := $(VXHS_LIBS)
>> ssh.o-cflags := $(LIBSSH2_CFLAGS)
>> ssh.o-libs := $(LIBSSH2_LIBS)
>> archipelago.o-libs := $(ARCHIPELAGO_LIBS)
>> diff --git a/block/trace-events b/block/trace-events
>> index aff8a96..5cb8089 100644
>> --- a/block/trace-events
>> +++ b/block/trace-events
>> @@ -113,3 +113,24 @@ qed_aio_write_data(void *s, void *acb, int ret,
>> uint64_t offset, size_t len) "s
>> qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len,
>> uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>> qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len,
>> uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>> qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t
>> len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
>> +
>> +# block/vxhs.c
>> +vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
>> +vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
>> +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o
>> %d, %d"
>> +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno
>> %d"
>> +vxhs_open_fail(int ret) "Could not open the device. Error = %d"
>> +vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out.
>> Error=%d"
>> +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
>> +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void
>> *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size =
>> %lu offset = %lu ACB = %p. Error = %d, errno = %d"
>> +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat
>> ioctl failed, ret = %d, errno = %d"
>> +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat
>> ioctl returned size %lu"
>> +vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on
>> host-ip %s"
>> +vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
>> +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
>> +vxhs_parse_uri_filename(const char *filename) "URI passed via
>> bdrv_parse_filename %s"
>> +vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk-id from json %s"
>> +vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s,
>> Port %d"
>> +vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to
>> BDRVVXHSState"
>> +vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
>> +vxhs_close(char *vdisk_guid) "Closing vdisk %s"
>> diff --git a/block/vxhs.c b/block/vxhs.c
>> new file mode 100644
>> index 0000000..d95be11
>> --- /dev/null
>> +++ b/block/vxhs.c
>> @@ -0,0 +1,703 @@
>> +/*
>> + * QEMU Block driver for Veritas HyperScale (VxHS)
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "block/block_int.h"
>> +#include <qnio/qnio_api.h>
>> +#include "qapi/qmp/qerror.h"
>> +#include "qapi/qmp/qdict.h"
>> +#include "qapi/qmp/qstring.h"
>> +#include "trace.h"
>> +#include "qemu/uri.h"
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>> +
>> +#define VDISK_FD_READ 0
>> +#define VDISK_FD_WRITE 1
>> +
>> +#define VXHS_OPT_FILENAME "filename"
>> +#define VXHS_OPT_VDISK_ID "vdisk-id"
>> +#define VXHS_OPT_SERVER "server"
>> +#define VXHS_OPT_HOST "host"
>> +#define VXHS_OPT_PORT "port"
>> +
>> +typedef struct QNIOLibState {
>> + int refcnt;
>> + QemuMutex lock;
>
> The mutex lock is not needed, all the open/close paths are on a single IO
> thread.
>
>> + void *context;
>> +} QNIOLibState;
>> +
>> +typedef enum {
>> + VDISK_AIO_READ,
>> + VDISK_AIO_WRITE,
>> + VDISK_STAT
>> +} VDISKAIOCmd;
>> +
>> +/*
>> + * HyperScale AIO callbacks structure
>> + */
>> +typedef struct VXHSAIOCB {
>> + BlockAIOCB common;
>> + int err;
>> + int direction; /* IO direction (r/w) */
>> + size_t io_offset;
>> + size_t size;
>> + QEMUIOVector *qiov;
>> +} VXHSAIOCB;
>> +
>> +typedef struct VXHSvDiskHostsInfo {
>> + int qnio_cfd; /* Channel FD */
>> + int vdisk_rfd; /* vDisk remote FD */
>> + char *hostip; /* Host's IP addresses */
>> + int port; /* Host's port number */
>> +} VXHSvDiskHostsInfo;
>> +
>> +/*
>> + * Structure per vDisk maintained for state
>> + */
>> +typedef struct BDRVVXHSState {
>> + int fds[2];
>> + int64_t vdisk_size;
>> + int event_reader_pos;
>> + VXHSAIOCB *qnio_event_acb;
>> + VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
>> + char *vdisk_guid;
>> +} BDRVVXHSState;
>> +
>> +/* QNIO Library State */
>> +QNIOLibState qniolib;
>
> This should be static.
>
>> +
>> +/* vdisk prefix to pass to qnio */
>> +static const char vdisk_prefix[] = "/dev/of/vdisk";
>> +
>> +/*
>> + * Cleanup QNIO library on last close.
>> + */
>> +static void vxhs_qnio_close(void)
>> +{
>> + qemu_mutex_lock(&qniolib.lock);
>> + qniolib.refcnt--;
>> + if (qniolib.refcnt == 0) {
>> + iio_fini(qniolib.context);
>> + qniolib.context = NULL;
>> + }
>> + qemu_mutex_unlock(&qniolib.lock);
>> +}
>> +
>> +static void vxhs_qnio_iio_close(BDRVVXHSState *s)
>> +{
>> + /*
>> + * Close vDisk device
>> + */
>> + if (s->vdisk_hostinfo.vdisk_rfd >= 0) {
>> + iio_devclose(qniolib.context, 0, s->vdisk_hostinfo.vdisk_rfd);
>> + s->vdisk_hostinfo.vdisk_rfd = -1;
>> + }
>> +
>> + /*
>> + * Close QNIO channel against cached channel-fd
>> + */
>> + if (s->vdisk_hostinfo.qnio_cfd >= 0) {
>> + iio_close(qniolib.context, s->vdisk_hostinfo.qnio_cfd);
>> + s->vdisk_hostinfo.qnio_cfd = -1;
>> + }
>> +
>> + vxhs_qnio_close();
>> +}
>> +
>> +static void vxhs_iio_callback(int32_t rfd, uint32_t reason, void *ctx,
>> + uint32_t error, uint32_t opcode)
>> +{
>> + VXHSAIOCB *acb = NULL;
>> + BDRVVXHSState *s = NULL;
>> + ssize_t ret;
>> +
>> + switch (opcode) {
>> + case IRP_READ_REQUEST:
>> + case IRP_WRITE_REQUEST:
>> +
>> + /*
>> + * ctx is VXHSAIOCB*
>> + * ctx is NULL if error is QNIOERROR_CHANNEL_HUP or
>> + * reason is IIO_REASON_HUP
>> + */
>> + if (ctx) {
>> + acb = ctx;
>> + s = acb->common.bs->opaque;
>> + } else {
>> + trace_vxhs_iio_callback(error, reason);
>> + goto out;
>> + }
>> +
>> + if (error) {
>> + if (!acb->err) {
>> + acb->err = error;
>> + }
>> + trace_vxhs_iio_callback(error, reason);
>> + }
>> +
>> + ret = qemu_write_full(s->fds[VDISK_FD_WRITE], &acb, sizeof(acb));
>> + g_assert(ret == sizeof(acb));
>> + break;
>> +
>> + default:
>> + if (error == QNIOERROR_CHANNEL_HUP) {
>> + /*
>> + * Channel failed, spontaneous notification,
>> + * not in response to I/O
>> + */
>> + trace_vxhs_iio_callback_chnfail(error, errno);
>> + } else {
>> + trace_vxhs_iio_callback_unknwn(opcode, error);
>> + }
>> + break;
>> + }
>> +out:
>> + return;
>> +}
>> +
>> +/*
>> + * Initialize QNIO library on first open.
>> + */
>> +static int vxhs_qnio_open(void)
>> +{
>> + int ret = 0;
>> +
>> + qemu_mutex_lock(&qniolib.lock);
>> + if (qniolib.refcnt != 0) {
>> + g_assert(qniolib.context != NULL);
>> + qniolib.refcnt++;
>> + qemu_mutex_unlock(&qniolib.lock);
>> + return 0;
>> + }
>> + qniolib.context = iio_init(QNIO_VERSION, vxhs_iio_callback);
>> + if (qniolib.context == NULL) {
>> + ret = -ENODEV;
>> + } else {
>> + qniolib.refcnt = 1;
>> + }
>> + qemu_mutex_unlock(&qniolib.lock);
>> + return ret;
>> +}
>> +
>> +static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
>> + int *rfd, const char *file_name)
>> +{
>> + int ret = 0;
>> + bool qnio_open = false;
>
> Variable not needed.
>
>> +
>> + ret = vxhs_qnio_open();
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + /*
>> + * Open qnio channel to storage agent if not opened before.
>> + */
>> + *cfd = iio_open(qniolib.context, of_vsa_addr, 0);
>> + if (*cfd < 0) {
>> + trace_vxhs_qnio_iio_open(of_vsa_addr);
>> + ret = -ENODEV;
>> + goto err_out;
>> + }
>> +
>> + /*
>> + * Open vdisk device
>> + */
>> + *rfd = iio_devopen(qniolib.context, *cfd, file_name, 0);
>> + if (*rfd < 0) {
>> + trace_vxhs_qnio_iio_devopen(file_name);
>> + ret = -ENODEV;
>> + goto err_out;
>> + }
>> + return 0;
>> +
>> +err_out:
>> + if (*cfd >= 0) {
>> + iio_close(qniolib.context, *cfd);
>> + }
>
>
>> + if (qnio_open) {
>> + vxhs_qnio_close();
>> + }
>
> This can be removed, qnio_open is always false.
>
>> + *cfd = -1;
>> + *rfd = -1;
>> + return ret;
>> +}
>> +
>> +static void vxhs_complete_aio(VXHSAIOCB *acb, BDRVVXHSState *s)
>> +{
>> + BlockCompletionFunc *cb = acb->common.cb;
>> + void *opaque = acb->common.opaque;
>> + int ret = 0;
>> +
>> + if (acb->err != 0) {
>> + trace_vxhs_complete_aio(acb, acb->err);
>> + /*
>> + * We mask all the IO errors generically as EIO for upper layers
>> + * Right now our IO Manager uses non standard error codes. Instead
>> + * of confusing upper layers with incorrect interpretation we are
>> + * doing this workaround.
>> + */
>> + ret = (-EIO);
>> + }
>> +
>> + qemu_aio_unref(acb);
>> + cb(opaque, ret);
>> +}
>> +
>> +/*
>> + * This is the HyperScale event handler registered to QEMU.
>> + * It is invoked when any IO gets completed and written on pipe
>> + * by callback called from QNIO thread context. Then it marks
>> + * the AIO as completed, and releases HyperScale AIO callbacks.
>> + */
>> +static void vxhs_aio_event_reader(void *opaque)
>> +{
>> + BDRVVXHSState *s = opaque;
>> + char *p;
>> + ssize_t ret;
>> +
>> + do {
>> + p = (char *)&s->qnio_event_acb;
>> + ret = read(s->fds[VDISK_FD_READ], p + s->event_reader_pos,
>> + sizeof(s->qnio_event_acb) - s->event_reader_pos);
>> + if (ret > 0) {
>> + s->event_reader_pos += ret;
>> + if (s->event_reader_pos == sizeof(s->qnio_event_acb)) {
>> + s->event_reader_pos = 0;
>> + vxhs_complete_aio(s->qnio_event_acb, s);
>> + }
>> + }
>> + } while (ret < 0 && errno == EINTR);
>> +}
>> +
>> +static QemuOptsList runtime_opts = {
>> + .name = "vxhs",
>> + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
>> + .desc = {
>> + {
>> + .name = VXHS_OPT_FILENAME,
>> + .type = QEMU_OPT_STRING,
>> + .help = "URI to the Veritas HyperScale image",
>> + },
>> + {
>> + .name = VXHS_OPT_VDISK_ID,
>> + .type = QEMU_OPT_STRING,
>> + .help = "UUID of the VxHS vdisk",
>> + },
>> + { /* end of list */ }
>> + },
>> +};
>> +
>> +static QemuOptsList runtime_tcp_opts = {
>> + .name = "vxhs_tcp",
>> + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
>> + .desc = {
>> + {
>> + .name = VXHS_OPT_HOST,
>> + .type = QEMU_OPT_STRING,
>> + .help = "host address (ipv4 addresses)",
>> + },
>> + {
>> + .name = VXHS_OPT_PORT,
>> + .type = QEMU_OPT_NUMBER,
>> + .help = "port number on which VxHSD is listening (default
>> 9999)",
>> + .def_value_str = "9999"
>> + },
>> + { /* end of list */ }
>> + },
>> +};
>> +
>> +/*
>> + * Parse the incoming URI and populate *options with the host information.
>> + * URI syntax has the limitation of supporting only one host info.
>> + * To pass multiple host information, use the JSON syntax.
>> + */
>> +static int vxhs_parse_uri(const char *filename, QDict *options)
>> +{
>> + URI *uri = NULL;
>> + char *hoststr, *portstr;
>> + char *port;
>> + int ret = 0;
>> +
>> + trace_vxhs_parse_uri_filename(filename);
>> + uri = uri_parse(filename);
>> + if (!uri || !uri->server || !uri->path) {
>> + uri_free(uri);
>> + return -EINVAL;
>> + }
>> +
>> + hoststr = g_strdup(VXHS_OPT_SERVER".host");
>> + qdict_put(options, hoststr, qstring_from_str(uri->server));
>> + g_free(hoststr);
>> +
>> + portstr = g_strdup(VXHS_OPT_SERVER".port");
>> + if (uri->port) {
>> + port = g_strdup_printf("%d", uri->port);
>> + qdict_put(options, portstr, qstring_from_str(port));
>> + g_free(port);
>> + }
>> + g_free(portstr);
>> +
>> + if (strstr(uri->path, "vxhs") == NULL) {
>> + qdict_put(options, "vdisk-id", qstring_from_str(uri->path));
>> + }
>> +
>> + trace_vxhs_parse_uri_hostinfo(1, uri->server, uri->port);
>> + uri_free(uri);
>> +
>> + return ret;
>> +}
>> +
>> +static void vxhs_parse_filename(const char *filename, QDict *options,
>> + Error **errp)
>> +{
>> + if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options,
>> "server")) {
>> + error_setg(errp, "vdisk-id/server and a file name may not be
>> specified "
>> + "at the same time");
>> + return;
>> + }
>> +
>> + if (strstr(filename, "://")) {
>> + int ret = vxhs_parse_uri(filename, options);
>> + if (ret < 0) {
>> + error_setg(errp, "Invalid URI. URI should be of the form "
>> + " vxhs://<host_ip>:<port>/{<vdisk-id>}");
>> + }
>> + }
>> +}
>> +
>> +static int vxhs_qemu_init(QDict *options, BDRVVXHSState *s,
>> + int *cfd, int *rfd, Error **errp)
>> +{
>> + QDict *backing_options = NULL;
>> + QemuOpts *opts, *tcp_opts;
>> + const char *vxhs_filename;
>> + char *of_vsa_addr = NULL;
>> + Error *local_err = NULL;
>> + const char *vdisk_id_opt;
>> + const char *server_host_opt;
>> + char *file_name = NULL;
>> + char *str = NULL;
>> + int ret = 0;
>> +
>> + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>> + qemu_opts_absorb_qdict(opts, options, &local_err);
>> + if (local_err) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> + vxhs_filename = qemu_opt_get(opts, VXHS_OPT_FILENAME);
>> + if (vxhs_filename) {
>> + trace_vxhs_qemu_init_filename(vxhs_filename);
>> + }
>> +
>> + vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
>> + if (!vdisk_id_opt) {
>> + error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> + s->vdisk_guid = g_strdup(vdisk_id_opt);
>> + trace_vxhs_qemu_init_vdisk(vdisk_id_opt);
>> +
>> + str = g_strdup_printf(VXHS_OPT_SERVER".");
>> + qdict_extract_subqdict(options, &backing_options, str);
>> +
>> + /* Create opts info from runtime_tcp_opts list */
>> + tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
>> + qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
>> + if (local_err) {
>> + qdict_del(backing_options, str);
>> + qemu_opts_del(tcp_opts);
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> + server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
>> + if (!server_host_opt) {
>> + error_setg(&local_err, QERR_MISSING_PARAMETER,
>> + VXHS_OPT_SERVER"."VXHS_OPT_HOST);
>> + ret = -EINVAL;
>> + goto out;
>> + }
>> +
>> + s->vdisk_hostinfo.hostip = g_strdup(server_host_opt);
>> +
>> + s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
>> + VXHS_OPT_PORT),
>> + NULL, 0);
>> +
>> + s->vdisk_hostinfo.qnio_cfd = -1;
>> + s->vdisk_hostinfo.vdisk_rfd = -1;
>> + trace_vxhs_qemu_init(s->vdisk_hostinfo.hostip,
>> + s->vdisk_hostinfo.port);
>> +
>> + qdict_del(backing_options, str);
>> + qemu_opts_del(tcp_opts);
>> +
>> + file_name = g_strdup_printf("%s%s", vdisk_prefix, s->vdisk_guid);
>> + of_vsa_addr = g_strdup_printf("of://%s:%d",
>> + s->vdisk_hostinfo.hostip,
>> + s->vdisk_hostinfo.port);
>> +
>> + ret = vxhs_qnio_iio_open(cfd, of_vsa_addr, rfd, file_name);
>> + if (ret) {
>> + error_setg(&local_err, "Failed qnio_iio_open");
>> + ret = -EIO;
>> + }
>> +
>> +out:
>> + g_free(str);
>> + g_free(file_name);
>> + g_free(of_vsa_addr);
>> + qemu_opts_del(opts);
>> +
>> + if (ret < 0) {
>> + error_propagate(errp, local_err);
>> + g_free(s->vdisk_hostinfo.hostip);
>> + g_free(s->vdisk_guid);
>> + s->vdisk_guid = NULL;
>> + errno = -ret;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int vxhs_open(BlockDriverState *bs, QDict *options,
>> + int bdrv_flags, Error **errp)
>> +{
>> + BDRVVXHSState *s = bs->opaque;
>> + AioContext *aio_context;
>> + int qemu_qnio_cfd = -1;
>> + int qemu_rfd = -1;
>> + int ret = 0;
>> +
>> + ret = vxhs_qemu_init(options, s, &qemu_qnio_cfd, &qemu_rfd, errp);
>> + if (ret < 0) {
>> + trace_vxhs_open_fail(ret);
>> + return ret;
>> + }
>> +
>> + s->vdisk_hostinfo.qnio_cfd = qemu_qnio_cfd;
>> + s->vdisk_hostinfo.vdisk_rfd = qemu_rfd;
>> + s->vdisk_size = -1;
>> +
>> + /*
>> + * Create a pipe for communicating between two threads in different
>> + * context. Set handler for read event, which gets triggered when
>> + * IO completion is done by non-QEMU context.
>> + */
>> + ret = qemu_pipe(s->fds);
>> + if (ret < 0) {
>> + trace_vxhs_open_epipe(ret);
>> + ret = -errno;
>> + goto errout;
>> + }
>> + fcntl(s->fds[VDISK_FD_READ], F_SETFL, O_NONBLOCK);
>> +
>> + aio_context = bdrv_get_aio_context(bs);
>> + aio_set_fd_handler(aio_context, s->fds[VDISK_FD_READ],
>> + false, vxhs_aio_event_reader, NULL, s);
>> + return 0;
>> +
>> +errout:
>> + /*
>> + * Close remote vDisk device if it was opened earlier
>> + */
>> + vxhs_qnio_iio_close(s);
>> + trace_vxhs_open_fail(ret);
>> + return ret;
>> +}
>> +
>> +static const AIOCBInfo vxhs_aiocb_info = {
>> + .aiocb_size = sizeof(VXHSAIOCB)
>> +};
>> +
>> +/*
>> + * This allocates QEMU-VXHS callback for each IO
>> + * and is passed to QNIO. When QNIO completes the work,
>> + * it will be passed back through the callback.
>> + */
>> +static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
>> + QEMUIOVector *qiov, int nb_sectors,
>> + BlockCompletionFunc *cb, void *opaque, int
>> iodir)
>> +{
>> + VXHSAIOCB *acb = NULL;
>> + BDRVVXHSState *s = bs->opaque;
>> + size_t size;
>> + uint64_t offset;
>> + int iio_flags = 0;
>> + int ret = 0;
>> + uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
>> +
>> + offset = sector_num * BDRV_SECTOR_SIZE;
>> + size = nb_sectors * BDRV_SECTOR_SIZE;
>> + acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
>> + /*
>> + * Setup or initialize VXHSAIOCB.
>> + * Every single field should be initialized since
>> + * acb will be picked up from the slab without
>> + * initializing with zero.
>> + */
>> + acb->io_offset = offset;
>> + acb->size = size;
>> + acb->err = 0;
>> + acb->qiov = qiov;
>> + acb->direction = iodir;
>> +
>> + iio_flags = (IIO_FLAG_DONE | IIO_FLAG_ASYNC);
>> +
>> + switch (iodir) {
>> + case VDISK_AIO_WRITE:
>> + ret = iio_writev(qniolib.context, rfd, qiov->iov, qiov->niov,
>> + offset, (uint64_t)size, (void *)acb,
>> iio_flags);
>> + break;
>> + case VDISK_AIO_READ:
>> + ret = iio_readv(qniolib.context, rfd, qiov->iov, qiov->niov,
>> + offset, (uint64_t)size, (void *)acb, iio_flags);
>> + break;
>> + default:
>> + trace_vxhs_aio_rw_invalid(iodir);
>> + goto errout;
>> + }
>> +
>> + if (ret != 0) {
>> + trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
>> + acb, ret, errno);
>> + goto errout;
>> + }
>> + return &acb->common;
>> +
>> +errout:
>> + qemu_aio_unref(acb);
>> + return NULL;
>> +}
>> +
>> +static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
>> + int64_t sector_num, QEMUIOVector *qiov,
>> + int nb_sectors,
>> + BlockCompletionFunc *cb, void *opaque)
>> +{
>> + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
>> + opaque, VDISK_AIO_READ);
>> +}
>> +
>> +static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
>> + int64_t sector_num, QEMUIOVector *qiov,
>> + int nb_sectors,
>> + BlockCompletionFunc *cb, void *opaque)
>> +{
>> + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
>> + cb, opaque, VDISK_AIO_WRITE);
>> +}
>> +
>> +static void vxhs_close(BlockDriverState *bs)
>> +{
>> + BDRVVXHSState *s = bs->opaque;
>> +
>> + trace_vxhs_close(s->vdisk_guid);
>> + close(s->fds[VDISK_FD_READ]);
>> + close(s->fds[VDISK_FD_WRITE]);
>> +
>> + /*
>> + * Clearing all the event handlers for oflame registered to QEMU
>> + */
>> + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
>> + false, NULL, NULL, NULL);
>> + g_free(s->vdisk_guid);
>> + s->vdisk_guid = NULL;
>> + vxhs_qnio_iio_close(s);
>> +
>> + /*
>> + * Free the dynamically allocated hostip string
>> + */
>> + g_free(s->vdisk_hostinfo.hostip);
>> + s->vdisk_hostinfo.hostip = NULL;
>> + s->vdisk_hostinfo.port = 0;
>> +}
>> +
>> +static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
>> +{
>> + int64_t vdisk_size = -1;
>> + int ret = 0;
>> + uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
>> +
>> + ret = iio_ioctl(qniolib.context, rfd, IOR_VDISK_STAT, &vdisk_size,
>> NULL, 0);
>> + if (ret < 0) {
>> + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
>> + return -EIO;
>> + }
>> +
>> + trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
>> + return vdisk_size;
>> +}
>> +
>> +/*
>> + * Returns the size of vDisk in bytes. This is required
>> + * by QEMU block upper block layer so that it is visible
>> + * to guest.
>> + */
>> +static int64_t vxhs_getlength(BlockDriverState *bs)
>> +{
>> + BDRVVXHSState *s = bs->opaque;
>> + int64_t vdisk_size;
>> +
>> + if (s->vdisk_size <= 0) {
>> + vdisk_size = vxhs_get_vdisk_stat(s);
>> + if (vdisk_size < 0) {
>> + return -EIO;
>> + }
>> + s->vdisk_size = vdisk_size;
>> + }
>
> I'm curious regarding the caching of vdisk_size. Is this a performance
> optimization? I'm not seeing the practical benefit of it, and if you ever
> support resizing images it may be an issue.
>
>
>> +
>> + return s->vdisk_size;
>> +}
>> +
>> +static void vxhs_detach_aio_context(BlockDriverState *bs)
>> +{
>> + BDRVVXHSState *s = bs->opaque;
>> +
>> + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
>> + false, NULL, NULL, NULL);
>> +
>> +}
>> +
>> +static void vxhs_attach_aio_context(BlockDriverState *bs,
>> + AioContext *new_context)
>> +{
>> + BDRVVXHSState *s = bs->opaque;
>> +
>> + aio_set_fd_handler(new_context, s->fds[VDISK_FD_READ],
>> + false, vxhs_aio_event_reader, NULL, s);
>> +}
>> +
>> +static BlockDriver bdrv_vxhs = {
>> + .format_name = "vxhs",
>> + .protocol_name = "vxhs",
>> + .instance_size = sizeof(BDRVVXHSState),
>> + .bdrv_file_open = vxhs_open,
>> + .bdrv_parse_filename = vxhs_parse_filename,
>> + .bdrv_close = vxhs_close,
>> + .bdrv_getlength = vxhs_getlength,
>> + .bdrv_aio_readv = vxhs_aio_readv,
>> + .bdrv_aio_writev = vxhs_aio_writev,
>> + .bdrv_detach_aio_context = vxhs_detach_aio_context,
>> + .bdrv_attach_aio_context = vxhs_attach_aio_context,
>> +};
>> +
>> +static void bdrv_vxhs_init(void)
>> +{
>> + qniolib.refcnt = 0;
>> + qniolib.context = NULL;
>
> Once qniolib is static, it will be zero initialized, so this can be dropped.
>
>> + qemu_mutex_init(&qniolib.lock);
>
> (and of course this isn't needed once the lock is removed)
>
>> + bdrv_register(&bdrv_vxhs);
>> +}
>> +
>> +block_init(bdrv_vxhs_init);
>> diff --git a/configure b/configure
>> index f83cdf8..053358d 100755
>> --- a/configure
>> +++ b/configure
>> @@ -321,6 +321,7 @@ numa=""
>> tcmalloc="no"
>> jemalloc="no"
>> replication="yes"
>> +vxhs=""
>>
>> # parse CC options first
>> for opt do
>> @@ -1162,6 +1163,11 @@ for opt do
>> ;;
>> --enable-replication) replication="yes"
>> ;;
>> + --disable-vxhs) vxhs="no"
>> + ;;
>> + --enable-vxhs) vxhs="yes"
>> + ;;
>> +
>> *)
>> echo "ERROR: unknown option $opt"
>> echo "Try '$0 --help' for more information"
>> @@ -1394,6 +1400,7 @@ disabled with --disable-FEATURE, default is enabled if
>> available:
>> tcmalloc tcmalloc support
>> jemalloc jemalloc support
>> replication replication support
>> + vxhs Veritas HyperScale vDisk backend support
>>
>> NOTE: The object files are built at the place where configure is launched
>> EOF
>> @@ -4688,6 +4695,33 @@ if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO
>> $TMPO; then
>> fi
>>
>> ##########################################
>> +# Veritas HyperScale block driver VxHS
>> +# Check if libqnio is installed
>> +
>> +if test "$vxhs" != "no" ; then
>> + cat > $TMPC <<EOF
>> +#include <stdint.h>
>> +#include <qnio/qnio_api.h>
>> +
>> +void *vxhs_callback;
>> +
>> +int main(void) {
>> + iio_init(vxhs_callback);
>> + return 0;
>> +}
>> +EOF
>> + vxhs_libs="-lqnio"
>> + if compile_prog "" "$vxhs_libs" ; then
>> + vxhs=yes
>> + else
>> + if test "$vxhs" = "yes" ; then
>> + feature_not_found "vxhs block device" "Install libqnio. See github"
>> + fi
>> + vxhs=no
>> + fi
>> +fi
>> +
>> +##########################################
>> # End of CC checks
>> # After here, no more $cc or $ld runs
>>
>> @@ -5053,6 +5087,7 @@ echo "tcmalloc support $tcmalloc"
>> echo "jemalloc support $jemalloc"
>> echo "avx2 optimization $avx2_opt"
>> echo "replication support $replication"
>> +echo "VxHS block device $vxhs"
>>
>> if test "$sdl_too_old" = "yes"; then
>> echo "-> Your SDL version is too old - please upgrade to have SDL support"
>> @@ -5661,6 +5696,12 @@ if test "$pthread_setname_np" = "yes" ; then
>> echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
>> fi
>>
>> +if test "$vxhs" = "yes" ; then
>> + echo "CONFIG_VXHS=y" >> $config_host_mak
>> + echo "VXHS_CFLAGS=$vxhs_cflags" >> $config_host_mak
>> + echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
>> +fi
>> +
>> if test "$tcg_interpreter" = "yes"; then
>> QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
>> elif test "$ARCH" = "sparc64" ; then
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index cd1fa7b..1c4a7af 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -1704,12 +1704,13 @@
>> # @host_device, @host_cdrom: Since 2.1
>> # @gluster: Since 2.7
>> # @nbd: Since 2.8
>> +# @vxhs: Since 2.8
>> #
>> # Since: 2.0
>> ##
>> { 'enum': 'BlockdevDriver',
>> 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
>> - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
>> + 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
>> 'host_device', 'http', 'https', 'luks', 'nbd', 'null-aio',
>> 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
>> 'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
>> @@ -2238,6 +2239,20 @@
>> '*export': 'str',
>> '*tls-creds': 'str' } }
>>
>> +# @BlockdevOptionsVxHS
>> +#
>> +# Driver specific block device options for VxHS
>> +#
>> +# @vdisk-id: UUID of VxHS volume
>> +#
>> +# @server: vxhs server IP, port
>> +#
>> +# Since: 2.8
>> +##
>> +{ 'struct': 'BlockdevOptionsVxHS',
>> + 'data': { 'vdisk-id': 'str',
>> + 'server': 'InetSocketAddress' } }
>> +
>> ##
>> # @BlockdevOptions
>> #
>> @@ -2302,7 +2317,8 @@
>> 'vhdx': 'BlockdevOptionsGenericFormat',
>> 'vmdk': 'BlockdevOptionsGenericCOWFormat',
>> 'vpc': 'BlockdevOptionsGenericFormat',
>> - 'vvfat': 'BlockdevOptionsVVFAT'
>> + 'vvfat': 'BlockdevOptionsVVFAT',
>> + 'vxhs': 'BlockdevOptionsVxHS'
>> } }
>>
>> ##
>> --
>> 2.5.5
>>