diff mbox

[v6,1/2] block/vxhs.c: Add support for a new block device type called "vxhs"

Message ID 1478566785-4002-2-git-send-email-ashish.mittal@veritas.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ashish Mittal Nov. 8, 2016, 12:59 a.m. UTC
Source code for the qnio library that this code loads can be downloaded from:
https://github.com/MittalAshish/libqnio.git

Sample command line using the JSON syntax:
./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
-vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
-msg timestamp=on
'json:{"driver":"vxhs","vdisk-id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
"server":{"host":"172.172.17.4","port":"9999"}}'

Sample command line using the URI syntax:
qemu-img convert -f raw -O raw -n
/var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0

Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
---
v6 changelog:
(1) Added qemu-iotests for VxHS as a new patch in the series.
(2) Replaced release version from 2.8 to 2.9 in block-core.json.

v5 changelog:
(1) Incorporated v4 review comments.

v4 changelog:
(1) Incorporated v3 review comments on QAPI changes.
(2) Added refcounting for device open/close.
    Free library resources on last device close.

v3 changelog:
(1) Added QAPI schema for the VxHS driver.

v2 changelog:
(1) Changes done in response to v1 comments.

 block/Makefile.objs  |   2 +
 block/trace-events   |  21 ++
 block/vxhs.c         | 689 +++++++++++++++++++++++++++++++++++++++++++++++++++
 configure            |  41 +++
 qapi/block-core.json |  21 +-
 5 files changed, 772 insertions(+), 2 deletions(-)
 create mode 100644 block/vxhs.c

Comments

Stefan Hajnoczi Nov. 14, 2016, 3:07 p.m. UTC | #1
On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
> Source code for the qnio library that this code loads can be downloaded from:
> https://github.com/MittalAshish/libqnio.git
> 
> Sample command line using the JSON syntax:
> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
> -msg timestamp=on
> 'json:{"driver":"vxhs","vdisk-id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
> "server":{"host":"172.172.17.4","port":"9999"}}'
> 
> Sample command line using the URI syntax:
> qemu-img convert -f raw -O raw -n
> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
> vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0
> 
> Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
> ---
> v6 changelog:
> (1) Added qemu-iotests for VxHS as a new patch in the series.
> (2) Replaced release version from 2.8 to 2.9 in block-core.json.
> 
> v5 changelog:
> (1) Incorporated v4 review comments.
> 
> v4 changelog:
> (1) Incorporated v3 review comments on QAPI changes.
> (2) Added refcounting for device open/close.
>     Free library resources on last device close.
> 
> v3 changelog:
> (1) Added QAPI schema for the VxHS driver.
> 
> v2 changelog:
> (1) Changes done in response to v1 comments.
> 
>  block/Makefile.objs  |   2 +
>  block/trace-events   |  21 ++
>  block/vxhs.c         | 689 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  configure            |  41 +++
>  qapi/block-core.json |  21 +-
>  5 files changed, 772 insertions(+), 2 deletions(-)
>  create mode 100644 block/vxhs.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 67a036a..58313a2 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
>  block-obj-$(CONFIG_CURL) += curl.o
>  block-obj-$(CONFIG_RBD) += rbd.o
>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> +block-obj-$(CONFIG_VXHS) += vxhs.o
>  block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
>  block-obj-$(CONFIG_LIBSSH2) += ssh.o
>  block-obj-y += accounting.o dirty-bitmap.o
> @@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
>  rbd.o-libs         := $(RBD_LIBS)
>  gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
>  gluster.o-libs     := $(GLUSTERFS_LIBS)
> +vxhs.o-libs        := $(VXHS_LIBS)
>  ssh.o-cflags       := $(LIBSSH2_CFLAGS)
>  ssh.o-libs         := $(LIBSSH2_LIBS)
>  archipelago.o-libs := $(ARCHIPELAGO_LIBS)
> diff --git a/block/trace-events b/block/trace-events
> index 882c903..efdd5ef 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -112,3 +112,24 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
>  qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>  qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>  qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
> +
> +# block/vxhs.c
> +vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
> +vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
> +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
> +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
> +vxhs_open_fail(int ret) "Could not open the device. Error = %d"
> +vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. Error=%d"
> +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
> +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d"
> +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
> +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu"
> +vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on host-ip %s"
> +vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
> +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
> +vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
> +vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk-id from json %s"
> +vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port %d"
> +vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
> +vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
> +vxhs_close(char *vdisk_guid) "Closing vdisk %s"
> diff --git a/block/vxhs.c b/block/vxhs.c
> new file mode 100644
> index 0000000..8913e8f
> --- /dev/null
> +++ b/block/vxhs.c
> @@ -0,0 +1,689 @@
> +/*
> + * QEMU Block driver for Veritas HyperScale (VxHS)
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "block/block_int.h"
> +#include <qnio/qnio_api.h>

Please move system headers (<>) above user headers ("").  This way you
can be sure the system header isn't affected by any macros defined by
user headers.

> +#include "qapi/qmp/qerror.h"
> +#include "qapi/qmp/qdict.h"
> +#include "qapi/qmp/qstring.h"
> +#include "trace.h"
> +#include "qemu/uri.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"

Is this header file needed?

> +
> +#define VDISK_FD_READ               0
> +#define VDISK_FD_WRITE              1
> +
> +#define VXHS_OPT_FILENAME           "filename"
> +#define VXHS_OPT_VDISK_ID           "vdisk-id"
> +#define VXHS_OPT_SERVER             "server"
> +#define VXHS_OPT_HOST               "host"
> +#define VXHS_OPT_PORT               "port"
> +
> +typedef struct QNIOLibState {
> +    int refcnt;
> +    void *context;
> +} QNIOLibState;
> +
> +typedef enum {
> +    VDISK_AIO_READ,
> +    VDISK_AIO_WRITE,
> +    VDISK_STAT

This is unused.

> +} VDISKAIOCmd;

This typedef is unused but the VDISK_AIO_READ/VDISK_AIO_WRITE enum
constants are used.  Please use the typedef name instead of int.  That
way it's clear that the only valid values are the VDISK_* enum
constants.

> +
> +/*
> + * HyperScale AIO callbacks structure
> + */
> +typedef struct VXHSAIOCB {
> +    BlockAIOCB common;
> +    int err;
> +    int direction; /* IO direction (r/w) */

This field is unused.

> +    size_t io_offset;

This field is unused.

> +    size_t size;

This field is unused.

> +    QEMUIOVector *qiov;
> +} VXHSAIOCB;
> +
> +typedef struct VXHSvDiskHostsInfo {
> +    int qnio_cfd; /* Channel FD */
> +    int vdisk_rfd; /* vDisk remote FD */

Please don't call things FDs if they are not FDs.  This is confusing
because dup(), close(), etc don't work on them.  They are handles.

Handles are an unnecessary layer of indirection in the first place.
libqnio would be simpler if it returned opaque pointers to structs
instead.  This way hash/map lookups can be eliminated.

Instead of storing strings in the hash/map, define structs with useful
fields.  This eliminates some of the string parsing in libqnio.

> +    char *hostip; /* Host's IP addresses */

Is this strictly an IP address?  If a hostname can be used too then
"host" would be clearer name.

> +    int port; /* Host's port number */
> +} VXHSvDiskHostsInfo;
> +
> +/*
> + * Structure per vDisk maintained for state
> + */
> +typedef struct BDRVVXHSState {
> +    int fds[2];
> +    int event_reader_pos;

Why is a pipe still being used?  Back in August I mentioned that this
approach isn't the best practice anymore.  It's more code and slower
than QEMUBH.

You didn't respond to my review comment.  Feel free to disagree with my
comments but please respond so I know what to expect.  Now I'm wondering
whether other comments have been ignored too...

> +static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
> +                              int *rfd, const char *file_name)
> +{
> +    int ret = 0;
> +    bool qnio_open = false;

This variable isn't necessary since the vxhs_qnio_open() error uses
return instead of goto.


Pausing review at this point because I realized that my previous review
comments weren't addressed.
Fam Zheng Nov. 14, 2016, 3:49 p.m. UTC | #2
On Mon, 11/14 15:07, Stefan Hajnoczi wrote:
> On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
> > diff --git a/block/vxhs.c b/block/vxhs.c
> > new file mode 100644
> > index 0000000..8913e8f
> > --- /dev/null
> > +++ b/block/vxhs.c
> > @@ -0,0 +1,689 @@
> > +/*
> > + * QEMU Block driver for Veritas HyperScale (VxHS)
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "block/block_int.h"
> > +#include <qnio/qnio_api.h>
> 
> Please move system headers (<>) above user headers ("").  This way you
> can be sure the system header isn't affected by any macros defined by
> user headers.

Yes, but still after "qemu/osdep.h", which prepares necessary bits for any other
headers.

Fam
Stefan Hajnoczi Nov. 14, 2016, 4:50 p.m. UTC | #3
On Mon, Nov 14, 2016 at 11:49:06PM +0800, Fam Zheng wrote:
> On Mon, 11/14 15:07, Stefan Hajnoczi wrote:
> > On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
> > > diff --git a/block/vxhs.c b/block/vxhs.c
> > > new file mode 100644
> > > index 0000000..8913e8f
> > > --- /dev/null
> > > +++ b/block/vxhs.c
> > > @@ -0,0 +1,689 @@
> > > +/*
> > > + * QEMU Block driver for Veritas HyperScale (VxHS)
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > > + * See the COPYING file in the top-level directory.
> > > + *
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "block/block_int.h"
> > > +#include <qnio/qnio_api.h>
> > 
> > Please move system headers (<>) above user headers ("").  This way you
> > can be sure the system header isn't affected by any macros defined by
> > user headers.
> 
> Yes, but still after "qemu/osdep.h", which prepares necessary bits for any other
> headers.

I disagree.  qnio_api.h is a third-party library that doesn't need QEMU
headers to fix up the environment for it.

By including osdep.h first you mask bugs in qnio_api.h.  Perhaps
qnio_api.h forgot to include a header and we won't notice because
osdep.h happened to bring in those headers first...

Can you explain the rationale for your statement?

Stefan
Fam Zheng Nov. 14, 2016, 6:03 p.m. UTC | #4
On Mon, 11/14 16:50, Stefan Hajnoczi wrote:
> On Mon, Nov 14, 2016 at 11:49:06PM +0800, Fam Zheng wrote:
> > On Mon, 11/14 15:07, Stefan Hajnoczi wrote:
> > > On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
> > > > diff --git a/block/vxhs.c b/block/vxhs.c
> > > > new file mode 100644
> > > > index 0000000..8913e8f
> > > > --- /dev/null
> > > > +++ b/block/vxhs.c
> > > > @@ -0,0 +1,689 @@
> > > > +/*
> > > > + * QEMU Block driver for Veritas HyperScale (VxHS)
> > > > + *
> > > > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > > > + * See the COPYING file in the top-level directory.
> > > > + *
> > > > + */
> > > > +
> > > > +#include "qemu/osdep.h"
> > > > +#include "block/block_int.h"
> > > > +#include <qnio/qnio_api.h>
> > > 
> > > Please move system headers (<>) above user headers ("").  This way you
> > > can be sure the system header isn't affected by any macros defined by
> > > user headers.
> > 
> > Yes, but still after "qemu/osdep.h", which prepares necessary bits for any other
> > headers.
> 
> I disagree.  qnio_api.h is a third-party library that doesn't need QEMU
> headers to fix up the environment for it.
> 
> By including osdep.h first you mask bugs in qnio_api.h.  Perhaps
> qnio_api.h forgot to include a header and we won't notice because
> osdep.h happened to bring in those headers first...
> 
> Can you explain the rationale for your statement?

I point this out just because I rememebr this effort happened not long ago,
which is to make osdep.h always included first (there is also a
./scripts/clean-includes to reorder the include):

https://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg01110.html

I think it is mostly for uncommon compilers that should have little to do with
libqnio in particular, but this is a common practice of current QEMU.

Fam
Eric Blake Nov. 14, 2016, 7:06 p.m. UTC | #5
On 11/14/2016 12:03 PM, Fam Zheng wrote:

>>>> Please move system headers (<>) above user headers ("").  This way you
>>>> can be sure the system header isn't affected by any macros defined by
>>>> user headers.
>>>
>>> Yes, but still after "qemu/osdep.h", which prepares necessary bits for any other
>>> headers.
>>
>> I disagree.  qnio_api.h is a third-party library that doesn't need QEMU
>> headers to fix up the environment for it.
>>
>> By including osdep.h first you mask bugs in qnio_api.h.  Perhaps
>> qnio_api.h forgot to include a header and we won't notice because
>> osdep.h happened to bring in those headers first...
>>
>> Can you explain the rationale for your statement?
> 
> I point this out just because I rememebr this effort happened not long ago,
> which is to make osdep.h always included first (there is also a
> ./scripts/clean-includes to reorder the include):
> 
> https://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg01110.html
> 
> I think it is mostly for uncommon compilers that should have little to do with
> libqnio in particular, but this is a common practice of current QEMU.

If the file is copied in verbatim from a third-party source, then it
should not be including osdep.h, and should be added to the list of
exempt files in scripts/clean-includes - at which point the file SHOULD
be clean because it should already be usable as-is in its third-party
original location.

If we modify the file as part of including it in qemu, then qemu rules
apply and having osdep.h first is good practice.

So I guess you have to determine if libqnio is something that should
compile completely independent from qemu, or whether it is so closely
tied to the rest of qemu that it should follow qemu conventions.
Fam Zheng Nov. 15, 2016, 2:04 a.m. UTC | #6
On Mon, 11/14 13:06, Eric Blake wrote:
> So I guess you have to determine if libqnio is something that should
> compile completely independent from qemu, or whether it is so closely
> tied to the rest of qemu that it should follow qemu conventions.

The question is on include directives in block/vxhs.c, not libnqio library
header, so qemu conventions apply.

Fam

> 
> -- 
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>
Markus Armbruster Nov. 15, 2016, 7:49 a.m. UTC | #7
Fam Zheng <famz@redhat.com> writes:

> On Mon, 11/14 15:07, Stefan Hajnoczi wrote:
>> On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
>> > diff --git a/block/vxhs.c b/block/vxhs.c
>> > new file mode 100644
>> > index 0000000..8913e8f
>> > --- /dev/null
>> > +++ b/block/vxhs.c
>> > @@ -0,0 +1,689 @@
>> > +/*
>> > + * QEMU Block driver for Veritas HyperScale (VxHS)
>> > + *
>> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> > + * See the COPYING file in the top-level directory.
>> > + *
>> > + */
>> > +
>> > +#include "qemu/osdep.h"
>> > +#include "block/block_int.h"
>> > +#include <qnio/qnio_api.h>
>> 
>> Please move system headers (<>) above user headers ("").  This way you
>> can be sure the system header isn't affected by any macros defined by
>> user headers.
>
> Yes, but still after "qemu/osdep.h", which prepares necessary bits for any other
> headers.

Yes, osdep.h must come first.  See also scripts/clean-includes.
Stefan Hajnoczi Nov. 15, 2016, 10:18 a.m. UTC | #8
On Tue, Nov 15, 2016 at 10:04:05AM +0800, Fam Zheng wrote:
> On Mon, 11/14 13:06, Eric Blake wrote:
> > So I guess you have to determine if libqnio is something that should
> > compile completely independent from qemu, or whether it is so closely
> > tied to the rest of qemu that it should follow qemu conventions.
> 
> The question is on include directives in block/vxhs.c, not libnqio library
> header, so qemu conventions apply.

Eric: The libqnio library header is not copied into the QEMU source
tree.  It is an external library dependency like libnfs or libglfs.

Fam, Markus: Unfortunately neither the clean-includes script nor its
patch series cover letter explains *why* osdep.h should be included
before system headers.

The libqnio header is self-contained (i.e. you can #include it and it
has no dependencies) and only used by vxhs.c.  Why is it a good idea to
include qemu/osdep.h first?

Seems like a bad idea to me because it masks missing dependencies in the
libqnio header.

Stefan
Fam Zheng Nov. 15, 2016, 12:44 p.m. UTC | #9
On Tue, 11/15 10:18, Stefan Hajnoczi wrote:
> Fam, Markus: Unfortunately neither the clean-includes script nor its
> patch series cover letter explains *why* osdep.h should be included
> before system headers.

I don't know Peter's exact intention either, but AFAICT it is about the few
quirks in osdep.h:


/* Older versions of C++ don't get definitions of various macros from
 * stdlib.h unless we define these macros before first inclusion of
 * that system header.
 */
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

/* The following block of code temporarily renames the daemon() function so the
 * compiler does not see the warning associated with it in stdlib.h on OSX
 */
#ifdef __APPLE__
#define daemon qemu_fake_daemon_function
#include <stdlib.h>
#undef daemon
extern int daemon(int, int);
#endif

<...>


/* Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
 * the wrong type. Our replacement isn't usable in preprocessor
 * expressions, but it is sufficient for our needs. */
#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
#undef SIZE_MAX
#define SIZE_MAX ((size_t)-1)
#endif
Stefan Hajnoczi Nov. 15, 2016, 2:45 p.m. UTC | #10
On Tue, Nov 15, 2016 at 08:44:17PM +0800, Fam Zheng wrote:
> On Tue, 11/15 10:18, Stefan Hajnoczi wrote:
> > Fam, Markus: Unfortunately neither the clean-includes script nor its
> > patch series cover letter explains *why* osdep.h should be included
> > before system headers.
> 
> I don't know Peter's exact intention either, but AFAICT it is about the few
> quirks in osdep.h:
> 
> 
> /* Older versions of C++ don't get definitions of various macros from
>  * stdlib.h unless we define these macros before first inclusion of
>  * that system header.
>  */
> #ifndef __STDC_CONSTANT_MACROS
> #define __STDC_CONSTANT_MACROS
> #endif
> #ifndef __STDC_LIMIT_MACROS
> #define __STDC_LIMIT_MACROS
> #endif
> #ifndef __STDC_FORMAT_MACROS
> #define __STDC_FORMAT_MACROS
> #endif
> 
> /* The following block of code temporarily renames the daemon() function so the
>  * compiler does not see the warning associated with it in stdlib.h on OSX
>  */
> #ifdef __APPLE__
> #define daemon qemu_fake_daemon_function
> #include <stdlib.h>
> #undef daemon
> extern int daemon(int, int);
> #endif
> 
> <...>
> 
> 
> /* Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
>  * the wrong type. Our replacement isn't usable in preprocessor
>  * expressions, but it is sufficient for our needs. */
> #if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
> #undef SIZE_MAX
> #define SIZE_MAX ((size_t)-1)
> #endif

This is exactly the kind of stuff we should not be doing to the libqnio
header file!

Those redefinitions are useful for QEMU code.  They should not be done
to third-party system headers though.

Stefan
Fam Zheng Nov. 15, 2016, 3 p.m. UTC | #11
On Tue, 11/15 14:45, Stefan Hajnoczi wrote:
> On Tue, Nov 15, 2016 at 08:44:17PM +0800, Fam Zheng wrote:
> > On Tue, 11/15 10:18, Stefan Hajnoczi wrote:
> > > Fam, Markus: Unfortunately neither the clean-includes script nor its
> > > patch series cover letter explains *why* osdep.h should be included
> > > before system headers.
> > 
> > I don't know Peter's exact intention either, but AFAICT it is about the few
> > quirks in osdep.h:
> > 
> > 
> > /* Older versions of C++ don't get definitions of various macros from
> >  * stdlib.h unless we define these macros before first inclusion of
> >  * that system header.
> >  */
> > #ifndef __STDC_CONSTANT_MACROS
> > #define __STDC_CONSTANT_MACROS
> > #endif
> > #ifndef __STDC_LIMIT_MACROS
> > #define __STDC_LIMIT_MACROS
> > #endif
> > #ifndef __STDC_FORMAT_MACROS
> > #define __STDC_FORMAT_MACROS
> > #endif
> > 
> > /* The following block of code temporarily renames the daemon() function so the
> >  * compiler does not see the warning associated with it in stdlib.h on OSX
> >  */
> > #ifdef __APPLE__
> > #define daemon qemu_fake_daemon_function
> > #include <stdlib.h>
> > #undef daemon
> > extern int daemon(int, int);
> > #endif
> > 
> > <...>
> > 
> > 
> > /* Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
> >  * the wrong type. Our replacement isn't usable in preprocessor
> >  * expressions, but it is sufficient for our needs. */
> > #if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
> > #undef SIZE_MAX
> > #define SIZE_MAX ((size_t)-1)
> > #endif
> 
> This is exactly the kind of stuff we should not be doing to the libqnio
> header file!
> 
> Those redefinitions are useful for QEMU code.  They should not be done
> to third-party system headers though.

But we still want to include osdep.h before libqnio pulls in stdint.h. If we
don't make osdep.h the very first, that cannot be guaranteed.

Fam
Eric Blake Nov. 15, 2016, 3:52 p.m. UTC | #12
On 11/15/2016 04:18 AM, Stefan Hajnoczi wrote:
> On Tue, Nov 15, 2016 at 10:04:05AM +0800, Fam Zheng wrote:
>> On Mon, 11/14 13:06, Eric Blake wrote:
>>> So I guess you have to determine if libqnio is something that should
>>> compile completely independent from qemu, or whether it is so closely
>>> tied to the rest of qemu that it should follow qemu conventions.
>>
>> The question is on include directives in block/vxhs.c, not libnqio library
>> header, so qemu conventions apply.
> 
> Eric: The libqnio library header is not copied into the QEMU source
> tree.  It is an external library dependency like libnfs or libglfs.
> 
> Fam, Markus: Unfortunately neither the clean-includes script nor its
> patch series cover letter explains *why* osdep.h should be included
> before system headers.

For the same reason that <config.h> should be included first before
system headers in an autoconf'd project. Many platforms ship
(semi-)broken system headers, where configure detects AND works around
the problems, but only if the workaround CONSISTENTLY happens before any
system header is included.  It is a LOT harder to track down compilation
or link or even subtle runtime failures if you don't consistently use
your workaround all the time, such that some .c files got the workaround
but others did not.

> The libqnio header is self-contained (i.e. you can #include it and it
> has no dependencies) and only used by vxhs.c.  Why is it a good idea to
> include qemu/osdep.h first?
> 
> Seems like a bad idea to me because it masks missing dependencies in the
> libqnio header.

If the libqnio header is completely independent, then it should not be
part of the qemu source tree (with "" inclusion), but should instead be
externally included (with <> inclusion), just like any other third-party
library we link against.  But even then, we STILL want our osdep.h to
occur before ANY third-party library, so that we have a CONSISTENT
environment, which is why we mandate that osdep.h be included first in
all qemu .c files.
Stefan Hajnoczi Nov. 15, 2016, 7:20 p.m. UTC | #13
On Tue, Nov 15, 2016 at 11:00:16PM +0800, Fam Zheng wrote:
> On Tue, 11/15 14:45, Stefan Hajnoczi wrote:
> > On Tue, Nov 15, 2016 at 08:44:17PM +0800, Fam Zheng wrote:
> > > On Tue, 11/15 10:18, Stefan Hajnoczi wrote:
> > > > Fam, Markus: Unfortunately neither the clean-includes script nor its
> > > > patch series cover letter explains *why* osdep.h should be included
> > > > before system headers.
> > > 
> > > I don't know Peter's exact intention either, but AFAICT it is about the few
> > > quirks in osdep.h:
> > > 
> > > 
> > > /* Older versions of C++ don't get definitions of various macros from
> > >  * stdlib.h unless we define these macros before first inclusion of
> > >  * that system header.
> > >  */
> > > #ifndef __STDC_CONSTANT_MACROS
> > > #define __STDC_CONSTANT_MACROS
> > > #endif
> > > #ifndef __STDC_LIMIT_MACROS
> > > #define __STDC_LIMIT_MACROS
> > > #endif
> > > #ifndef __STDC_FORMAT_MACROS
> > > #define __STDC_FORMAT_MACROS
> > > #endif
> > > 
> > > /* The following block of code temporarily renames the daemon() function so the
> > >  * compiler does not see the warning associated with it in stdlib.h on OSX
> > >  */
> > > #ifdef __APPLE__
> > > #define daemon qemu_fake_daemon_function
> > > #include <stdlib.h>
> > > #undef daemon
> > > extern int daemon(int, int);
> > > #endif
> > > 
> > > <...>
> > > 
> > > 
> > > /* Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
> > >  * the wrong type. Our replacement isn't usable in preprocessor
> > >  * expressions, but it is sufficient for our needs. */
> > > #if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
> > > #undef SIZE_MAX
> > > #define SIZE_MAX ((size_t)-1)
> > > #endif
> > 
> > This is exactly the kind of stuff we should not be doing to the libqnio
> > header file!
> > 
> > Those redefinitions are useful for QEMU code.  They should not be done
> > to third-party system headers though.
> 
> But we still want to include osdep.h before libqnio pulls in stdint.h. If we
> don't make osdep.h the very first, that cannot be guaranteed.

Thanks, that makes sense!

I'll add a comment to clean-includes with this explanation of why
osdep.h must go before system headers.

Stefan
Ashish Mittal Nov. 15, 2016, 8:39 p.m. UTC | #14
Thanks for concluding on this.

I will rearrange the qnio_api.h header accordingly as follows:

+#include "qemu/osdep.h"
+#include <qnio/qnio_api.h>   <=== after osdep.h
+#include "block/block_int.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+#include "qemu/uri.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"  <==== remove


On Tue, Nov 15, 2016 at 11:20 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Nov 15, 2016 at 11:00:16PM +0800, Fam Zheng wrote:
>> On Tue, 11/15 14:45, Stefan Hajnoczi wrote:
>> > On Tue, Nov 15, 2016 at 08:44:17PM +0800, Fam Zheng wrote:
>> > > On Tue, 11/15 10:18, Stefan Hajnoczi wrote:
>> > > > Fam, Markus: Unfortunately neither the clean-includes script nor its
>> > > > patch series cover letter explains *why* osdep.h should be included
>> > > > before system headers.
>> > >
>> > > I don't know Peter's exact intention either, but AFAICT it is about the few
>> > > quirks in osdep.h:
>> > >
>> > >
>> > > /* Older versions of C++ don't get definitions of various macros from
>> > >  * stdlib.h unless we define these macros before first inclusion of
>> > >  * that system header.
>> > >  */
>> > > #ifndef __STDC_CONSTANT_MACROS
>> > > #define __STDC_CONSTANT_MACROS
>> > > #endif
>> > > #ifndef __STDC_LIMIT_MACROS
>> > > #define __STDC_LIMIT_MACROS
>> > > #endif
>> > > #ifndef __STDC_FORMAT_MACROS
>> > > #define __STDC_FORMAT_MACROS
>> > > #endif
>> > >
>> > > /* The following block of code temporarily renames the daemon() function so the
>> > >  * compiler does not see the warning associated with it in stdlib.h on OSX
>> > >  */
>> > > #ifdef __APPLE__
>> > > #define daemon qemu_fake_daemon_function
>> > > #include <stdlib.h>
>> > > #undef daemon
>> > > extern int daemon(int, int);
>> > > #endif
>> > >
>> > > <...>
>> > >
>> > >
>> > > /* Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
>> > >  * the wrong type. Our replacement isn't usable in preprocessor
>> > >  * expressions, but it is sufficient for our needs. */
>> > > #if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
>> > > #undef SIZE_MAX
>> > > #define SIZE_MAX ((size_t)-1)
>> > > #endif
>> >
>> > This is exactly the kind of stuff we should not be doing to the libqnio
>> > header file!
>> >
>> > Those redefinitions are useful for QEMU code.  They should not be done
>> > to third-party system headers though.
>>
>> But we still want to include osdep.h before libqnio pulls in stdint.h. If we
>> don't make osdep.h the very first, that cannot be guaranteed.
>
> Thanks, that makes sense!
>
> I'll add a comment to clean-includes with this explanation of why
> osdep.h must go before system headers.
>
> Stefan
Stefan Hajnoczi Nov. 15, 2016, 8:40 p.m. UTC | #15
On Tue, Nov 15, 2016 at 8:39 PM, ashish mittal <ashmit602@gmail.com> wrote:
> Thanks for concluding on this.
>
> I will rearrange the qnio_api.h header accordingly as follows:
>
> +#include "qemu/osdep.h"
> +#include <qnio/qnio_api.h>   <=== after osdep.h
> +#include "block/block_int.h"
> +#include "qapi/qmp/qerror.h"
> +#include "qapi/qmp/qdict.h"
> +#include "qapi/qmp/qstring.h"
> +#include "trace.h"
> +#include "qemu/uri.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"  <==== remove

Looks good.

Stefan
Ashish Mittal Nov. 15, 2016, 8:49 p.m. UTC | #16
On Mon, Nov 14, 2016 at 7:07 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
>> Source code for the qnio library that this code loads can be downloaded from:
>> https://github.com/MittalAshish/libqnio.git
>>
>> Sample command line using the JSON syntax:
>> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
>> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
>> -msg timestamp=on
>> 'json:{"driver":"vxhs","vdisk-id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
>> "server":{"host":"172.172.17.4","port":"9999"}}'
>>
>> Sample command line using the URI syntax:
>> qemu-img convert -f raw -O raw -n
>> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
>> vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0
>>
>> Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
>> ---
>> v6 changelog:
>> (1) Added qemu-iotests for VxHS as a new patch in the series.
>> (2) Replaced release version from 2.8 to 2.9 in block-core.json.
>>
>> v5 changelog:
>> (1) Incorporated v4 review comments.
>>
>> v4 changelog:
>> (1) Incorporated v3 review comments on QAPI changes.
>> (2) Added refcounting for device open/close.
>>     Free library resources on last device close.
>>
>> v3 changelog:
>> (1) Added QAPI schema for the VxHS driver.
>>
>> v2 changelog:
>> (1) Changes done in response to v1 comments.
>>
>>  block/Makefile.objs  |   2 +
>>  block/trace-events   |  21 ++
>>  block/vxhs.c         | 689 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  configure            |  41 +++
>>  qapi/block-core.json |  21 +-
>>  5 files changed, 772 insertions(+), 2 deletions(-)
>>  create mode 100644 block/vxhs.c
>>
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 67a036a..58313a2 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
>>  block-obj-$(CONFIG_CURL) += curl.o
>>  block-obj-$(CONFIG_RBD) += rbd.o
>>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>> +block-obj-$(CONFIG_VXHS) += vxhs.o
>>  block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
>>  block-obj-$(CONFIG_LIBSSH2) += ssh.o
>>  block-obj-y += accounting.o dirty-bitmap.o
>> @@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
>>  rbd.o-libs         := $(RBD_LIBS)
>>  gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
>>  gluster.o-libs     := $(GLUSTERFS_LIBS)
>> +vxhs.o-libs        := $(VXHS_LIBS)
>>  ssh.o-cflags       := $(LIBSSH2_CFLAGS)
>>  ssh.o-libs         := $(LIBSSH2_LIBS)
>>  archipelago.o-libs := $(ARCHIPELAGO_LIBS)
>> diff --git a/block/trace-events b/block/trace-events
>> index 882c903..efdd5ef 100644
>> --- a/block/trace-events
>> +++ b/block/trace-events
>> @@ -112,3 +112,24 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
>>  qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>>  qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>>  qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
>> +
>> +# block/vxhs.c
>> +vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
>> +vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
>> +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
>> +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
>> +vxhs_open_fail(int ret) "Could not open the device. Error = %d"
>> +vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. Error=%d"
>> +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
>> +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d"
>> +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
>> +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu"
>> +vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on host-ip %s"
>> +vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
>> +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
>> +vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
>> +vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk-id from json %s"
>> +vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port %d"
>> +vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
>> +vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
>> +vxhs_close(char *vdisk_guid) "Closing vdisk %s"
>> diff --git a/block/vxhs.c b/block/vxhs.c
>> new file mode 100644
>> index 0000000..8913e8f
>> --- /dev/null
>> +++ b/block/vxhs.c
>> @@ -0,0 +1,689 @@
>> +/*
>> + * QEMU Block driver for Veritas HyperScale (VxHS)
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "block/block_int.h"
>> +#include <qnio/qnio_api.h>
>
> Please move system headers (<>) above user headers ("").  This way you
> can be sure the system header isn't affected by any macros defined by
> user headers.
>
>> +#include "qapi/qmp/qerror.h"
>> +#include "qapi/qmp/qdict.h"
>> +#include "qapi/qmp/qstring.h"
>> +#include "trace.h"
>> +#include "qemu/uri.h"
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>
> Is this header file needed?
>
>> +
>> +#define VDISK_FD_READ               0
>> +#define VDISK_FD_WRITE              1
>> +
>> +#define VXHS_OPT_FILENAME           "filename"
>> +#define VXHS_OPT_VDISK_ID           "vdisk-id"
>> +#define VXHS_OPT_SERVER             "server"
>> +#define VXHS_OPT_HOST               "host"
>> +#define VXHS_OPT_PORT               "port"
>> +
>> +typedef struct QNIOLibState {
>> +    int refcnt;
>> +    void *context;
>> +} QNIOLibState;
>> +
>> +typedef enum {
>> +    VDISK_AIO_READ,
>> +    VDISK_AIO_WRITE,
>> +    VDISK_STAT
>
> This is unused.
>
>> +} VDISKAIOCmd;
>
> This typedef is unused but the VDISK_AIO_READ/VDISK_AIO_WRITE enum
> constants are used.  Please use the typedef name instead of int.  That
> way it's clear that the only valid values are the VDISK_* enum
> constants.
>
>> +
>> +/*
>> + * HyperScale AIO callbacks structure
>> + */
>> +typedef struct VXHSAIOCB {
>> +    BlockAIOCB common;
>> +    int err;
>> +    int direction; /* IO direction (r/w) */
>
> This field is unused.
>
>> +    size_t io_offset;
>
> This field is unused.
>
>> +    size_t size;
>
> This field is unused.
>
>> +    QEMUIOVector *qiov;
>> +} VXHSAIOCB;
>> +
>> +typedef struct VXHSvDiskHostsInfo {
>> +    int qnio_cfd; /* Channel FD */
>> +    int vdisk_rfd; /* vDisk remote FD */
>
> Please don't call things FDs if they are not FDs.  This is confusing
> because dup(), close(), etc don't work on them.  They are handles.
>
> Handles are an unnecessary layer of indirection in the first place.
> libqnio would be simpler if it returned opaque pointers to structs
> instead.  This way hash/map lookups can be eliminated.
>
> Instead of storing strings in the hash/map, define structs with useful
> fields.  This eliminates some of the string parsing in libqnio.
>
>> +    char *hostip; /* Host's IP addresses */
>
> Is this strictly an IP address?  If a hostname can be used too then
> "host" would be clearer name.
>
>> +    int port; /* Host's port number */
>> +} VXHSvDiskHostsInfo;
>> +
>> +/*
>> + * Structure per vDisk maintained for state
>> + */
>> +typedef struct BDRVVXHSState {
>> +    int fds[2];
>> +    int event_reader_pos;
>
> Why is a pipe still being used?  Back in August I mentioned that this
> approach isn't the best practice anymore.  It's more code and slower
> than QEMUBH.
>
> You didn't respond to my review comment.  Feel free to disagree with my
> comments but please respond so I know what to expect.  Now I'm wondering
> whether other comments have been ignored too...
>

I did respond, also resent the same email today in case it was missed
the first time. Please let me know if you did not receive the one I
forwarded today and I will check if there's a problem at my end.
Here's the text again:
/+++++++++++++++++/
On Tue, Aug 23, 2016 at 3:22 PM, ashish mittal <ashmit602@gmail.com> wrote:
> Thanks Stefan, I will look at block/quorum.c.
>
> I have sent V4 of the patch with a reworked parsing logic for both
> JSON and URI. Both of them are quite compact now.
>
> URI parsing now follows the suggestion given by Kevin.
> /================/
> However, you should use the proper interfaces to implement this, which
> is .bdrv_parse_filename(). This is a function that gets a string and
> converts it into a QDict, which is then passed to .bdrv_open(). So it
> uses exactly the same code path in .bdrv_open() as if used directly with
> QAPI.
> /================/
>
> Additionally, I have fixed all the issues pointed out by you on V1 of
> the patch. The only change I haven't done is to replace pipes with
> QEMUBH. I am hoping this will not hold up the patch from being
> accepted, and I can make this transition later with proper dev and
> testing.
>
> Regards,
> Ashish
/+++++++++++++++++/

Will work on all the other suggestions in this email and the one you
pointed out earlier. Thanks!


>> +static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
>> +                              int *rfd, const char *file_name)
>> +{
>> +    int ret = 0;
>> +    bool qnio_open = false;
>
> This variable isn't necessary since the vxhs_qnio_open() error uses
> return instead of goto.
>
>
> Pausing review at this point because I realized that my previous review
> comments weren't addressed.
Markus Armbruster Nov. 16, 2016, 9:04 a.m. UTC | #17
ashish mittal <ashmit602@gmail.com> writes:

> Thanks for concluding on this.
>
> I will rearrange the qnio_api.h header accordingly as follows:
>
> +#include "qemu/osdep.h"

Headers should not include osdep.h.

> +#include <qnio/qnio_api.h>   <=== after osdep.h
> +#include "block/block_int.h"

Including block_int.h in a header is problematic.  Are you sure you need
it?  Will qnio/qnio_api.h ever be included outside block/?

> +#include "qapi/qmp/qerror.h"
> +#include "qapi/qmp/qdict.h"
> +#include "qapi/qmp/qstring.h"
> +#include "trace.h"
> +#include "qemu/uri.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"  <==== remove

In general, headers should include what they need, but no more.
Fam Zheng Nov. 16, 2016, 9:49 a.m. UTC | #18
On Wed, 11/16 10:04, Markus Armbruster wrote:
> ashish mittal <ashmit602@gmail.com> writes:
> 
> > Thanks for concluding on this.
> >
> > I will rearrange the qnio_api.h header accordingly as follows:
> >
> > +#include "qemu/osdep.h"
> 
> Headers should not include osdep.h.

This is about including "osdep.h" _and_ "qnio_api.h" in block/vxhs.c, so what
Ashish means looks good to me.

Fam

> 
> > +#include <qnio/qnio_api.h>   <=== after osdep.h
> > +#include "block/block_int.h"
> 
> Including block_int.h in a header is problematic.  Are you sure you need
> it?  Will qnio/qnio_api.h ever be included outside block/?
> 
> > +#include "qapi/qmp/qerror.h"
> > +#include "qapi/qmp/qdict.h"
> > +#include "qapi/qmp/qstring.h"
> > +#include "trace.h"
> > +#include "qemu/uri.h"
> > +#include "qapi/error.h"
> > +#include "qemu/error-report.h"  <==== remove
> 
> In general, headers should include what they need, but no more.
>
Stefan Hajnoczi Nov. 16, 2016, 11:27 a.m. UTC | #19
On Wed, Nov 16, 2016 at 9:49 AM, Fam Zheng <famz@redhat.com> wrote:
> On Wed, 11/16 10:04, Markus Armbruster wrote:
>> ashish mittal <ashmit602@gmail.com> writes:
>>
>> > Thanks for concluding on this.
>> >
>> > I will rearrange the qnio_api.h header accordingly as follows:
>> >
>> > +#include "qemu/osdep.h"
>>
>> Headers should not include osdep.h.
>
> This is about including "osdep.h" _and_ "qnio_api.h" in block/vxhs.c, so what
> Ashish means looks good to me.

Yes, I think "will rearrange the qnio_api.h header" was a typo and was
supposed to be block/vxhs.c.

Stefan
Ashish Mittal Nov. 16, 2016, 5:05 p.m. UTC | #20
On Wed, Nov 16, 2016 at 3:27 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Wed, Nov 16, 2016 at 9:49 AM, Fam Zheng <famz@redhat.com> wrote:
>> On Wed, 11/16 10:04, Markus Armbruster wrote:
>>> ashish mittal <ashmit602@gmail.com> writes:
>>>
>>> > Thanks for concluding on this.
>>> >
>>> > I will rearrange the qnio_api.h header accordingly as follows:
>>> >
>>> > +#include "qemu/osdep.h"
>>>
>>> Headers should not include osdep.h.
>>
>> This is about including "osdep.h" _and_ "qnio_api.h" in block/vxhs.c, so what
>> Ashish means looks good to me.
>
> Yes, I think "will rearrange the qnio_api.h header" was a typo and was
> supposed to be block/vxhs.c.
>
> Stefan

Thanks for the correction. Yes, i meant rearrange headers in block/vxhs.c.
Daniel P. Berrangé Nov. 17, 2016, 4:01 p.m. UTC | #21
On Mon, Nov 07, 2016 at 04:59:44PM -0800, Ashish Mittal wrote:
> Source code for the qnio library that this code loads can be downloaded from:
> https://github.com/MittalAshish/libqnio.git
> 
> Sample command line using the JSON syntax:
> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
> -msg timestamp=on
> 'json:{"driver":"vxhs","vdisk-id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
> "server":{"host":"172.172.17.4","port":"9999"}}'
> 
> Sample command line using the URI syntax:
> qemu-img convert -f raw -O raw -n
> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
> vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0

I'm wondering what the security story is here.  QEMU is connecting to a
remote TCP server but apparently not providing any username or password
or other form of credentials to authenticate itself with.

This seems to imply that the network server accepts connections to access
disks from any client anywhere. Surely this isn't correct, and there must
be some authentication scheme in there, to prevent the server being wide
open to any malicious attacker, whether on the network, or via a compromised
QEMU process accessing volumes it shouldn't ?

If there is authentication, then how is QEMU providing the credentials ?

Regards,
Daniel
Ashish Mittal Feb. 1, 2017, 1:55 a.m. UTC | #22
Hi,

There has been some work going on on the VxHS libvirt patches and we
are making good progress.

A suggestion has been made on the libvirt community to check if we can
change the qemu VxHS device specification syntax as follows.

Replace

(a) +file.server.host=192.168.0.1,file.server.port=9999

with

(b) +file.host=192.168.0.1,file.port=9999

The reasoning being that since we have only one host (true as the
failover is now being handled completely/transparently) within the
libqnio library), the "server" part is redundant.

Excerpt from John Ferlan's email -
/======================================/
#2. Is the desire to ever support more than 1 host? If not, then is the
"server" syntax you've borrowed from the Gluster code necessary? Could
you just go with the single "host" like NBD and SSH. As it relates to
the qemu command line - I'm not quite as clear. From the example I see
in commit id '7b7da9e28', the gluster syntax would have:

+file.server.0.type=tcp,file.server.0.host=example.org,file.server.0.port=6000,\
+file.server.1.type=tcp,file.server.1.host=example.org,file.server.1.port=24007,\
+file.server.2.type=unix,file.server.2.socket=/path/to/sock,format=qcow2,\

whereas, the VxHS syntax is:
 +file.server.host=192.168.0.1,file.server.port=9999,format=raw,if=none,\

FWIW: I also note there is no ".type=tcp" in your output - so perhaps
the "default" is tcp unless otherwise specified, but I'm sure of the
qemu syntax requirements in this area. I assume that since there's only
1 server, the ".0, .1, .2" become unnecessary (something added by commit
id 'f1bbc7df4' for multiple gluster hosts).

I haven't closedly followed the qemu syntax discussion, but it would it
would be possible to use:

+file.host=192.168.0.1,file.port=9999

Similar to how NBD (see commit id 'a1674fd9') and SSH (see commit id
'bc225b1b5') are handled.
/======================================/

If this proposal looks OK to the community, then I will make this user
interface change in the next VxHS qemu patch.

Thanks,
Ashish

On Wed, Nov 16, 2016 at 9:05 AM, ashish mittal <ashmit602@gmail.com> wrote:
> On Wed, Nov 16, 2016 at 3:27 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Wed, Nov 16, 2016 at 9:49 AM, Fam Zheng <famz@redhat.com> wrote:
>>> On Wed, 11/16 10:04, Markus Armbruster wrote:
>>>> ashish mittal <ashmit602@gmail.com> writes:
>>>>
>>>> > Thanks for concluding on this.
>>>> >
>>>> > I will rearrange the qnio_api.h header accordingly as follows:
>>>> >
>>>> > +#include "qemu/osdep.h"
>>>>
>>>> Headers should not include osdep.h.
>>>
>>> This is about including "osdep.h" _and_ "qnio_api.h" in block/vxhs.c, so what
>>> Ashish means looks good to me.
>>
>> Yes, I think "will rearrange the qnio_api.h header" was a typo and was
>> supposed to be block/vxhs.c.
>>
>> Stefan
>
> Thanks for the correction. Yes, i meant rearrange headers in block/vxhs.c.
Stefan Hajnoczi Feb. 2, 2017, 10:08 a.m. UTC | #23
On Tue, Jan 31, 2017 at 05:55:47PM -0800, ashish mittal wrote:
> Hi,
> 
> There has been some work going on on the VxHS libvirt patches and we
> are making good progress.
> 
> A suggestion has been made on the libvirt community to check if we can
> change the qemu VxHS device specification syntax as follows.
> 
> Replace
> 
> (a) +file.server.host=192.168.0.1,file.server.port=9999
> 
> with
> 
> (b) +file.host=192.168.0.1,file.port=9999
> 
> The reasoning being that since we have only one host (true as the
> failover is now being handled completely/transparently) within the
> libqnio library), the "server" part is redundant.

Sounds good to me.

Stefan
Ashish Mittal Feb. 8, 2017, 9:23 p.m. UTC | #24
We have retained the original syntax in v7 per the following
suggestion on libvirt thread -

>> That is correct. Above syntax would also work for us. I will pose this
>> suggestion to the qemu community and update with their response.
>>

It's not that important... I was looking for a simplification and
generation of only what's required. You can continue using the server
syntax - perhaps just leave a note/comment in the code indicating the
decision point and move on.


On Thu, Feb 2, 2017 at 2:08 AM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Jan 31, 2017 at 05:55:47PM -0800, ashish mittal wrote:
>> Hi,
>>
>> There has been some work going on on the VxHS libvirt patches and we
>> are making good progress.
>>
>> A suggestion has been made on the libvirt community to check if we can
>> change the qemu VxHS device specification syntax as follows.
>>
>> Replace
>>
>> (a) +file.server.host=192.168.0.1,file.server.port=9999
>>
>> with
>>
>> (b) +file.host=192.168.0.1,file.port=9999
>>
>> The reasoning being that since we have only one host (true as the
>> failover is now being handled completely/transparently) within the
>> libqnio library), the "server" part is redundant.
>
> Sounds good to me.
>
> Stefan
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 67a036a..58313a2 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -18,6 +18,7 @@  block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_VXHS) += vxhs.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
@@ -38,6 +39,7 @@  rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
+vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 archipelago.o-libs := $(ARCHIPELAGO_LIBS)
diff --git a/block/trace-events b/block/trace-events
index 882c903..efdd5ef 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -112,3 +112,24 @@  qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+
+# block/vxhs.c
+vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
+vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
+vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
+vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
+vxhs_open_fail(int ret) "Could not open the device. Error = %d"
+vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. Error=%d"
+vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
+vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d"
+vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
+vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu"
+vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on host-ip %s"
+vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
+vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
+vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
+vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk-id from json %s"
+vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port %d"
+vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
+vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
+vxhs_close(char *vdisk_guid) "Closing vdisk %s"
diff --git a/block/vxhs.c b/block/vxhs.c
new file mode 100644
index 0000000..8913e8f
--- /dev/null
+++ b/block/vxhs.c
@@ -0,0 +1,689 @@ 
+/*
+ * QEMU Block driver for Veritas HyperScale (VxHS)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include <qnio/qnio_api.h>
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+#include "qemu/uri.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+#define VDISK_FD_READ               0
+#define VDISK_FD_WRITE              1
+
+#define VXHS_OPT_FILENAME           "filename"
+#define VXHS_OPT_VDISK_ID           "vdisk-id"
+#define VXHS_OPT_SERVER             "server"
+#define VXHS_OPT_HOST               "host"
+#define VXHS_OPT_PORT               "port"
+
+typedef struct QNIOLibState {
+    int refcnt;
+    void *context;
+} QNIOLibState;
+
+typedef enum {
+    VDISK_AIO_READ,
+    VDISK_AIO_WRITE,
+    VDISK_STAT
+} VDISKAIOCmd;
+
+/*
+ * HyperScale AIO callbacks structure
+ */
+typedef struct VXHSAIOCB {
+    BlockAIOCB common;
+    int err;
+    int direction; /* IO direction (r/w) */
+    size_t io_offset;
+    size_t size;
+    QEMUIOVector *qiov;
+} VXHSAIOCB;
+
+typedef struct VXHSvDiskHostsInfo {
+    int qnio_cfd; /* Channel FD */
+    int vdisk_rfd; /* vDisk remote FD */
+    char *hostip; /* Host's IP addresses */
+    int port; /* Host's port number */
+} VXHSvDiskHostsInfo;
+
+/*
+ * Structure per vDisk maintained for state
+ */
+typedef struct BDRVVXHSState {
+    int fds[2];
+    int event_reader_pos;
+    VXHSAIOCB *qnio_event_acb;
+    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
+    char *vdisk_guid;
+} BDRVVXHSState;
+
+/* QNIO Library State */
+static QNIOLibState qniolib;
+
+/* vdisk prefix to pass to qnio */
+static const char vdisk_prefix[] = "/dev/of/vdisk";
+
+static void vxhs_iio_callback(int32_t rfd, uint32_t reason, void *ctx,
+                              uint32_t error, uint32_t opcode)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = NULL;
+    ssize_t ret;
+
+    switch (opcode) {
+    case IRP_READ_REQUEST:
+    case IRP_WRITE_REQUEST:
+
+        /*
+         * ctx is VXHSAIOCB*
+         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP or
+         * reason is IIO_REASON_HUP
+         */
+        if (ctx) {
+            acb = ctx;
+            s = acb->common.bs->opaque;
+        } else {
+            trace_vxhs_iio_callback(error, reason);
+            goto out;
+        }
+
+        if (error) {
+            if (!acb->err) {
+                acb->err = error;
+            }
+            trace_vxhs_iio_callback(error, reason);
+        }
+
+        ret = qemu_write_full(s->fds[VDISK_FD_WRITE], &acb, sizeof(acb));
+        g_assert(ret == sizeof(acb));
+        break;
+
+    default:
+        if (error == QNIOERROR_CHANNEL_HUP) {
+            /*
+             * Channel failed, spontaneous notification,
+             * not in response to I/O
+             */
+            trace_vxhs_iio_callback_chnfail(error, errno);
+        } else {
+            trace_vxhs_iio_callback_unknwn(opcode, error);
+        }
+        break;
+    }
+out:
+    return;
+}
+
+/*
+ * Initialize QNIO library on first open.
+ */
+static int vxhs_qnio_open(void)
+{
+    int ret = 0;
+
+    if (qniolib.refcnt != 0) {
+        g_assert(qniolib.context != NULL);
+        qniolib.refcnt++;
+        return 0;
+    }
+    qniolib.context = iio_init(QNIO_VERSION, vxhs_iio_callback);
+    if (qniolib.context == NULL) {
+        ret = -ENODEV;
+    } else {
+        qniolib.refcnt = 1;
+    }
+    return ret;
+}
+
+/*
+ * Cleanup QNIO library on last close.
+ */
+static void vxhs_qnio_close(void)
+{
+    qniolib.refcnt--;
+    if (qniolib.refcnt == 0) {
+        iio_fini(qniolib.context);
+        qniolib.context = NULL;
+    }
+}
+
+static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
+                              int *rfd, const char *file_name)
+{
+    int ret = 0;
+    bool qnio_open = false;
+
+    ret = vxhs_qnio_open();
+    if (ret) {
+        return ret;
+    }
+    qnio_open = true;
+
+    /*
+     * Open qnio channel to storage agent if not opened before.
+     */
+    *cfd = iio_open(qniolib.context, of_vsa_addr, 0);
+    if (*cfd < 0) {
+        trace_vxhs_qnio_iio_open(of_vsa_addr);
+        ret = -ENODEV;
+        goto err_out;
+    }
+
+    /*
+     * Open vdisk device
+     */
+    *rfd = iio_devopen(qniolib.context, *cfd, file_name, 0);
+    if (*rfd < 0) {
+        trace_vxhs_qnio_iio_devopen(file_name);
+        ret = -ENODEV;
+        goto err_out;
+    }
+    return 0;
+
+err_out:
+    if (*cfd >= 0) {
+        iio_close(qniolib.context, *cfd);
+    }
+    if (qnio_open) {
+        vxhs_qnio_close();
+    }
+    *cfd = -1;
+    *rfd = -1;
+    return ret;
+}
+
+static void vxhs_qnio_iio_close(BDRVVXHSState *s)
+{
+    /*
+     * Close vDisk device
+     */
+    if (s->vdisk_hostinfo.vdisk_rfd >= 0) {
+        iio_devclose(qniolib.context, 0, s->vdisk_hostinfo.vdisk_rfd);
+        s->vdisk_hostinfo.vdisk_rfd = -1;
+    }
+
+    /*
+     * Close QNIO channel against cached channel-fd
+     */
+    if (s->vdisk_hostinfo.qnio_cfd >= 0) {
+        iio_close(qniolib.context, s->vdisk_hostinfo.qnio_cfd);
+        s->vdisk_hostinfo.qnio_cfd = -1;
+    }
+
+    vxhs_qnio_close();
+}
+
+static void vxhs_complete_aio(VXHSAIOCB *acb, BDRVVXHSState *s)
+{
+    BlockCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;
+    int ret = 0;
+
+    if (acb->err != 0) {
+        trace_vxhs_complete_aio(acb, acb->err);
+        /*
+         * We mask all the IO errors generically as EIO for upper layers
+         * Right now our IO Manager uses non standard error codes. Instead
+         * of confusing upper layers with incorrect interpretation we are
+         * doing this workaround.
+         */
+        ret = (-EIO);
+    }
+
+    qemu_aio_unref(acb);
+    cb(opaque, ret);
+}
+
+/*
+ * This is the HyperScale event handler registered to QEMU.
+ * It is invoked when any IO gets completed and written on pipe
+ * by callback called from QNIO thread context. Then it marks
+ * the AIO as completed, and releases HyperScale AIO callbacks.
+ */
+static void vxhs_aio_event_reader(void *opaque)
+{
+    BDRVVXHSState *s = opaque;
+    char *p;
+    ssize_t ret;
+
+    do {
+        p = (char *)&s->qnio_event_acb;
+        ret = read(s->fds[VDISK_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->qnio_event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->qnio_event_acb)) {
+                s->event_reader_pos = 0;
+                vxhs_complete_aio(s->qnio_event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "vxhs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URI to the Veritas HyperScale image",
+        },
+        {
+            .name = VXHS_OPT_VDISK_ID,
+            .type = QEMU_OPT_STRING,
+            .help = "UUID of the VxHS vdisk",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "vxhs_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (ipv4 addresses)",
+        },
+        {
+            .name = VXHS_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which VxHSD is listening (default 9999)",
+            .def_value_str = "9999"
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse the incoming URI and populate *options with the host information.
+ * URI syntax has the limitation of supporting only one host info.
+ * To pass multiple host information, use the JSON syntax.
+ */
+static int vxhs_parse_uri(const char *filename, QDict *options)
+{
+    URI *uri = NULL;
+    char *hoststr, *portstr;
+    char *port;
+    int ret = 0;
+
+    trace_vxhs_parse_uri_filename(filename);
+    uri = uri_parse(filename);
+    if (!uri || !uri->server || !uri->path) {
+        uri_free(uri);
+        return -EINVAL;
+    }
+
+    hoststr = g_strdup(VXHS_OPT_SERVER".host");
+    qdict_put(options, hoststr, qstring_from_str(uri->server));
+    g_free(hoststr);
+
+    portstr = g_strdup(VXHS_OPT_SERVER".port");
+    if (uri->port) {
+        port = g_strdup_printf("%d", uri->port);
+        qdict_put(options, portstr, qstring_from_str(port));
+        g_free(port);
+    }
+    g_free(portstr);
+
+    if (strstr(uri->path, "vxhs") == NULL) {
+        qdict_put(options, "vdisk-id", qstring_from_str(uri->path));
+    }
+
+    trace_vxhs_parse_uri_hostinfo(1, uri->server, uri->port);
+    uri_free(uri);
+
+    return ret;
+}
+
+static void vxhs_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
+        error_setg(errp, "vdisk-id/server and a file name may not be specified "
+                         "at the same time");
+        return;
+    }
+
+    if (strstr(filename, "://")) {
+        int ret = vxhs_parse_uri(filename, options);
+        if (ret < 0) {
+            error_setg(errp, "Invalid URI. URI should be of the form "
+                       "  vxhs://<host_ip>:<port>/{<vdisk-id>}");
+        }
+    }
+}
+
+static int vxhs_qemu_init(QDict *options, BDRVVXHSState *s,
+                          int *cfd, int *rfd, Error **errp)
+{
+    QDict *backing_options = NULL;
+    QemuOpts *opts, *tcp_opts;
+    const char *vxhs_filename;
+    char *of_vsa_addr = NULL;
+    Error *local_err = NULL;
+    const char *vdisk_id_opt;
+    const char *server_host_opt;
+    char *file_name = NULL;
+    char *str = NULL;
+    int ret = 0;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    vxhs_filename = qemu_opt_get(opts, VXHS_OPT_FILENAME);
+    if (vxhs_filename) {
+        trace_vxhs_qemu_init_filename(vxhs_filename);
+    }
+
+    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
+    if (!vdisk_id_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
+        ret = -EINVAL;
+        goto out;
+    }
+    s->vdisk_guid = g_strdup(vdisk_id_opt);
+    trace_vxhs_qemu_init_vdisk(vdisk_id_opt);
+
+    str = g_strdup_printf(VXHS_OPT_SERVER".");
+    qdict_extract_subqdict(options, &backing_options, str);
+
+    /* Create opts info from runtime_tcp_opts list */
+    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
+    if (local_err) {
+        qdict_del(backing_options, str);
+        qemu_opts_del(tcp_opts);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
+    if (!server_host_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER,
+                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    s->vdisk_hostinfo.hostip = g_strdup(server_host_opt);
+
+    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
+                                                          VXHS_OPT_PORT),
+                                                          NULL, 0);
+
+    s->vdisk_hostinfo.qnio_cfd = -1;
+    s->vdisk_hostinfo.vdisk_rfd = -1;
+    trace_vxhs_qemu_init(s->vdisk_hostinfo.hostip,
+                         s->vdisk_hostinfo.port);
+
+    qdict_del(backing_options, str);
+    qemu_opts_del(tcp_opts);
+
+    file_name = g_strdup_printf("%s%s", vdisk_prefix, s->vdisk_guid);
+    of_vsa_addr = g_strdup_printf("of://%s:%d",
+                                s->vdisk_hostinfo.hostip,
+                                s->vdisk_hostinfo.port);
+
+    ret = vxhs_qnio_iio_open(cfd, of_vsa_addr, rfd, file_name);
+    if (ret) {
+        error_setg(&local_err, "Failed qnio_iio_open");
+        ret = -EIO;
+    }
+
+out:
+    g_free(str);
+    g_free(file_name);
+    g_free(of_vsa_addr);
+    qemu_opts_del(opts);
+
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        g_free(s->vdisk_hostinfo.hostip);
+        g_free(s->vdisk_guid);
+        s->vdisk_guid = NULL;
+        errno = -ret;
+    }
+
+    return ret;
+}
+
+static int vxhs_open(BlockDriverState *bs, QDict *options,
+                     int bdrv_flags, Error **errp)
+{
+    BDRVVXHSState *s = bs->opaque;
+    AioContext *aio_context;
+    int qemu_qnio_cfd = -1;
+    int qemu_rfd = -1;
+    int ret = 0;
+
+    ret = vxhs_qemu_init(options, s, &qemu_qnio_cfd, &qemu_rfd, errp);
+    if (ret < 0) {
+        trace_vxhs_open_fail(ret);
+        return ret;
+    }
+
+    s->vdisk_hostinfo.qnio_cfd = qemu_qnio_cfd;
+    s->vdisk_hostinfo.vdisk_rfd = qemu_rfd;
+
+    /*
+     * Create a pipe for communicating between two threads in different
+     * context. Set handler for read event, which gets triggered when
+     * IO completion is done by non-QEMU context.
+     */
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        trace_vxhs_open_epipe(ret);
+        ret = -errno;
+        goto errout;
+    }
+    fcntl(s->fds[VDISK_FD_READ], F_SETFL, O_NONBLOCK);
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_set_fd_handler(aio_context, s->fds[VDISK_FD_READ],
+                       false, vxhs_aio_event_reader, NULL, s);
+    return 0;
+
+errout:
+    /*
+     * Close remote vDisk device if it was opened earlier
+     */
+    vxhs_qnio_iio_close(s);
+    trace_vxhs_open_fail(ret);
+    return ret;
+}
+
+static const AIOCBInfo vxhs_aiocb_info = {
+    .aiocb_size = sizeof(VXHSAIOCB)
+};
+
+/*
+ * This allocates QEMU-VXHS callback for each IO
+ * and is passed to QNIO. When QNIO completes the work,
+ * it will be passed back through the callback.
+ */
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
+                               QEMUIOVector *qiov, int nb_sectors,
+                               BlockCompletionFunc *cb, void *opaque, int iodir)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = bs->opaque;
+    size_t size;
+    uint64_t offset;
+    int iio_flags = 0;
+    int ret = 0;
+    uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
+    /*
+     * Setup or initialize VXHSAIOCB.
+     * Every single field should be initialized since
+     * acb will be picked up from the slab without
+     * initializing with zero.
+     */
+    acb->io_offset = offset;
+    acb->size = size;
+    acb->err = 0;
+    acb->qiov = qiov;
+    acb->direction = iodir;
+
+    iio_flags = (IIO_FLAG_DONE | IIO_FLAG_ASYNC);
+
+    switch (iodir) {
+    case VDISK_AIO_WRITE:
+            ret = iio_writev(qniolib.context, rfd, qiov->iov, qiov->niov,
+                             offset, (uint64_t)size, (void *)acb, iio_flags);
+            break;
+    case VDISK_AIO_READ:
+            ret = iio_readv(qniolib.context, rfd, qiov->iov, qiov->niov,
+                            offset, (uint64_t)size, (void *)acb, iio_flags);
+            break;
+    default:
+            trace_vxhs_aio_rw_invalid(iodir);
+            goto errout;
+    }
+
+    if (ret != 0) {
+        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
+                                acb, ret, errno);
+        goto errout;
+    }
+    return &acb->common;
+
+errout:
+    qemu_aio_unref(acb);
+    return NULL;
+}
+
+static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                       opaque, VDISK_AIO_READ);
+}
+
+static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
+                       cb, opaque, VDISK_AIO_WRITE);
+}
+
+static void vxhs_close(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    trace_vxhs_close(s->vdisk_guid);
+    close(s->fds[VDISK_FD_READ]);
+    close(s->fds[VDISK_FD_WRITE]);
+
+    /*
+     * Clearing all the event handlers for oflame registered to QEMU
+     */
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
+                       false, NULL, NULL, NULL);
+    g_free(s->vdisk_guid);
+    s->vdisk_guid = NULL;
+    vxhs_qnio_iio_close(s);
+
+    /*
+     * Free the dynamically allocated hostip string
+     */
+    g_free(s->vdisk_hostinfo.hostip);
+    s->vdisk_hostinfo.hostip = NULL;
+    s->vdisk_hostinfo.port = 0;
+}
+
+static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
+{
+    int64_t vdisk_size = -1;
+    int ret = 0;
+    uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
+
+    ret = iio_ioctl(qniolib.context, rfd, IOR_VDISK_STAT, &vdisk_size, NULL, 0);
+    if (ret < 0) {
+        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
+        return -EIO;
+    }
+
+    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
+    return vdisk_size;
+}
+
+/*
+ * Returns the size of vDisk in bytes. This is required
+ * by QEMU block upper block layer so that it is visible
+ * to guest.
+ */
+static int64_t vxhs_getlength(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+    int64_t vdisk_size;
+
+    vdisk_size = vxhs_get_vdisk_stat(s);
+    if (vdisk_size < 0) {
+        return -EIO;
+    }
+
+    return vdisk_size;
+}
+
+static void vxhs_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
+                       false, NULL, NULL, NULL);
+}
+
+static void vxhs_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    aio_set_fd_handler(new_context, s->fds[VDISK_FD_READ],
+                       false, vxhs_aio_event_reader, NULL, s);
+}
+
+static BlockDriver bdrv_vxhs = {
+    .format_name                  = "vxhs",
+    .protocol_name                = "vxhs",
+    .instance_size                = sizeof(BDRVVXHSState),
+    .bdrv_file_open               = vxhs_open,
+    .bdrv_parse_filename          = vxhs_parse_filename,
+    .bdrv_close                   = vxhs_close,
+    .bdrv_getlength               = vxhs_getlength,
+    .bdrv_aio_readv               = vxhs_aio_readv,
+    .bdrv_aio_writev              = vxhs_aio_writev,
+    .bdrv_detach_aio_context      = vxhs_detach_aio_context,
+    .bdrv_attach_aio_context      = vxhs_attach_aio_context,
+};
+
+static void bdrv_vxhs_init(void)
+{
+    bdrv_register(&bdrv_vxhs);
+}
+
+block_init(bdrv_vxhs_init);
diff --git a/configure b/configure
index fd6f898..9f1e9cd 100755
--- a/configure
+++ b/configure
@@ -322,6 +322,7 @@  numa=""
 tcmalloc="no"
 jemalloc="no"
 replication="yes"
+vxhs=""
 
 # parse CC options first
 for opt do
@@ -1167,6 +1168,11 @@  for opt do
   ;;
   --enable-replication) replication="yes"
   ;;
+  --disable-vxhs) vxhs="no"
+  ;;
+  --enable-vxhs) vxhs="yes"
+  ;;
+
   *)
       echo "ERROR: unknown option $opt"
       echo "Try '$0 --help' for more information"
@@ -1400,6 +1406,7 @@  disabled with --disable-FEATURE, default is enabled if available:
   tcmalloc        tcmalloc support
   jemalloc        jemalloc support
   replication     replication support
+  vxhs            Veritas HyperScale vDisk backend support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -4721,6 +4728,33 @@  if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
 fi
 
 ##########################################
+# Veritas HyperScale block driver VxHS
+# Check if libqnio is installed
+
+if test "$vxhs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <stdint.h>
+#include <qnio/qnio_api.h>
+
+void *vxhs_callback;
+
+int main(void) {
+    iio_init(QNIO_VERSION, vxhs_callback);
+    return 0;
+}
+EOF
+  vxhs_libs="-lqnio"
+  if compile_prog "" "$vxhs_libs" ; then
+    vxhs=yes
+  else
+    if test "$vxhs" = "yes" ; then
+      feature_not_found "vxhs block device" "Install libqnio. See github"
+    fi
+    vxhs=no
+  fi
+fi
+
+##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
 
@@ -5087,6 +5121,7 @@  echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
 echo "replication support $replication"
+echo "VxHS block device $vxhs"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5703,6 +5738,12 @@  if test "$pthread_setname_np" = "yes" ; then
   echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
 fi
 
+if test "$vxhs" = "yes" ; then
+  echo "CONFIG_VXHS=y" >> $config_host_mak
+  echo "VXHS_CFLAGS=$vxhs_cflags" >> $config_host_mak
+  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
+fi
+
 if test "$tcg_interpreter" = "yes"; then
   QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bcd3b9e..bd9729c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1715,6 +1715,7 @@ 
 # @host_device, @host_cdrom: Since 2.1
 # @gluster: Since 2.7
 # @nbd, @nfs, @replication, @ssh: Since 2.8
+# @vxhs: Since 2.9
 #
 # Since: 2.0
 ##
@@ -1724,7 +1725,7 @@ 
             'host_device', 'http', 'https', 'luks', 'nbd', 'nfs', 'null-aio',
             'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
             'replication', 'ssh', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc',
-            'vvfat' ] }
+            'vvfat','vxhs' ] }
 
 ##
 # @BlockdevOptionsFile
@@ -2352,6 +2353,21 @@ 
   'data': { '*offset': 'int', '*size': 'int' } }
 
 ##
+# @BlockdevOptionsVxHS
+#
+# Driver specific block device options for VxHS
+#
+# @vdisk-id:    UUID of VxHS volume
+#
+# @server:      vxhs server IP, port
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsVxHS',
+  'data': { 'vdisk-id': 'str',
+            'server': 'InetSocketAddress' } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
@@ -2415,7 +2431,8 @@ 
       'vhdx':       'BlockdevOptionsGenericFormat',
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
-      'vvfat':      'BlockdevOptionsVVFAT'
+      'vvfat':      'BlockdevOptionsVVFAT',
+      'vxhs':       'BlockdevOptionsVxHS'
   } }
 
 ##