diff mbox series

[RFC,v2,4/9] vhost-vdpa: introduce vhost-vdpa net client

Message ID 20200508163218.22592-5-lulu@redhat.com (mailing list archive)
State New, archived
Headers show
Series vDPA support in qemu | expand

Commit Message

Cindy Lu May 8, 2020, 4:32 p.m. UTC
From: Tiwei Bie <tiwei.bie@intel.com>

This patch set introduces a new net client type: vhost-vdpa.
vhost-vdpa net client will set up a vDPA device which is specified
by a "vhostdev" parameter.

Co-authored-by: Lingshan Zhu <lingshan.zhu@intel.com>
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 configure                |  21 ++++
 include/net/vhost-vdpa.h |  19 ++++
 include/net/vhost_net.h  |   1 +
 net/Makefile.objs        |   2 +-
 net/clients.h            |   2 +
 net/net.c                |   3 +
 net/vhost-vdpa.c         | 227 +++++++++++++++++++++++++++++++++++++++
 qapi/net.json            |  22 +++-
 qemu-options.hx          |  19 ++++
 9 files changed, 313 insertions(+), 3 deletions(-)
 create mode 100644 include/net/vhost-vdpa.h
 create mode 100644 net/vhost-vdpa.c

Comments

Eric Blake May 8, 2020, 4:41 p.m. UTC | #1
On 5/8/20 11:32 AM, Cindy Lu wrote:
> From: Tiwei Bie <tiwei.bie@intel.com>
> 
> This patch set introduces a new net client type: vhost-vdpa.
> vhost-vdpa net client will set up a vDPA device which is specified
> by a "vhostdev" parameter.
> 
> Co-authored-by: Lingshan Zhu <lingshan.zhu@intel.com>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---

Just looking at UI:


> +++ b/qapi/net.json
> @@ -441,6 +441,23 @@
>       '*queues':        'int' } }
>   
>   ##
> +# @NetdevVhostVDPAOptions:
> +#
> +# Vhost-vdpa network backend
> +#
> +# @vhostdev: name of a vdpa dev path in sysfs
> +#
> +# @queues: number of queues to be created for multiqueue vhost-vdpa
> +#          (default: 1) (Since 5.1)

No need to mark a 'since' tag on a member introduced at the same time as 
the overall struct itself.

> +#
> +# Since: 5.1
> +##
> +{ 'struct': 'NetdevVhostVDPAOptions',
> +  'data': {
> +    '*vhostdev':     'str',

What does this default to if omitted?

> +    '*fd':           'str',

Not documented above.

> +    '*queues':       'int' } }
> +##

Missing blank line separator.

>   # @NetClientDriver:
>   #
>   # Available netdev drivers.
> @@ -451,7 +468,7 @@
>   ##
>   { 'enum': 'NetClientDriver',
>     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }

Missing a line above that 'vhost-vdpa' is new to 5.1.


> @@ -2749,6 +2756,18 @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
>        -device virtio-net-pci,netdev=net0
>   @end example
>   
> +@item -netdev vhost-vdpa,vhostdev=/path/to/dev
> +Establish a vhost-vdpa netdev, backed by a vhostdev. The chardev should
> +be a unix domain socket backed one. The vhost-vdpa uses a specifically defined
> +protocol to pass vhost ioctl replacement messages to an application on the other
> +end of the socket.
> +Example:
> +@example
> +qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> +     -numa node,memdev=mem \
> +     -netdev type=vhost-vdpa,id=net0,vhostdev=/path/to/dev \
> +     -device virtio-net-pci,netdev=net0
> +@end example
>   @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
>   
>   Create a hub port on the emulated hub with ID @var{hubid}.
>
Jason Wang May 9, 2020, 2:40 a.m. UTC | #2
On 2020/5/9 上午12:32, Cindy Lu wrote:
> From: Tiwei Bie <tiwei.bie@intel.com>


If you think you've done a major refactor of the code, you can change the
author, but you need to keep Tiwei's Signed-off-by (sob).


>
> This patch set introduces a new net client type: vhost-vdpa.
> vhost-vdpa net client will set up a vDPA device which is specified
> by a "vhostdev" parameter.
>
> Co-authored-by: Lingshan Zhu <lingshan.zhu@intel.com>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>   configure                |  21 ++++
>   include/net/vhost-vdpa.h |  19 ++++
>   include/net/vhost_net.h  |   1 +


Patch 5, which is the infrastructure for vhost-vdpa, should come first.
Please reorder the patches in the next version.


>   net/Makefile.objs        |   2 +-
>   net/clients.h            |   2 +
>   net/net.c                |   3 +
>   net/vhost-vdpa.c         | 227 +++++++++++++++++++++++++++++++++++++++
>   qapi/net.json            |  22 +++-
>   qemu-options.hx          |  19 ++++
>   9 files changed, 313 insertions(+), 3 deletions(-)
>   create mode 100644 include/net/vhost-vdpa.h
>   create mode 100644 net/vhost-vdpa.c
>
> diff --git a/configure b/configure
> index 6099be1d84..bdd732e3bb 100755
> --- a/configure
> +++ b/configure
> @@ -1505,6 +1505,10 @@ for opt do
>     ;;
>     --enable-vhost-user) vhost_user="yes"
>     ;;
> +  --disable-vhost-vdpa) vhost_vdpa="no"
> +  ;;
> +  --enable-vhost-vdpa) vhost_vdpa="yes"
> +  ;;
>     --disable-vhost-kernel) vhost_kernel="no"
>     ;;
>     --enable-vhost-kernel) vhost_kernel="yes"
> @@ -1780,6 +1784,7 @@ disabled with --disable-FEATURE, default is enabled if available:
>     vhost-crypto    vhost-user-crypto backend support
>     vhost-kernel    vhost kernel backend support
>     vhost-user      vhost-user backend support
> +  vhost-vdpa      vhost-vdpa backend support


Maybe "vhost-vdpa kernel backend support" is better.


>     spice           spice
>     rbd             rados block device (rbd)
>     libiscsi        iscsi support
> @@ -2241,6 +2246,10 @@ test "$vhost_user" = "" && vhost_user=yes
>   if test "$vhost_user" = "yes" && test "$mingw32" = "yes"; then
>     error_exit "vhost-user isn't available on win32"
>   fi
> +test "$vhost_vdpa" = "" && vhost_vdpa=yes
> +if test "$vhost_vdpa" = "yes" && test "$mingw32" = "yes"; then
> +  error_exit "vhost-vdpa isn't available on win32"
> +fi


Let's add a Linux-only check, like the one for vhost-kernel below.


>   test "$vhost_kernel" = "" && vhost_kernel=$linux
>   if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then
>     error_exit "vhost-kernel is only available on Linux"
> @@ -2269,6 +2278,11 @@ test "$vhost_user_fs" = "" && vhost_user_fs=$vhost_user
>   if test "$vhost_user_fs" = "yes" && test "$vhost_user" = "no"; then
>     error_exit "--enable-vhost-user-fs requires --enable-vhost-user"
>   fi
> +#vhost-vdpa backends
> +test "$vhost_net_vdpa" = "" && vhost_net_vdpa=$vhost_vdpa
> +if test "$vhost_net_vdpa" = "yes" && test "$vhost_vdpa" = "no"; then
> +  error_exit "--enable-vhost-net-vdpa requires --enable-vhost-vdpa"
> +fi
>   
>   # OR the vhost-kernel and vhost-user values for simplicity
>   if test "$vhost_net" = ""; then
> @@ -6543,6 +6557,7 @@ echo "vhost-scsi support $vhost_scsi"
>   echo "vhost-vsock support $vhost_vsock"
>   echo "vhost-user support $vhost_user"
>   echo "vhost-user-fs support $vhost_user_fs"
> +echo "vhost-vdpa support $vhost_vdpa"
>   echo "Trace backends    $trace_backends"
>   if have_backend "simple"; then
>   echo "Trace output file $trace_file-<pid>"
> @@ -7031,6 +7046,9 @@ fi
>   if test "$vhost_net_user" = "yes" ; then
>     echo "CONFIG_VHOST_NET_USER=y" >> $config_host_mak
>   fi
> +if test "$vhost_net_vdpa" = "yes" ; then
> +  echo "CONFIG_VHOST_NET_VDPA=y" >> $config_host_mak
> +fi
>   if test "$vhost_crypto" = "yes" ; then
>     echo "CONFIG_VHOST_CRYPTO=y" >> $config_host_mak
>   fi
> @@ -7043,6 +7061,9 @@ fi
>   if test "$vhost_user" = "yes" ; then
>     echo "CONFIG_VHOST_USER=y" >> $config_host_mak
>   fi
> +if test "$vhost_vdpa" = "yes" ; then
> +  echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
> +fi
>   if test "$vhost_user_fs" = "yes" ; then
>     echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak
>   fi
> diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> new file mode 100644
> index 0000000000..6ce0d04f72
> --- /dev/null
> +++ b/include/net/vhost-vdpa.h
> @@ -0,0 +1,19 @@
> +/*
> + * vhost-vdpa.h
> + *
> + * Copyright(c) 2017-2018 Intel Corporation.
> + * Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef VHOST_VDPA_H
> +#define VHOST_VDPA_H
> +
> +struct vhost_net;
> +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
> +
> +#endif /* VHOST_VDPA_H */
> diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> index 6548a5a105..b47844bf29 100644
> --- a/include/net/vhost_net.h
> +++ b/include/net/vhost_net.h
> @@ -40,4 +40,5 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net);
>   
>   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
>   int vhost_set_state(NetClientState *nc, uint8_t state);
> +int vhost_net_get_device_id(struct vhost_net *net, uint32_t *device_id);


Let's move this function to the generic vhost-vdpa header instead of the net one.


>   #endif
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index c5d076d19c..5ab45545db 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
>   tap-obj-y ?= tap-stub.o
>   common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
>   common-obj-$(CONFIG_WIN32) += tap-win32.o
> -
> +common-obj-$(CONFIG_VHOST_NET_VDPA) += vhost-vdpa.o
>   vde.o-libs = $(VDE_LIBS)
>   
>   common-obj-$(CONFIG_CAN_BUS) += can/
> diff --git a/net/clients.h b/net/clients.h
> index a6ef267e19..92f9b59aed 100644
> --- a/net/clients.h
> +++ b/net/clients.h
> @@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char *name,
>   int net_init_vhost_user(const Netdev *netdev, const char *name,
>                           NetClientState *peer, Error **errp);
>   
> +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> +                        NetClientState *peer, Error **errp);
>   #endif /* QEMU_NET_CLIENTS_H */
> diff --git a/net/net.c b/net/net.c
> index b3192deaf1..9eff1ae982 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -965,6 +965,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
>   #ifdef CONFIG_VHOST_NET_USER
>           [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
>   #endif
> +#ifdef CONFIG_VHOST_NET_VDPA
> +        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
> +#endif
>   #ifdef CONFIG_L2TPV3
>           [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
>   #endif
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> new file mode 100644
> index 0000000000..c29678fdf1
> --- /dev/null
> +++ b/net/vhost-vdpa.c
> @@ -0,0 +1,227 @@
> +/*
> + * vhost-vdpa.c
> + *
> + * Copyright(c) 2017-2018 Intel Corporation.
> + * Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "clients.h"
> +#include "net/vhost_net.h"
> +#include "net/vhost-vdpa.h"
> +#include "hw/virtio/vhost-vdpa.h"
> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "qemu/option.h"
> +#include "qapi/error.h"
> +#include <linux/vfio.h>


This include is no longer needed.


> +#include <sys/ioctl.h>
> +#include <err.h>
> +#include <linux/virtio_net.h>


That's not the way we include standard Linux headers; QEMU maintains its
own copy of them.

You need to use #include "standard-headers/linux/xxx.h" instead.


> +#include "monitor/monitor.h"
> +#include "qemu/sockets.h"


Do we really need this?


> +#include "hw/virtio/vhost.h"
> +
> +/* Todo:need to add the multiqueue support here */
> +typedef struct VhostVDPAState {
> +    NetClientState nc;
> +    struct vhost_vdpa vhost_vdpa;
> +    VHostNetState *vhost_net;
> +    uint64_t acked_features;
> +    bool started;
> +} VhostVDPAState;
> +
> +VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> +{
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    return s->vhost_net;
> +}
> +
> +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc)
> +{
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    return s->acked_features;
> +}
> +
> +static void vhost_vdpa_del(NetClientState *ncs)
> +{
> +    VhostVDPAState *s;
> +
> +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +
> +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> +
> +    if (s->vhost_net) {
> +        /* save acked features */
> +        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
> +        if (features) {
> +            s->acked_features = features;
> +         }
> +        vhost_net_cleanup(s->vhost_net);
> +    }
> +}
> +
> +static int vhost_vdpa_add(NetClientState *ncs, void *be)
> +{
> +    VhostNetOptions options;
> +    struct vhost_net *net = NULL;
> +    VhostVDPAState *s;
> +
> +    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
> +
> +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +
> +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> +
> +    options.net_backend = ncs;
> +    options.opaque      = be;
> +    options.busyloop_timeout = 0;
> +    net = vhost_net_init(&options);
> +    if (!net) {
> +        error_report("failed to init vhost_net for queue");
> +        goto err;
> +     }
> +
> +     if (s->vhost_net) {
> +        vhost_net_cleanup(s->vhost_net);
> +        g_free(s->vhost_net);
> +     }
> +     s->vhost_net = net;
> +
> +    return 0;
> +
> +err:
> +    if (net) {
> +        vhost_net_cleanup(net);
> +    }
> +    vhost_vdpa_del(ncs);
> +    return -1;
> +}
> +static void vhost_vdpa_cleanup(NetClientState *nc)
> +{
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +
> +    if (s->vhost_net) {
> +        vhost_net_cleanup(s->vhost_net);
> +        g_free(s->vhost_net);
> +        s->vhost_net = NULL;
> +    }
> +
> +    qemu_purge_queued_packets(nc);
> +}
> +
> +static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
> +{
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +
> +    return true;
> +}
> +
> +static bool vhost_vdpa_has_ufo(NetClientState *nc)
> +{
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    uint64_t  features = 0;
> +
> +    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
> +    features = vhost_net_get_features(s->vhost_net, features);
> +    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
> +
> +}
> +
> +static int vhost_vdpa_check_device_id(NetClientState *nc)
> +{
> +    uint32_t device_id;
> +    int ret;
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    /* Get the device id from hw*/
> +    ret = vhost_net_get_device_id(s->vhost_net, &device_id);
> +    if (device_id != VIRTIO_ID_NET) {
> +        return -ENOTSUP;
> +    }
> +    return ret;
> +}
> +
> +static NetClientInfo net_vhost_vdpa_info = {
> +        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
> +        .size = sizeof(VhostVDPAState),
> +        .cleanup = vhost_vdpa_cleanup,
> +        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
> +        .has_ufo = vhost_vdpa_has_ufo,
> +};
> +
> +static int net_vhost_vdpa_init(NetClientState *peer, const char *device,
> +                               const char *name, const char *vhostdev,
> +                               bool has_fd, char *fd)
> +{
> +    NetClientState *nc = NULL;
> +    VhostVDPAState *s;
> +    int vdpa_device_fd = -1;
> +    Error *err = NULL;
> +
> +    assert(name);
> +
> +    nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name);
> +    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-vdpa");
> +    nc->queue_index = 0;
> +
> +    s = DO_UPCAST(VhostVDPAState, nc, nc);
> +
> +    if (has_fd) {
> +        vdpa_device_fd = monitor_fd_param(cur_mon, fd, &err);
> +    } else{
> +        vdpa_device_fd = open(vhostdev, O_RDWR);
> +    }
> +
> +    if (vdpa_device_fd == -1) {
> +        return -errno;
> +     }
> +    s->vhost_vdpa.device_fd = vdpa_device_fd;
> +    vhost_vdpa_add(nc, (void *)&s->vhost_vdpa);
> +    assert(s->vhost_net);
> +    /* check the device id for vdpa */
> +    return vhost_vdpa_check_device_id(nc);


We probably need to do the check earlier. If we do things like this, we
will probably leak vhost_device_fd.


> +}
> +
> +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
> +{
> +    const char *name = opaque;
> +    const char *driver, *netdev;
> +
> +    driver = qemu_opt_get(opts, "driver");
> +    netdev = qemu_opt_get(opts, "netdev");
> +    if (!driver || !netdev) {
> +        return 0;
> +    }
> +
> +    if (strcmp(netdev, name) == 0 &&
> +        !g_str_has_prefix(driver, "virtio-net-")) {
> +        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> +                        NetClientState *peer, Error **errp)
> +{
> +    const NetdevVhostVDPAOptions *opts;
> +
> +    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    opts = &netdev->u.vhost_vdpa;
> +    /* verify net frontend */
> +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> +                          (char *)name, errp)) {
> +        return -1;
> +    }
> +    return net_vhost_vdpa_init(peer, "vhost_vdpa", name, opts->vhostdev,
> +                    opts->has_fd, opts->fd);
> +}
> diff --git a/qapi/net.json b/qapi/net.json
> index 335295be50..0f4fa6fffc 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -441,6 +441,23 @@
>       '*queues':        'int' } }
>   
>   ##
> +# @NetdevVhostVDPAOptions:
> +#
> +# Vhost-vdpa network backend
> +#
> +# @vhostdev: name of a vdpa dev path in sysfs
> +#
> +# @queues: number of queues to be created for multiqueue vhost-vdpa
> +#          (default: 1) (Since 5.1)
> +#
> +# Since: 5.1
> +##
> +{ 'struct': 'NetdevVhostVDPAOptions',
> +  'data': {
> +    '*vhostdev':     'str',
> +    '*fd':           'str',
> +    '*queues':       'int' } }
> +##
>   # @NetClientDriver:
>   #
>   # Available netdev drivers.
> @@ -451,7 +468,7 @@
>   ##
>   { 'enum': 'NetClientDriver',
>     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
>   
>   ##
>   # @Netdev:
> @@ -479,7 +496,8 @@
>       'bridge':   'NetdevBridgeOptions',
>       'hubport':  'NetdevHubPortOptions',
>       'netmap':   'NetdevNetmapOptions',
> -    'vhost-user': 'NetdevVhostUserOptions' } }
> +    'vhost-user': 'NetdevVhostUserOptions',
> +    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
>   
>   ##
>   # @NetLegacy:
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 65c9473b73..08256d9d4e 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2291,6 +2291,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>   #ifdef CONFIG_POSIX
>       "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
>       "                configure a vhost-user network, backed by a chardev 'dev'\n"
> +#endif
> +#ifdef CONFIG_POSIX
> +    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
> +    "                configure a vhost-vdpa network, backed by a vhostdev 'dev'\n"
>   #endif
>       "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
>       "                configure a hub port on the hub with ID 'n'\n", QEMU_ARCH_ALL)
> @@ -2310,6 +2314,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
>   #endif
>   #ifdef CONFIG_POSIX
>       "vhost-user|"
> +#endif
> +#ifdef CONFIG_POSIX
> +    "vhost-vdpa|"
>   #endif
>       "socket][,option][,...][mac=macaddr]\n"
>       "                initialize an on-board / default host NIC (using MAC address\n"
> @@ -2749,6 +2756,18 @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
>        -device virtio-net-pci,netdev=net0
>   @end example
>   
> +@item -netdev vhost-vdpa,vhostdev=/path/to/dev
> +Establish a vhost-vdpa netdev, backed by a vhostdev. The chardev should
> +be a unix domain socket backed one.


This seems wrong; we don't use a unix domain socket.

Thanks


>   The vhost-vdpa uses a specifically defined
> +protocol to pass vhost ioctl replacement messages to an application on the other
> +end of the socket.
> +Example:
> +@example
> +qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> +     -numa node,memdev=mem \
> +     -netdev type=vhost-vdpa,id=net0,vhostdev=/path/to/dev \
> +     -device virtio-net-pci,netdev=net0
> +@end example
>   @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
>   
>   Create a hub port on the emulated hub with ID @var{hubid}.
Cindy Lu May 9, 2020, 7:17 a.m. UTC | #3
On Sat, May 9, 2020 at 12:42 AM Eric Blake <eblake@redhat.com> wrote:
>
> On 5/8/20 11:32 AM, Cindy Lu wrote:
> > From: Tiwei Bie <tiwei.bie@intel.com>
> >
> > This patch set introduces a new net client type: vhost-vdpa.
> > vhost-vdpa net client will set up a vDPA device which is specified
> > by a "vhostdev" parameter.
> >
> > Co-authored-by: Lingshan Zhu <lingshan.zhu@intel.com>
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
>
> Just looking at UI:
>
>
> > +++ b/qapi/net.json
> > @@ -441,6 +441,23 @@
> >       '*queues':        'int' } }
> >
> >   ##
> > +# @NetdevVhostVDPAOptions:
> > +#
> > +# Vhost-vdpa network backend
> > +#
> > +# @vhostdev: name of a vdpa dev path in sysfs
> > +#
> > +# @queues: number of queues to be created for multiqueue vhost-vdpa
> > +#          (default: 1) (Since 5.1)
>
> No need to mark a 'since' tag on a member introduced at the same time as
> the overall struct itself.
>
Will fix this
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'struct': 'NetdevVhostVDPAOptions',
> > +  'data': {
> > +    '*vhostdev':     'str',
>
> What does this default to if omitted?
>
> > +    '*fd':           'str',
>
> Not documented above.
>
> > +    '*queues':       'int' } }
> > +##
>
> Missing blank line separator.
>
Thanks Eric, will fix all of these.
> >   # @NetClientDriver:
> >   #
> >   # Available netdev drivers.
> > @@ -451,7 +468,7 @@
> >   ##
> >   { 'enum': 'NetClientDriver',
> >     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> > -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> > +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
>
> Missing a line above that 'vhost-vdpa' is new to 5.1.
>
>
Will fix this
> > @@ -2749,6 +2756,18 @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
> >        -device virtio-net-pci,netdev=net0
> >   @end example
> >
> > +@item -netdev vhost-vdpa,vhostdev=/path/to/dev
> > +Establish a vhost-vdpa netdev, backed by a vhostdev. The chardev should
> > +be a unix domain socket backed one. The vhost-vdpa uses a specifically defined
> > +protocol to pass vhost ioctl replacement messages to an application on the other
> > +end of the socket.
> > +Example:
> > +@example
> > +qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> > +     -numa node,memdev=mem \
> > +     -netdev type=vhost-vdpa,id=net0,vhostdev=/path/to/dev \
> > +     -device virtio-net-pci,netdev=net0
> > +@end example
> >   @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
> >
> >   Create a hub port on the emulated hub with ID @var{hubid}.
> >
>
> --
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.           +1-919-301-3226
> Virtualization:  qemu.org | libvirt.org
>
Cindy Lu May 9, 2020, 7:31 a.m. UTC | #4
On Sat, May 9, 2020 at 10:40 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2020/5/9 上午12:32, Cindy Lu wrote:
> > From: Tiwei Bie <tiwei.bie@intel.com>
>
>
> If you think you've done a huge refactor on the code, you can change the
> author but need to keep the sob of Tiwei.
>
>
> >
> > This patch set introduces a new net client type: vhost-vdpa.
> > vhost-vdpa net client will set up a vDPA device which is specified
> > by a "vhostdev" parameter.
> >
> > Co-authored-by: Lingshan Zhu <lingshan.zhu@intel.com>
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >   configure                |  21 ++++
> >   include/net/vhost-vdpa.h |  19 ++++
> >   include/net/vhost_net.h  |   1 +
>
>
> Patch 5 which is the infrastructure of vhost-vpda should come first.
> Please re-order the patch in next version.
>
Sure, Will fix this
>
> >   net/Makefile.objs        |   2 +-
> >   net/clients.h            |   2 +
> >   net/net.c                |   3 +
> >   net/vhost-vdpa.c         | 227 +++++++++++++++++++++++++++++++++++++++
> >   qapi/net.json            |  22 +++-
> >   qemu-options.hx          |  19 ++++
> >   9 files changed, 313 insertions(+), 3 deletions(-)
> >   create mode 100644 include/net/vhost-vdpa.h
> >   create mode 100644 net/vhost-vdpa.c
> >
> > diff --git a/configure b/configure
> > index 6099be1d84..bdd732e3bb 100755
> > --- a/configure
> > +++ b/configure
> > @@ -1505,6 +1505,10 @@ for opt do
> >     ;;
> >     --enable-vhost-user) vhost_user="yes"
> >     ;;
> > +  --disable-vhost-vdpa) vhost_vdpa="no"
> > +  ;;
> > +  --enable-vhost-vdpa) vhost_vdpa="yes"
> > +  ;;
> >     --disable-vhost-kernel) vhost_kernel="no"
> >     ;;
> >     --enable-vhost-kernel) vhost_kernel="yes"
> > @@ -1780,6 +1784,7 @@ disabled with --disable-FEATURE, default is enabled if available:
> >     vhost-crypto    vhost-user-crypto backend support
> >     vhost-kernel    vhost kernel backend support
> >     vhost-user      vhost-user backend support
> > +  vhost-vdpa      vhost-vdpa backend support
>
>
> Maybe "vhost-vdpa kernel backend support" is better.
>
>
Will fix this
> >     spice           spice
> >     rbd             rados block device (rbd)
> >     libiscsi        iscsi support
> > @@ -2241,6 +2246,10 @@ test "$vhost_user" = "" && vhost_user=yes
> >   if test "$vhost_user" = "yes" && test "$mingw32" = "yes"; then
> >     error_exit "vhost-user isn't available on win32"
> >   fi
> > +test "$vhost_vdpa" = "" && vhost_vdpa=yes
> > +if test "$vhost_vdpa" = "yes" && test "$mingw32" = "yes"; then
> > +  error_exit "vhost-vdpa isn't available on win32"
> > +fi
>
>
> Let's add a check for Linux like vhost kernel below.
>
Will fix this
>
> >   test "$vhost_kernel" = "" && vhost_kernel=$linux
> >   if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then
> >     error_exit "vhost-kernel is only available on Linux"
> > @@ -2269,6 +2278,11 @@ test "$vhost_user_fs" = "" && vhost_user_fs=$vhost_user
> >   if test "$vhost_user_fs" = "yes" && test "$vhost_user" = "no"; then
> >     error_exit "--enable-vhost-user-fs requires --enable-vhost-user"
> >   fi
> > +#vhost-vdpa backends
> > +test "$vhost_net_vdpa" = "" && vhost_net_vdpa=$vhost_vdpa
> > +if test "$vhost_net_vdpa" = "yes" && test "$vhost_vdpa" = "no"; then
> > +  error_exit "--enable-vhost-net-vdpa requires --enable-vhost-vdpa"
> > +fi
> >
> >   # OR the vhost-kernel and vhost-user values for simplicity
> >   if test "$vhost_net" = ""; then
> > @@ -6543,6 +6557,7 @@ echo "vhost-scsi support $vhost_scsi"
> >   echo "vhost-vsock support $vhost_vsock"
> >   echo "vhost-user support $vhost_user"
> >   echo "vhost-user-fs support $vhost_user_fs"
> > +echo "vhost-vdpa support $vhost_vdpa"
> >   echo "Trace backends    $trace_backends"
> >   if have_backend "simple"; then
> >   echo "Trace output file $trace_file-<pid>"
> > @@ -7031,6 +7046,9 @@ fi
> >   if test "$vhost_net_user" = "yes" ; then
> >     echo "CONFIG_VHOST_NET_USER=y" >> $config_host_mak
> >   fi
> > +if test "$vhost_net_vdpa" = "yes" ; then
> > +  echo "CONFIG_VHOST_NET_VDPA=y" >> $config_host_mak
> > +fi
> >   if test "$vhost_crypto" = "yes" ; then
> >     echo "CONFIG_VHOST_CRYPTO=y" >> $config_host_mak
> >   fi
> > @@ -7043,6 +7061,9 @@ fi
> >   if test "$vhost_user" = "yes" ; then
> >     echo "CONFIG_VHOST_USER=y" >> $config_host_mak
> >   fi
> > +if test "$vhost_vdpa" = "yes" ; then
> > +  echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
> > +fi
> >   if test "$vhost_user_fs" = "yes" ; then
> >     echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak
> >   fi
> > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > new file mode 100644
> > index 0000000000..6ce0d04f72
> > --- /dev/null
> > +++ b/include/net/vhost-vdpa.h
> > @@ -0,0 +1,19 @@
> > +/*
> > + * vhost-vdpa.h
> > + *
> > + * Copyright(c) 2017-2018 Intel Corporation.
> > + * Copyright(c) 2020 Red Hat, Inc.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef VHOST_VDPA_H
> > +#define VHOST_VDPA_H
> > +
> > +struct vhost_net;
> > +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
> > +
> > +#endif /* VHOST_VDPA_H */
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index 6548a5a105..b47844bf29 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -40,4 +40,5 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net);
> >
> >   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
> >   int vhost_set_state(NetClientState *nc, uint8_t state);
> > +int vhost_net_get_device_id(struct vhost_net *net, uint32_t *device_id);
>
>
> Let's move this function to vhost-vdpa generic header instead of net.
>
Will fix this
>
> >   #endif
> > diff --git a/net/Makefile.objs b/net/Makefile.objs
> > index c5d076d19c..5ab45545db 100644
> > --- a/net/Makefile.objs
> > +++ b/net/Makefile.objs
> > @@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
> >   tap-obj-y ?= tap-stub.o
> >   common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
> >   common-obj-$(CONFIG_WIN32) += tap-win32.o
> > -
> > +common-obj-$(CONFIG_VHOST_NET_VDPA) += vhost-vdpa.o
> >   vde.o-libs = $(VDE_LIBS)
> >
> >   common-obj-$(CONFIG_CAN_BUS) += can/
> > diff --git a/net/clients.h b/net/clients.h
> > index a6ef267e19..92f9b59aed 100644
> > --- a/net/clients.h
> > +++ b/net/clients.h
> > @@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char *name,
> >   int net_init_vhost_user(const Netdev *netdev, const char *name,
> >                           NetClientState *peer, Error **errp);
> >
> > +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> > +                        NetClientState *peer, Error **errp);
> >   #endif /* QEMU_NET_CLIENTS_H */
> > diff --git a/net/net.c b/net/net.c
> > index b3192deaf1..9eff1ae982 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -965,6 +965,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
> >   #ifdef CONFIG_VHOST_NET_USER
> >           [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
> >   #endif
> > +#ifdef CONFIG_VHOST_NET_VDPA
> > +        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
> > +#endif
> >   #ifdef CONFIG_L2TPV3
> >           [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
> >   #endif
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > new file mode 100644
> > index 0000000000..c29678fdf1
> > --- /dev/null
> > +++ b/net/vhost-vdpa.c
> > @@ -0,0 +1,227 @@
> > +/*
> > + * vhost-vdpa.c
> > + *
> > + * Copyright(c) 2017-2018 Intel Corporation.
> > + * Copyright(c) 2020 Red Hat, Inc.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "clients.h"
> > +#include "net/vhost_net.h"
> > +#include "net/vhost-vdpa.h"
> > +#include "hw/virtio/vhost-vdpa.h"
> > +#include "qemu/config-file.h"
> > +#include "qemu/error-report.h"
> > +#include "qemu/option.h"
> > +#include "qapi/error.h"
> > +#include <linux/vfio.h>
>
>
> This include is no longer needed.
>
>
> > +#include <sys/ioctl.h>
> > +#include <err.h>
> > +#include <linux/virtio_net.h>
>
>
> That's not the way we include standard Linux headers; QEMU maintains its
> own copy of them.
>
> You need to use #include "standard-headers/linux/xxx.h" instead.
>
Will fix this
>
> > +#include "monitor/monitor.h"
> > +#include "qemu/sockets.h"
>
>
> Do we really need this?
>
Will fix this
>
> > +#include "hw/virtio/vhost.h"
> > +
> > +/* Todo:need to add the multiqueue support here */
> > +typedef struct VhostVDPAState {
> > +    NetClientState nc;
> > +    struct vhost_vdpa vhost_vdpa;
> > +    VHostNetState *vhost_net;
> > +    uint64_t acked_features;
> > +    bool started;
> > +} VhostVDPAState;
> > +
> > +VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > +{
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    return s->vhost_net;
> > +}
> > +
> > +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc)
> > +{
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    return s->acked_features;
> > +}
> > +
> > +static void vhost_vdpa_del(NetClientState *ncs)
> > +{
> > +    VhostVDPAState *s;
> > +
> > +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +
> > +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> > +
> > +    if (s->vhost_net) {
> > +        /* save acked features */
> > +        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
> > +        if (features) {
> > +            s->acked_features = features;
> > +         }
> > +        vhost_net_cleanup(s->vhost_net);
> > +    }
> > +}
> > +
> > +static int vhost_vdpa_add(NetClientState *ncs, void *be)
> > +{
> > +    VhostNetOptions options;
> > +    struct vhost_net *net = NULL;
> > +    VhostVDPAState *s;
> > +
> > +    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
> > +
> > +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +
> > +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> > +
> > +    options.net_backend = ncs;
> > +    options.opaque      = be;
> > +    options.busyloop_timeout = 0;
> > +    net = vhost_net_init(&options);
> > +    if (!net) {
> > +        error_report("failed to init vhost_net for queue");
> > +        goto err;
> > +     }
> > +
> > +     if (s->vhost_net) {
> > +        vhost_net_cleanup(s->vhost_net);
> > +        g_free(s->vhost_net);
> > +     }
> > +     s->vhost_net = net;
> > +
> > +    return 0;
> > +
> > +err:
> > +    if (net) {
> > +        vhost_net_cleanup(net);
> > +    }
> > +    vhost_vdpa_del(ncs);
> > +    return -1;
> > +}
> > +static void vhost_vdpa_cleanup(NetClientState *nc)
> > +{
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +
> > +    if (s->vhost_net) {
> > +        vhost_net_cleanup(s->vhost_net);
> > +        g_free(s->vhost_net);
> > +        s->vhost_net = NULL;
> > +    }
> > +
> > +    qemu_purge_queued_packets(nc);
> > +}
> > +
> > +static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
> > +{
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +
> > +    return true;
> > +}
> > +
> > +static bool vhost_vdpa_has_ufo(NetClientState *nc)
> > +{
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    uint64_t  features = 0;
> > +
> > +    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
> > +    features = vhost_net_get_features(s->vhost_net, features);
> > +    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
> > +
> > +}
> > +
> > +static int vhost_vdpa_check_device_id(NetClientState *nc)
> > +{
> > +    uint32_t device_id;
> > +    int ret;
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    /* Get the device id from hw*/
> > +    ret = vhost_net_get_device_id(s->vhost_net, &device_id);
> > +    if (device_id != VIRTIO_ID_NET) {
> > +        return -ENOTSUP;
> > +    }
> > +    return ret;
> > +}
> > +
> > +static NetClientInfo net_vhost_vdpa_info = {
> > +        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
> > +        .size = sizeof(VhostVDPAState),
> > +        .cleanup = vhost_vdpa_cleanup,
> > +        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
> > +        .has_ufo = vhost_vdpa_has_ufo,
> > +};
> > +
> > +static int net_vhost_vdpa_init(NetClientState *peer, const char *device,
> > +                               const char *name, const char *vhostdev,
> > +                               bool has_fd, char *fd)
> > +{
> > +    NetClientState *nc = NULL;
> > +    VhostVDPAState *s;
> > +    int vdpa_device_fd = -1;
> > +    Error *err = NULL;
> > +
> > +    assert(name);
> > +
> > +    nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name);
> > +    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-vdpa");
> > +    nc->queue_index = 0;
> > +
> > +    s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +
> > +    if (has_fd) {
> > +        vdpa_device_fd = monitor_fd_param(cur_mon, fd, &err);
> > +    } else{
> > +        vdpa_device_fd = open(vhostdev, O_RDWR);
> > +    }
> > +
> > +    if (vdpa_device_fd == -1) {
> > +        return -errno;
> > +     }
> > +    s->vhost_vdpa.device_fd = vdpa_device_fd;
> > +    vhost_vdpa_add(nc, (void *)&s->vhost_vdpa);
> > +    assert(s->vhost_net);
> > +    /* check the device id for vdpa */
> > +    return vhost_vdpa_check_device_id(nc);
>
>
> We probably need to do the check earlier. If we do things like this, we
> will probably leak vhost_device_fd.
>
There may be a problem getting this device id before the vdpa_add;
I will double-check this and try to find a solution.
>
> > +}
> > +
> > +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
> > +{
> > +    const char *name = opaque;
> > +    const char *driver, *netdev;
> > +
> > +    driver = qemu_opt_get(opts, "driver");
> > +    netdev = qemu_opt_get(opts, "netdev");
> > +    if (!driver || !netdev) {
> > +        return 0;
> > +    }
> > +
> > +    if (strcmp(netdev, name) == 0 &&
> > +        !g_str_has_prefix(driver, "virtio-net-")) {
> > +        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> > +                        NetClientState *peer, Error **errp)
> > +{
> > +    const NetdevVhostVDPAOptions *opts;
> > +
> > +    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    opts = &netdev->u.vhost_vdpa;
> > +    /* verify net frontend */
> > +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> > +                          (char *)name, errp)) {
> > +        return -1;
> > +    }
> > +    return net_vhost_vdpa_init(peer, "vhost_vdpa", name, opts->vhostdev,
> > +                    opts->has_fd, opts->fd);
> > +}
> > diff --git a/qapi/net.json b/qapi/net.json
> > index 335295be50..0f4fa6fffc 100644
> > --- a/qapi/net.json
> > +++ b/qapi/net.json
> > @@ -441,6 +441,23 @@
> >       '*queues':        'int' } }
> >
> >   ##
> > +# @NetdevVhostVDPAOptions:
> > +#
> > +# Vhost-vdpa network backend
> > +#
> > +# @vhostdev: name of a vdpa dev path in sysfs
> > +#
> > +# @queues: number of queues to be created for multiqueue vhost-vdpa
> > +#          (default: 1) (Since 5.1)
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'struct': 'NetdevVhostVDPAOptions',
> > +  'data': {
> > +    '*vhostdev':     'str',
> > +    '*fd':           'str',
> > +    '*queues':       'int' } }
> > +##
> >   # @NetClientDriver:
> >   #
> >   # Available netdev drivers.
> > @@ -451,7 +468,7 @@
> >   ##
> >   { 'enum': 'NetClientDriver',
> >     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> > -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> > +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
> >
> >   ##
> >   # @Netdev:
> > @@ -479,7 +496,8 @@
> >       'bridge':   'NetdevBridgeOptions',
> >       'hubport':  'NetdevHubPortOptions',
> >       'netmap':   'NetdevNetmapOptions',
> > -    'vhost-user': 'NetdevVhostUserOptions' } }
> > +    'vhost-user': 'NetdevVhostUserOptions',
> > +    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
> >
> >   ##
> >   # @NetLegacy:
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 65c9473b73..08256d9d4e 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -2291,6 +2291,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
> >   #ifdef CONFIG_POSIX
> >       "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
> >       "                configure a vhost-user network, backed by a chardev 'dev'\n"
> > +#endif
> > +#ifdef CONFIG_POSIX
> > +    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
> > +    "                configure a vhost-vdpa network, backed by a vhostdev 'dev'\n"
> >   #endif
> >       "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
> >       "                configure a hub port on the hub with ID 'n'\n", QEMU_ARCH_ALL)
> > @@ -2310,6 +2314,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
> >   #endif
> >   #ifdef CONFIG_POSIX
> >       "vhost-user|"
> > +#endif
> > +#ifdef CONFIG_POSIX
> > +    "vhost-vdpa|"
> >   #endif
> >       "socket][,option][,...][mac=macaddr]\n"
> >       "                initialize an on-board / default host NIC (using MAC address\n"
> > @@ -2749,6 +2756,18 @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
> >        -device virtio-net-pci,netdev=net0
> >   @end example
> >
> > +@item -netdev vhost-vdpa,vhostdev=/path/to/dev
> > +Establish a vhost-vdpa netdev, backed by a vhostdev. The chardev should
> > +be a unix domain socket backed one.
>
>
> This seems wrong; we don't use a unix domain socket.
>
> Thanks
>
Thanks Jason, will fix this
>
> >   The vhost-vdpa uses a specifically defined
> > +protocol to pass vhost ioctl replacement messages to an application on the other
> > +end of the socket.
> > +Example:
> > +@example
> > +qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> > +     -numa node,memdev=mem \
> > +     -netdev type=vhost-vdpa,id=net0,vhostdev=/path/to/dev \
> > +     -device virtio-net-pci,netdev=net0
> > +@end example
> >   @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
> >
> >   Create a hub port on the emulated hub with ID @var{hubid}.
>
diff mbox series

Patch

diff --git a/configure b/configure
index 6099be1d84..bdd732e3bb 100755
--- a/configure
+++ b/configure
@@ -1505,6 +1505,10 @@  for opt do
   ;;
   --enable-vhost-user) vhost_user="yes"
   ;;
+  --disable-vhost-vdpa) vhost_vdpa="no"
+  ;;
+  --enable-vhost-vdpa) vhost_vdpa="yes"
+  ;;
   --disable-vhost-kernel) vhost_kernel="no"
   ;;
   --enable-vhost-kernel) vhost_kernel="yes"
@@ -1780,6 +1784,7 @@  disabled with --disable-FEATURE, default is enabled if available:
   vhost-crypto    vhost-user-crypto backend support
   vhost-kernel    vhost kernel backend support
   vhost-user      vhost-user backend support
+  vhost-vdpa      vhost-vdpa backend support
   spice           spice
   rbd             rados block device (rbd)
   libiscsi        iscsi support
@@ -2241,6 +2246,10 @@  test "$vhost_user" = "" && vhost_user=yes
 if test "$vhost_user" = "yes" && test "$mingw32" = "yes"; then
   error_exit "vhost-user isn't available on win32"
 fi
+test "$vhost_vdpa" = "" && vhost_vdpa=yes
+if test "$vhost_vdpa" = "yes" && test "$mingw32" = "yes"; then
+  error_exit "vhost-vdpa isn't available on win32"
+fi
 test "$vhost_kernel" = "" && vhost_kernel=$linux
 if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then
   error_exit "vhost-kernel is only available on Linux"
@@ -2269,6 +2278,11 @@  test "$vhost_user_fs" = "" && vhost_user_fs=$vhost_user
 if test "$vhost_user_fs" = "yes" && test "$vhost_user" = "no"; then
   error_exit "--enable-vhost-user-fs requires --enable-vhost-user"
 fi
+#vhost-vdpa backends
+test "$vhost_net_vdpa" = "" && vhost_net_vdpa=$vhost_vdpa
+if test "$vhost_net_vdpa" = "yes" && test "$vhost_vdpa" = "no"; then
+  error_exit "--enable-vhost-net-vdpa requires --enable-vhost-vdpa"
+fi
 
 # OR the vhost-kernel and vhost-user values for simplicity
 if test "$vhost_net" = ""; then
@@ -6543,6 +6557,7 @@  echo "vhost-scsi support $vhost_scsi"
 echo "vhost-vsock support $vhost_vsock"
 echo "vhost-user support $vhost_user"
 echo "vhost-user-fs support $vhost_user_fs"
+echo "vhost-vdpa support $vhost_vdpa"
 echo "Trace backends    $trace_backends"
 if have_backend "simple"; then
 echo "Trace output file $trace_file-<pid>"
@@ -7031,6 +7046,9 @@  fi
 if test "$vhost_net_user" = "yes" ; then
   echo "CONFIG_VHOST_NET_USER=y" >> $config_host_mak
 fi
+if test "$vhost_net_vdpa" = "yes" ; then
+  echo "CONFIG_VHOST_NET_VDPA=y" >> $config_host_mak
+fi
 if test "$vhost_crypto" = "yes" ; then
   echo "CONFIG_VHOST_CRYPTO=y" >> $config_host_mak
 fi
@@ -7043,6 +7061,9 @@  fi
 if test "$vhost_user" = "yes" ; then
   echo "CONFIG_VHOST_USER=y" >> $config_host_mak
 fi
+if test "$vhost_vdpa" = "yes" ; then
+  echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
+fi
 if test "$vhost_user_fs" = "yes" ; then
   echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak
 fi
diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
new file mode 100644
index 0000000000..6ce0d04f72
--- /dev/null
+++ b/include/net/vhost-vdpa.h
@@ -0,0 +1,19 @@ 
+/*
+ * vhost-vdpa.h
+ *
+ * Copyright(c) 2017-2018 Intel Corporation.
+ * Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_VDPA_H
+#define VHOST_VDPA_H
+
+struct vhost_net;
+struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
+uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
+
+#endif /* VHOST_VDPA_H */
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 6548a5a105..b47844bf29 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -40,4 +40,5 @@  uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 int vhost_set_state(NetClientState *nc, uint8_t state);
+int vhost_net_get_device_id(struct vhost_net *net, uint32_t *device_id);
 #endif
diff --git a/net/Makefile.objs b/net/Makefile.objs
index c5d076d19c..5ab45545db 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -26,7 +26,7 @@  tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
 tap-obj-y ?= tap-stub.o
 common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
 common-obj-$(CONFIG_WIN32) += tap-win32.o
-
+common-obj-$(CONFIG_VHOST_NET_VDPA) += vhost-vdpa.o
 vde.o-libs = $(VDE_LIBS)
 
 common-obj-$(CONFIG_CAN_BUS) += can/
diff --git a/net/clients.h b/net/clients.h
index a6ef267e19..92f9b59aed 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -61,4 +61,6 @@  int net_init_netmap(const Netdev *netdev, const char *name,
 int net_init_vhost_user(const Netdev *netdev, const char *name,
                         NetClientState *peer, Error **errp);
 
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp);
 #endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/net.c b/net/net.c
index b3192deaf1..9eff1ae982 100644
--- a/net/net.c
+++ b/net/net.c
@@ -965,6 +965,9 @@  static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
 #ifdef CONFIG_VHOST_NET_USER
         [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
 #endif
+#ifdef CONFIG_VHOST_NET_VDPA
+        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
+#endif
 #ifdef CONFIG_L2TPV3
         [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
 #endif
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
new file mode 100644
index 0000000000..c29678fdf1
--- /dev/null
+++ b/net/vhost-vdpa.c
@@ -0,0 +1,227 @@ 
+/*
+ * vhost-vdpa.c
+ *
+ * Copyright(c) 2017-2018 Intel Corporation.
+ * Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-vdpa.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qapi/error.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <err.h>
+#include <linux/virtio_net.h>
+#include "monitor/monitor.h"
+#include "qemu/sockets.h"
+#include "hw/virtio/vhost.h"
+
+/* Todo:need to add the multiqueue support here */
+typedef struct VhostVDPAState {
+    NetClientState nc;
+    struct vhost_vdpa vhost_vdpa;
+    VHostNetState *vhost_net;
+    uint64_t acked_features;
+    bool started;
+} VhostVDPAState;
+
+VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    return s->vhost_net;
+}
+
+uint64_t vhost_vdpa_get_acked_features(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    return s->acked_features;
+}
+
+static void vhost_vdpa_del(NetClientState *ncs)
+{
+    VhostVDPAState *s;
+
+    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    s = DO_UPCAST(VhostVDPAState, nc, ncs);
+
+    if (s->vhost_net) {
+        /* save acked features */
+        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+        if (features) {
+            s->acked_features = features;
+         }
+        vhost_net_cleanup(s->vhost_net);
+    }
+}
+
+static int vhost_vdpa_add(NetClientState *ncs, void *be)
+{
+    VhostNetOptions options;
+    struct vhost_net *net = NULL;
+    VhostVDPAState *s;
+
+    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
+
+    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    s = DO_UPCAST(VhostVDPAState, nc, ncs);
+
+    options.net_backend = ncs;
+    options.opaque      = be;
+    options.busyloop_timeout = 0;
+    net = vhost_net_init(&options);
+    if (!net) {
+        error_report("failed to init vhost_net for queue");
+        goto err;
+     }
+
+     if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+        g_free(s->vhost_net);
+     }
+     s->vhost_net = net;
+
+    return 0;
+
+err:
+    if (net) {
+        vhost_net_cleanup(net);
+    }
+    vhost_vdpa_del(ncs);
+    return -1;
+}
+static void vhost_vdpa_cleanup(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+
+    if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+        g_free(s->vhost_net);
+        s->vhost_net = NULL;
+    }
+
+    qemu_purge_queued_packets(nc);
+}
+
+static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
+{
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    return true;
+}
+
+static bool vhost_vdpa_has_ufo(NetClientState *nc)
+{
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    uint64_t  features = 0;
+
+    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
+    features = vhost_net_get_features(s->vhost_net, features);
+    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
+
+}
+
+static int vhost_vdpa_check_device_id(NetClientState *nc)
+{
+    uint32_t device_id;
+    int ret;
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    /* Get the device id from hw*/
+    ret = vhost_net_get_device_id(s->vhost_net, &device_id);
+    if (device_id != VIRTIO_ID_NET) {
+        return -ENOTSUP;
+    }
+    return ret;
+}
+
+static NetClientInfo net_vhost_vdpa_info = {
+        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
+        .size = sizeof(VhostVDPAState),
+        .cleanup = vhost_vdpa_cleanup,
+        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
+        .has_ufo = vhost_vdpa_has_ufo,
+};
+
+static int net_vhost_vdpa_init(NetClientState *peer, const char *device,
+                               const char *name, const char *vhostdev,
+                               bool has_fd, char *fd)
+{
+    NetClientState *nc = NULL;
+    VhostVDPAState *s;
+    int vdpa_device_fd = -1;
+    Error *err = NULL;
+
+    assert(name);
+
+    nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name);
+    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-vdpa");
+    nc->queue_index = 0;
+
+    s = DO_UPCAST(VhostVDPAState, nc, nc);
+
+    if (has_fd) {
+        vdpa_device_fd = monitor_fd_param(cur_mon, fd, &err);
+    } else{
+        vdpa_device_fd = open(vhostdev, O_RDWR);
+    }
+
+    if (vdpa_device_fd == -1) {
+        return -errno;
+     }
+    s->vhost_vdpa.device_fd = vdpa_device_fd;
+    vhost_vdpa_add(nc, (void *)&s->vhost_vdpa);
+    assert(s->vhost_net);
+    /* check the device id for vdpa */
+    return vhost_vdpa_check_device_id(nc);
+}
+
+static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
+{
+    const char *name = opaque;
+    const char *driver, *netdev;
+
+    driver = qemu_opt_get(opts, "driver");
+    netdev = qemu_opt_get(opts, "netdev");
+    if (!driver || !netdev) {
+        return 0;
+    }
+
+    if (strcmp(netdev, name) == 0 &&
+        !g_str_has_prefix(driver, "virtio-net-")) {
+        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
+        return -1;
+    }
+
+    return 0;
+}
+
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
+{
+    const NetdevVhostVDPAOptions *opts;
+
+    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    opts = &netdev->u.vhost_vdpa;
+    /* verify net frontend */
+    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
+                          (char *)name, errp)) {
+        return -1;
+    }
+    return net_vhost_vdpa_init(peer, "vhost_vdpa", name, opts->vhostdev,
+                    opts->has_fd, opts->fd);
+}
diff --git a/qapi/net.json b/qapi/net.json
index 335295be50..0f4fa6fffc 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -441,6 +441,23 @@ 
     '*queues':        'int' } }
 
 ##
+# @NetdevVhostVDPAOptions:
+#
+# Vhost-vdpa network backend
+#
+# @vhostdev: name of a vdpa dev path in sysfs
+#
+# @queues: number of queues to be created for multiqueue vhost-vdpa
+#          (default: 1) (Since 5.1)
+#
+# Since: 5.1
+##
+{ 'struct': 'NetdevVhostVDPAOptions',
+  'data': {
+    '*vhostdev':     'str',
+    '*fd':           'str',
+    '*queues':       'int' } }
+##
 # @NetClientDriver:
 #
 # Available netdev drivers.
@@ -451,7 +468,7 @@ 
 ##
 { 'enum': 'NetClientDriver',
   'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
-            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
+            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
 
 ##
 # @Netdev:
@@ -479,7 +496,8 @@ 
     'bridge':   'NetdevBridgeOptions',
     'hubport':  'NetdevHubPortOptions',
     'netmap':   'NetdevNetmapOptions',
-    'vhost-user': 'NetdevVhostUserOptions' } }
+    'vhost-user': 'NetdevVhostUserOptions',
+    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
 
 ##
 # @NetLegacy:
diff --git a/qemu-options.hx b/qemu-options.hx
index 65c9473b73..08256d9d4e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2291,6 +2291,10 @@  DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 #ifdef CONFIG_POSIX
     "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
     "                configure a vhost-user network, backed by a chardev 'dev'\n"
+#endif
+#ifdef CONFIG_POSIX
+    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
+    "                configure a vhost-vdpa network, backed by a vhostdev 'dev'\n"
 #endif
     "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
     "                configure a hub port on the hub with ID 'n'\n", QEMU_ARCH_ALL)
@@ -2310,6 +2314,9 @@  DEF("nic", HAS_ARG, QEMU_OPTION_nic,
 #endif
 #ifdef CONFIG_POSIX
     "vhost-user|"
+#endif
+#ifdef CONFIG_POSIX
+    "vhost-vdpa|"
 #endif
     "socket][,option][,...][mac=macaddr]\n"
     "                initialize an on-board / default host NIC (using MAC address\n"
@@ -2749,6 +2756,18 @@  qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
      -device virtio-net-pci,netdev=net0
 @end example
 
+@item -netdev vhost-vdpa,vhostdev=/path/to/dev
+Establish a vhost-vdpa netdev, backed by a vhostdev. The chardev should
+be a unix domain socket backed one. The vhost-vdpa uses a specifically defined
+protocol to pass vhost ioctl replacement messages to an application on the other
+end of the socket.
+Example:
+@example
+qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
+     -numa node,memdev=mem \
+     -netdev type=vhost-vdpa,id=net0,vhostdev=/path/to/dev \
+     -device virtio-net-pci,netdev=net0
+@end example
 @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
 
 Create a hub port on the emulated hub with ID @var{hubid}.