diff mbox series

[RFC,v1,3/4] vhost-vdpa: implement vhost-vdpa backend

Message ID 20200420093241.4238-4-lulu@redhat.com (mailing list archive)
State New, archived
Headers show
Series vDPA support in qemu | expand

Commit Message

Cindy Lu April 20, 2020, 9:32 a.m. UTC
Currently we have 2 types of vhost backends in QEMU: vhost kernel and
vhost-user. The above patch provides a generic device for vDPA purpose,
this vDPA device exposes to user space a non-vendor-specific configuration
interface for setting up a vhost HW accelerator, this patch set introduces
a third vhost backend called vhost-vdpa based on the vDPA interface.

Vhost-vdpa usage:

  qemu-system-x86_64 -cpu host -enable-kvm \
    ......
  -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
  -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \

Author: Tiwei Bie
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 hw/net/vhost_net.c                |  43 ++++
 hw/net/virtio-net.c               |   9 +
 hw/virtio/Makefile.objs           |   2 +-
 hw/virtio/vhost-backend.c         |   3 +
 hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
 hw/virtio/vhost.c                 |   5 +
 include/hw/virtio/vhost-backend.h |   6 +-
 include/hw/virtio/vhost-vdpa.h    |  14 ++
 8 files changed, 459 insertions(+), 2 deletions(-)
 create mode 100644 hw/virtio/vhost-vdpa.c
 create mode 100644 include/hw/virtio/vhost-vdpa.h

Comments

Eric Blake April 20, 2020, 2:51 p.m. UTC | #1
On 4/20/20 4:32 AM, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
> 
> Vhost-vdpa usage:
> 
>    qemu-system-x86_64 -cpu host -enable-kvm \
>      ......
>    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>    -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie

Another questionable authorship line; should this be Signed-off-by?  (Do 
we have permission from Tiwei Bie to include this code?)

> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---

> +++ b/hw/virtio/vhost-vdpa.c
> @@ -0,0 +1,379 @@
> +/*
> + * vhost-vdpa
> + *
> + *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> + *  Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.

Another questionable "All rights reserved"


> --- /dev/null
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -0,0 +1,14 @@
> +
> +#ifndef HW_VIRTIO_VHOST_VDPA_H
> +#define HW_VIRTIO_VHOST_VDPA_H
> +

All new files should include a copyright and license, even if they are 
short.
Jason Wang April 21, 2020, 3:56 a.m. UTC | #2
On 2020/4/20 下午5:32, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
>
> Vhost-vdpa usage:
>
>    qemu-system-x86_64 -cpu host -enable-kvm \
>      ......
>    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>    -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \


Actually, this part should belong to patch 2.

And we probably need to add a comment that vIOMMU is not supported right 
now.


>
> Author: Tiwei Bie
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>   hw/net/vhost_net.c                |  43 ++++
>   hw/net/virtio-net.c               |   9 +
>   hw/virtio/Makefile.objs           |   2 +-
>   hw/virtio/vhost-backend.c         |   3 +
>   hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
>   hw/virtio/vhost.c                 |   5 +
>   include/hw/virtio/vhost-backend.h |   6 +-
>   include/hw/virtio/vhost-vdpa.h    |  14 ++
>   8 files changed, 459 insertions(+), 2 deletions(-)
>   create mode 100644 hw/virtio/vhost-vdpa.c
>   create mode 100644 include/hw/virtio/vhost-vdpa.h
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -17,8 +17,10 @@
>   #include "net/net.h"
>   #include "net/tap.h"
>   #include "net/vhost-user.h"
> +#include "net/vhost-vdpa.h"
>   
>   #include "standard-headers/linux/vhost_types.h"
> +#include "linux-headers/linux/vhost.h"
>   #include "hw/virtio/virtio-net.h"
>   #include "net/vhost_net.h"
>   #include "qemu/error-report.h"
> @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
>       VHOST_INVALID_FEATURE_BIT
>   };
>   
> +static const int vdpa_feature_bits[] = {
> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> +    VIRTIO_RING_F_INDIRECT_DESC,
> +    VIRTIO_RING_F_EVENT_IDX,
> +    VIRTIO_F_ANY_LAYOUT,
> +    VIRTIO_F_VERSION_1,
> +    VIRTIO_NET_F_CSUM,
> +    VIRTIO_NET_F_GUEST_CSUM,
> +    VIRTIO_NET_F_GSO,
> +    VIRTIO_NET_F_GUEST_TSO4,
> +    VIRTIO_NET_F_GUEST_TSO6,
> +    VIRTIO_NET_F_GUEST_ECN,
> +    VIRTIO_NET_F_GUEST_UFO,
> +    VIRTIO_NET_F_HOST_TSO4,
> +    VIRTIO_NET_F_HOST_TSO6,
> +    VIRTIO_NET_F_HOST_ECN,
> +    VIRTIO_NET_F_HOST_UFO,
> +    VIRTIO_NET_F_MRG_RXBUF,
> +    VIRTIO_NET_F_MTU,
> +    VIRTIO_F_IOMMU_PLATFORM,
> +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> +    VHOST_INVALID_FEATURE_BIT
> +};
>   static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>   {
>       const int *feature_bits = 0;
> @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>       case NET_CLIENT_DRIVER_VHOST_USER:
>           feature_bits = user_feature_bits;
>           break;
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        feature_bits = vdpa_feature_bits;
> +        break;
>       default:
>           error_report("Feature bits not defined for this type: %d",
>                   net->nc->info->type);
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>           assert(vhost_net);
>           break;
>   #endif
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> +        assert(vhost_net);
> +        break;
>       default:
>           break;
>       }
> @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
>   
>       return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
>   }
> +int vhost_set_state(NetClientState *nc, int state)
> +{
> +    struct vhost_net *net = get_vhost_net(nc);
> +    struct vhost_dev *hdev = &net->dev;
> +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> +        if (hdev->vhost_ops->vhost_set_state) {
> +                return hdev->vhost_ops->vhost_set_state(hdev, state);
> +             }
> +        }
> +    return 0;
> +}
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index db3d7c38e6..bbecd7ab96 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>       VirtIODevice *vdev = VIRTIO_DEVICE(n);
>       NetClientState *nc = qemu_get_queue(n->nic);
>       int queues = n->multiqueue ? n->max_queues : 1;
> +    NetClientState *peer = nc->peer;


qemu_get_peer()?


> +    uint8_t status_set  = vdev->status ;
> +    uint8_t vhost_started_pre = n->vhost_started;
>   
>       if (!get_vhost_net(nc->peer)) {
>           return;
> @@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>                   return;
>               }
>           }
> +        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
>   
>           n->vhost_started = 1;
>           r = vhost_net_start(vdev, n->nic->ncs, queues);
> @@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>               error_report("unable to start vhost net: %d: "
>                            "falling back on userspace virtio", -r);
>               n->vhost_started = 0;
> +            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
>           }
>       } else {
>           vhost_net_stop(vdev, n->nic->ncs, queues);
> +        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
>           n->vhost_started = 0;
>       }
> +    if (vhost_started_pre != n->vhost_started) {
> +            vhost_set_state(peer, status_set);
> +    }
>   }


I think this deserves an independent patch.



>   
>   static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
> index e2f70fbb89..17361d959e 100644
> --- a/hw/virtio/Makefile.objs
> +++ b/hw/virtio/Makefile.objs
> @@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y)
>   common-obj-y += virtio-bus.o
>   obj-y += virtio.o
>   
> -obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o
> +obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o vhost-vdpa.o
>   common-obj-$(call lnot,$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
>   obj-$(CONFIG_VHOST_USER) += vhost-user.o
>   
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 48905383f8..935cd9e561 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
>           dev->vhost_ops = &user_ops;
>           break;
>   #endif
> +    case VHOST_BACKEND_TYPE_VDPA:
> +        dev->vhost_ops = &vdpa_ops;
> +        break;
>       default:
>           error_report("Unknown vhost backend type");
>           r = -1;
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> new file mode 100644
> index 0000000000..213b327600
> --- /dev/null
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -0,0 +1,379 @@
> +/*
> + * vhost-vdpa
> + *
> + *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> + *  Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include <linux/vhost.h>
> +#include <linux/vfio.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +#include "hw/virtio/vhost.h"
> +#include "hw/virtio/vhost-backend.h"
> +#include "hw/virtio/virtio-net.h"
> +#include "hw/virtio/vhost-vdpa.h"
> +#include "qemu/main-loop.h"
> +#include <linux/kvm.h>
> +#include "sysemu/kvm.h"
> +
> +
> +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
> +{
> +    return (!memory_region_is_ram(section->mr) &&
> +            !memory_region_is_iommu(section->mr)) ||
> +           /*
> +            * Sizing an enabled 64-bit BAR can cause spurious mappings to
> +            * addresses in the upper part of the 64-bit address space.  These
> +            * are never accessed by the CPU and beyond the address width of
> +            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
> +            */
> +           section->offset_within_address_space & (1ULL << 63);
> +}
> +
> +static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
> +                              void *vaddr, bool readonly)
> +{
> +    struct vhost_msg_v2 msg;
> +    int fd = v->device_fd;
> +    int ret = 0;
> +
> +    msg.type = VHOST_IOTLB_MSG_V2;


Since V2 of the message is used here, I believe we need a kernel patch 
to allow the querying of backend capability.


> +    msg.iotlb.iova = iova;
> +    msg.iotlb.size = size;
> +    msg.iotlb.uaddr = (uint64_t)vaddr;
> +    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
> +    msg.iotlb.type = VHOST_IOTLB_UPDATE;
> +
> +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> +        error_report("failed to write, fd=%d, errno=%d (%s)",
> +            fd, errno, strerror(errno));
> +        return -EIO ;
> +    }
> +
> +    return ret;
> +}
> +
> +static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
> +                                hwaddr size)
> +{
> +    struct vhost_msg_v2 msg;
> +    int fd = v->device_fd;
> +    int ret = 0;
> +
> +    msg.type = VHOST_IOTLB_MSG_V2;
> +    msg.iotlb.iova = iova;
> +    msg.iotlb.size = size;
> +    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
> +
> +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> +        error_report("failed to write, fd=%d, errno=%d (%s)",
> +            fd, errno, strerror(errno));
> +        return -EIO ;
> +    }
> +
> +    return ret;
> +}
> +
> +static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> +    hwaddr iova;
> +    Int128 llend, llsize;
> +    void *vaddr;
> +    int ret;
> +
> +    if (vhost_vdpa_listener_skipped_section(section)) {
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    memory_region_ref(section->mr);
> +
> +    /* Here we assume that memory_region_is_ram(section->mr)==true */
> +
> +    vaddr = memory_region_get_ram_ptr(section->mr) +
> +            section->offset_within_region +
> +            (iova - section->offset_within_address_space);
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> +                             vaddr, section->readonly);
> +    if (ret) {
> +        error_report("vhost vdpa map fail!");
> +        if (memory_region_is_ram_device(section->mr)) {
> +            /* Allow unexpected mappings not to be fatal for RAM devices */
> +            error_report("map ram fail!");
> +          return ;
> +        }
> +        goto fail;
> +    }
> +
> +    return;
> +
> +fail:
> +    if (memory_region_is_ram_device(section->mr)) {
> +        error_report("failed to vdpa_dma_map. pci p2p may not work");
> +        return;
> +
> +    }
> +    /*
> +     * On the initfn path, store the first error in the container so we
> +     * can gracefully fail.  Runtime, there's not much we can do other
> +     * than throw a hardware error.
> +     */
> +    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
> +    return;
> +
> +}
> +
> +static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> +    hwaddr iova;
> +    Int128 llend, llsize;
> +    int ret;
> +    bool try_unmap = true;
> +
> +    if (vhost_vdpa_listener_skipped_section(section)) {
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    if (try_unmap) {
> +        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
> +        if (ret) {
> +            error_report("vhost_vdpa dma unmap error!");
> +        }
> +    }
> +
> +    memory_region_unref(section->mr);
> +}
> +


I think it's better to add a comment explaining why vhost-vdpa uses a 
different listener from the one used by other vhost backends (e.g. 
kernel or user).


> +static const MemoryListener vhost_vdpa_memory_listener = {
> +    .region_add = vhost_vdpa_listener_region_add,
> +    .region_del = vhost_vdpa_listener_region_del,
> +};
> +
> +
> +static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
> +                             void *arg)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    int fd = v->device_fd;
> +
> +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> +
> +    return ioctl(fd, request, arg);
> +}
> +
> +
> +
> +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
> +{
> +    struct vhost_vdpa *v;
> +
> +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> +
> +    v = opaque;
> +    dev->opaque =  opaque ;
> +
> +    v->listener = vhost_vdpa_memory_listener;
> +    memory_listener_register(&v->listener, &address_space_memory);
> +
> +    return 0;
> +}
> +
> +static int vhost_vdpa_cleanup(struct vhost_dev *dev)
> +{
> +    struct vhost_vdpa *v;
> +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> +
> +    v = dev->opaque;
> +    memory_listener_unregister(&v->listener);
> +
> +    dev->opaque = NULL;
> +    return 0;
> +}
> +


A comment is needed here to explain why INT_MAX is used.


> +static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
> +{
> +    return INT_MAX;
> +}
> +
> +static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> +                                   struct vhost_log *log)
> +{
> +    return 0;
> +}


I think we should fail this function since we don't support dirty page 
tracking now.

And it's not guaranteed that a dirty page bitmap will be used in the future.


> +
> +static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
> +                                    struct vhost_memory *mem)
> +{
> +
> +    if (mem->padding) {
> +        return -1;
> +    }
> +
> +    return 0;


A comment is needed to explain why the mem table is not used (e.g. we use 
the IOTLB API instead).


> +}
> +
> +static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
> +                                     struct vhost_vring_addr *addr)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
> +}
> +
> +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
> +                                    struct vhost_vring_state *ring)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
> +}
> +
> +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> +                                     struct vhost_vring_state *ring)
> +{
> +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> +}
> +
> +static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> +                                     struct vhost_vring_state *ring)
> +{
> +
> +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> +}
> +
> +static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
> +                                     struct vhost_vring_file *file)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
> +}
> +
> +static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
> +                                     struct vhost_vring_file *file)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
> +}
> +
> +static int vhost_vdpa_set_features(struct vhost_dev *dev,
> +                                   uint64_t features)
> +{
> +
> +    features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);


This seems tricky, I don't think we need this actually.


> +    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
> +
> +}
> +
> +static int vhost_vdpa_get_features(struct vhost_dev *dev,
> +                                   uint64_t *features)
> +{
> +    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
> +}
> +
> +static int vhost_vdpa_set_owner(struct vhost_dev *dev)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
> +}
> +
> +static int vhost_vdpa_reset_device(struct vhost_dev *dev)
> +{
> +    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
> +}
> +
> +static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
> +{
> +    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
> +
> +    return idx - dev->vq_index;
> +}
> +
> +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +{
> +    int i;
> +
> +    for (i = 0; i < dev->nvqs; ++i) {
> +        struct vhost_vring_state state = {
> +            .index = dev->vq_index + i,
> +            .num   = enable,
> +        };
> +
> +        state.num = 1;
> +
> +        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);


Please make sure patch 4 comes first, so that we don't need to fix this in 
patch 4.


> +    }
> +
> +    return 0;
> +}
> +
> +static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
> +{
> +    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
> +}
> +
> +
> +const VhostOps vdpa_ops = {
> +        .backend_type = VHOST_BACKEND_TYPE_VDPA,
> +        .vhost_backend_init = vhost_vdpa_init,
> +        .vhost_backend_cleanup = vhost_vdpa_cleanup,
> +        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
> +        .vhost_set_log_base = vhost_vdpa_set_log_base,
> +        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
> +        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
> +        .vhost_set_vring_endian = NULL,
> +        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
> +        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
> +        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
> +        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
> +        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
> +        .vhost_set_features = vhost_vdpa_set_features,
> +        .vhost_get_features = vhost_vdpa_get_features,
> +        .vhost_set_owner = vhost_vdpa_set_owner,
> +        .vhost_reset_device = vhost_vdpa_reset_device,
> +        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> +        .vhost_requires_shm_log = NULL,
> +        .vhost_migration_done = NULL,
> +        .vhost_backend_can_merge = NULL,
> +        .vhost_net_set_mtu = NULL,
> +        .vhost_set_iotlb_callback = NULL,
> +        .vhost_send_device_iotlb_msg = NULL,
> +        .vhost_set_state = vhost_vdpa_set_state,
> +};
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 4da0d5a6c5..d1f2c4add7 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
>           .log_guest_addr = vq->used_phys,
>           .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
>       };
> +    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
> +        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
> +        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
> +        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
> +    }


Comment is needed to explain why vDPA differs from others.

Thanks


>       int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
>       if (r < 0) {
>           VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index 6f6670783f..d81bd9885f 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
>       VHOST_BACKEND_TYPE_NONE = 0,
>       VHOST_BACKEND_TYPE_KERNEL = 1,
>       VHOST_BACKEND_TYPE_USER = 2,
> -    VHOST_BACKEND_TYPE_MAX = 3,
> +    VHOST_BACKEND_TYPE_VDPA = 3,
> +    VHOST_BACKEND_TYPE_MAX = 4,
>   } VhostBackendType;
>   
>   typedef enum VhostSetConfigType {
> @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
>   typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
>                                           struct vhost_inflight *inflight);
>   
> +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
>   typedef struct VhostOps {
>       VhostBackendType backend_type;
>       vhost_backend_init vhost_backend_init;
> @@ -152,9 +154,11 @@ typedef struct VhostOps {
>       vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
>       vhost_get_inflight_fd_op vhost_get_inflight_fd;
>       vhost_set_inflight_fd_op vhost_set_inflight_fd;
> +    vhost_set_state_op vhost_set_state;
>   } VhostOps;
>   
>   extern const VhostOps user_ops;
> +extern const VhostOps vdpa_ops;
>   
>   int vhost_set_backend_type(struct vhost_dev *dev,
>                              VhostBackendType backend_type);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> new file mode 100644
> index 0000000000..889c1a4410
> --- /dev/null
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -0,0 +1,14 @@
> +
> +#ifndef HW_VIRTIO_VHOST_VDPA_H
> +#define HW_VIRTIO_VHOST_VDPA_H
> +
> +#include "hw/virtio/virtio.h"
> +
> +typedef struct vhost_vdpa {
> +    int device_fd;
> +    MemoryListener listener;
> +} VhostVDPA;
> +
> +extern AddressSpace address_space_memory;
> +
> +#endif
Cindy Lu April 21, 2020, 9:12 a.m. UTC | #3
On Tue, Apr 21, 2020 at 11:57 AM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2020/4/20 下午5:32, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA purpose,
> > this vDPA device exposes to user space a non-vendor-specific
> configuration
> > interface for setting up a vhost HW accelerator, this patch set
> introduces
> > a third vhost backend called vhost-vdpa based on the vDPA interface.
> >
> > Vhost-vdpa usage:
> >
> >    qemu-system-x86_64 -cpu host -enable-kvm \
> >      ......
> >    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >    -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
>
>
> Actually, this part should belong to patch 2.
>
> And we probably need to add a comment that vIOMMU is not supported right
> now.
>
>
Will fix this problem.

> >
> > Author: Tiwei Bie
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >   hw/net/vhost_net.c                |  43 ++++
> >   hw/net/virtio-net.c               |   9 +
> >   hw/virtio/Makefile.objs           |   2 +-
> >   hw/virtio/vhost-backend.c         |   3 +
> >   hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
> >   hw/virtio/vhost.c                 |   5 +
> >   include/hw/virtio/vhost-backend.h |   6 +-
> >   include/hw/virtio/vhost-vdpa.h    |  14 ++
> >   8 files changed, 459 insertions(+), 2 deletions(-)
> >   create mode 100644 hw/virtio/vhost-vdpa.c
> >   create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -17,8 +17,10 @@
> >   #include "net/net.h"
> >   #include "net/tap.h"
> >   #include "net/vhost-user.h"
> > +#include "net/vhost-vdpa.h"
> >
> >   #include "standard-headers/linux/vhost_types.h"
> > +#include "linux-headers/linux/vhost.h"
> >   #include "hw/virtio/virtio-net.h"
> >   #include "net/vhost_net.h"
> >   #include "qemu/error-report.h"
> > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
> >       VHOST_INVALID_FEATURE_BIT
> >   };
> >
> > +static const int vdpa_feature_bits[] = {
> > +    VIRTIO_F_NOTIFY_ON_EMPTY,
> > +    VIRTIO_RING_F_INDIRECT_DESC,
> > +    VIRTIO_RING_F_EVENT_IDX,
> > +    VIRTIO_F_ANY_LAYOUT,
> > +    VIRTIO_F_VERSION_1,
> > +    VIRTIO_NET_F_CSUM,
> > +    VIRTIO_NET_F_GUEST_CSUM,
> > +    VIRTIO_NET_F_GSO,
> > +    VIRTIO_NET_F_GUEST_TSO4,
> > +    VIRTIO_NET_F_GUEST_TSO6,
> > +    VIRTIO_NET_F_GUEST_ECN,
> > +    VIRTIO_NET_F_GUEST_UFO,
> > +    VIRTIO_NET_F_HOST_TSO4,
> > +    VIRTIO_NET_F_HOST_TSO6,
> > +    VIRTIO_NET_F_HOST_ECN,
> > +    VIRTIO_NET_F_HOST_UFO,
> > +    VIRTIO_NET_F_MRG_RXBUF,
> > +    VIRTIO_NET_F_MTU,
> > +    VIRTIO_F_IOMMU_PLATFORM,
> > +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +    VHOST_INVALID_FEATURE_BIT
> > +};
> >   static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >   {
> >       const int *feature_bits = 0;
> > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct
> vhost_net *net)
> >       case NET_CLIENT_DRIVER_VHOST_USER:
> >           feature_bits = user_feature_bits;
> >           break;
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        feature_bits = vdpa_feature_bits;
> > +        break;
> >       default:
> >           error_report("Feature bits not defined for this type: %d",
> >                   net->nc->info->type);
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >           assert(vhost_net);
> >           break;
> >   #endif
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +        assert(vhost_net);
> > +        break;
> >       default:
> >           break;
> >       }
> > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net,
> uint16_t mtu)
> >
> >       return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
> >   }
> > +int vhost_set_state(NetClientState *nc, int state)
> > +{
> > +    struct vhost_net *net = get_vhost_net(nc);
> > +    struct vhost_dev *hdev = &net->dev;
> > +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > +        if (hdev->vhost_ops->vhost_set_state) {
> > +                return hdev->vhost_ops->vhost_set_state(hdev, state);
> > +             }
> > +        }
> > +    return 0;
> > +}
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index db3d7c38e6..bbecd7ab96 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> >       VirtIODevice *vdev = VIRTIO_DEVICE(n);
> >       NetClientState *nc = qemu_get_queue(n->nic);
> >       int queues = n->multiqueue ? n->max_queues : 1;
> > +    NetClientState *peer = nc->peer;
>
>
> qemu_get_peer()?
>
Will fix it.

>
> > +    uint8_t status_set  = vdev->status ;
> > +    uint8_t vhost_started_pre = n->vhost_started;
> >
> >       if (!get_vhost_net(nc->peer)) {
> >           return;
> > @@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> >                   return;
> >               }
> >           }
> > +        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
> >
> >           n->vhost_started = 1;
> >           r = vhost_net_start(vdev, n->nic->ncs, queues);
> > @@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> >               error_report("unable to start vhost net: %d: "
> >                            "falling back on userspace virtio", -r);
> >               n->vhost_started = 0;
> > +            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
> >           }
> >       } else {
> >           vhost_net_stop(vdev, n->nic->ncs, queues);
> > +        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
> >           n->vhost_started = 0;
> >       }
> > +    if (vhost_started_pre != n->vhost_started) {
> > +            vhost_set_state(peer, status_set);
> > +    }
> >   }
>
>
> I think this deserves an independent patch.
>
>
Will fix it.

>
> >
> >   static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
> > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
> > index e2f70fbb89..17361d959e 100644
> > --- a/hw/virtio/Makefile.objs
> > +++ b/hw/virtio/Makefile.objs
> > @@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y)
> >   common-obj-y += virtio-bus.o
> >   obj-y += virtio.o
> >
> > -obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o
> vhost-backend.o
> > +obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o
> vhost-backend.o vhost-vdpa.o
> >   common-obj-$(call lnot,$(call
> lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
> >   obj-$(CONFIG_VHOST_USER) += vhost-user.o
> >
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 48905383f8..935cd9e561 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev,
> VhostBackendType backend_type)
> >           dev->vhost_ops = &user_ops;
> >           break;
> >   #endif
> > +    case VHOST_BACKEND_TYPE_VDPA:
> > +        dev->vhost_ops = &vdpa_ops;
> > +        break;
> >       default:
> >           error_report("Unknown vhost backend type");
> >           r = -1;
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > new file mode 100644
> > index 0000000000..213b327600
> > --- /dev/null
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -0,0 +1,379 @@
> > +/*
> > + * vhost-vdpa
> > + *
> > + *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> > + *  Copyright(c) 2020 Red Hat, Inc.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include <linux/vhost.h>
> > +#include <linux/vfio.h>
> > +#include <sys/eventfd.h>
> > +#include <sys/ioctl.h>
> > +#include "hw/virtio/vhost.h"
> > +#include "hw/virtio/vhost-backend.h"
> > +#include "hw/virtio/virtio-net.h"
> > +#include "hw/virtio/vhost-vdpa.h"
> > +#include "qemu/main-loop.h"
> > +#include <linux/kvm.h>
> > +#include "sysemu/kvm.h"
> > +
> > +
> > +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection
> *section)
> > +{
> > +    return (!memory_region_is_ram(section->mr) &&
> > +            !memory_region_is_iommu(section->mr)) ||
> > +           /*
> > +            * Sizing an enabled 64-bit BAR can cause spurious mappings
> to
> > +            * addresses in the upper part of the 64-bit address space.
> These
> > +            * are never accessed by the CPU and beyond the address
> width of
> > +            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU
> width.
> > +            */
> > +           section->offset_within_address_space & (1ULL << 63);
> > +}
> > +
> > +static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr
> size,
> > +                              void *vaddr, bool readonly)
> > +{
> > +    struct vhost_msg_v2 msg;
> > +    int fd = v->device_fd;
> > +    int ret = 0;
> > +
> > +    msg.type = VHOST_IOTLB_MSG_V2;
>
>
> Since V2 of the message is used here, I believe we need a kernel patch
> to allow the querying of backend capability.
>
> Sure, I will provide another patch for kernel

>
> > +    msg.iotlb.iova = iova;
> > +    msg.iotlb.size = size;
> > +    msg.iotlb.uaddr = (uint64_t)vaddr;
> > +    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
> > +    msg.iotlb.type = VHOST_IOTLB_UPDATE;
> > +
> > +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> > +        error_report("failed to write, fd=%d, errno=%d (%s)",
> > +            fd, errno, strerror(errno));
> > +        return -EIO ;
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> > +static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
> > +                                hwaddr size)
> > +{
> > +    struct vhost_msg_v2 msg;
> > +    int fd = v->device_fd;
> > +    int ret = 0;
> > +
> > +    msg.type = VHOST_IOTLB_MSG_V2;
> > +    msg.iotlb.iova = iova;
> > +    msg.iotlb.size = size;
> > +    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
> > +
> > +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> > +        error_report("failed to write, fd=%d, errno=%d (%s)",
> > +            fd, errno, strerror(errno));
> > +        return -EIO ;
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> > +static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> > +                                           MemoryRegionSection *section)
> > +{
> > +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa,
> listener);
> > +    hwaddr iova;
> > +    Int128 llend, llsize;
> > +    void *vaddr;
> > +    int ret;
> > +
> > +    if (vhost_vdpa_listener_skipped_section(section)) {
> > +        return;
> > +    }
> > +
> > +    if (unlikely((section->offset_within_address_space &
> ~TARGET_PAGE_MASK) !=
> > +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> > +        error_report("%s received unaligned region", __func__);
> > +        return;
> > +    }
> > +
> > +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> > +    llend = int128_make64(section->offset_within_address_space);
> > +    llend = int128_add(llend, section->size);
> > +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> > +
> > +    if (int128_ge(int128_make64(iova), llend)) {
> > +        return;
> > +    }
> > +
> > +    memory_region_ref(section->mr);
> > +
> > +    /* Here we assume that memory_region_is_ram(section->mr)==true */
> > +
> > +    vaddr = memory_region_get_ram_ptr(section->mr) +
> > +            section->offset_within_region +
> > +            (iova - section->offset_within_address_space);
> > +
> > +    llsize = int128_sub(llend, int128_make64(iova));
> > +
> > +    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> > +                             vaddr, section->readonly);
> > +    if (ret) {
> > +        error_report("vhost vdpa map fail!");
> > +        if (memory_region_is_ram_device(section->mr)) {
> > +            /* Allow unexpected mappings not to be fatal for RAM
> devices */
> > +            error_report("map ram fail!");
> > +          return ;
> > +        }
> > +        goto fail;
> > +    }
> > +
> > +    return;
> > +
> > +fail:
> > +    if (memory_region_is_ram_device(section->mr)) {
> > +        error_report("failed to vdpa_dma_map. pci p2p may not work");
> > +        return;
> > +
> > +    }
> > +    /*
> > +     * On the initfn path, store the first error in the container so we
> > +     * can gracefully fail.  Runtime, there's not much we can do other
> > +     * than throw a hardware error.
> > +     */
> > +    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
> > +    return;
> > +
> > +}
> > +
> > +static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> > +                                           MemoryRegionSection *section)
> > +{
> > +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa,
> listener);
> > +    hwaddr iova;
> > +    Int128 llend, llsize;
> > +    int ret;
> > +    bool try_unmap = true;
> > +
> > +    if (vhost_vdpa_listener_skipped_section(section)) {
> > +        return;
> > +    }
> > +
> > +    if (unlikely((section->offset_within_address_space &
> ~TARGET_PAGE_MASK) !=
> > +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> > +        error_report("%s received unaligned region", __func__);
> > +        return;
> > +    }
> > +
> > +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> > +    llend = int128_make64(section->offset_within_address_space);
> > +    llend = int128_add(llend, section->size);
> > +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> > +
> > +    if (int128_ge(int128_make64(iova), llend)) {
> > +        return;
> > +    }
> > +
> > +    llsize = int128_sub(llend, int128_make64(iova));
> > +
> > +    if (try_unmap) {
> > +        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
> > +        if (ret) {
> > +            error_report("vhost_vdpa dma unmap error!");
> > +        }
> > +    }
> > +
> > +    memory_region_unref(section->mr);
> > +}
> > +
>
>
> I think it's better to add a comment explaining why vhost-vdpa uses a
> different listener from the one used by other vhost backends (e.g.
> kernel or user).
>
> will fix it

>
> > +static const MemoryListener vhost_vdpa_memory_listener = {
> > +    .region_add = vhost_vdpa_listener_region_add,
> > +    .region_del = vhost_vdpa_listener_region_del,
> > +};
> > +
> > +
> > +static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int
> request,
> > +                             void *arg)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +    int fd = v->device_fd;
> > +
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    return ioctl(fd, request, arg);
> > +}
> > +
> > +
> > +
> > +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
> > +{
> > +    struct vhost_vdpa *v;
> > +
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    v = opaque;
> > +    dev->opaque =  opaque ;
> > +
> > +    v->listener = vhost_vdpa_memory_listener;
> > +    memory_listener_register(&v->listener, &address_space_memory);
> > +
> > +    return 0;
> > +}
> > +
> > +static int vhost_vdpa_cleanup(struct vhost_dev *dev)
> > +{
> > +    struct vhost_vdpa *v;
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    v = dev->opaque;
> > +    memory_listener_unregister(&v->listener);
> > +
> > +    dev->opaque = NULL;
> > +    return 0;
> > +}
> > +
>
>
> A comment is needed here to explain why INT_MAX is used.
>
>
> will do

> > +static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
> > +{
> > +    return INT_MAX;
> > +}
> > +
> > +static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> > +                                   struct vhost_log *log)
> > +{
> > +    return 0;
> > +}
>
>
> I think we should fail this function since we don't support dirty page
> tracking now.
>
> And it's not guaranteed that a dirty page bitmap will be used in the future.
>
>
> > +
> > +static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
> > +                                    struct vhost_memory *mem)
> > +{
> > +
> > +    if (mem->padding) {
> > +        return -1;
> > +    }
> > +
> > +    return 0;
>
>
> A comment is needed to explain why the mem table is not used (e.g. we use
> the IOTLB API instead).
>
> will do

>
> > +}
> > +
> > +static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
> > +                                     struct vhost_vring_addr *addr)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
> > +                                    struct vhost_vring_state *ring)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> > +                                     struct vhost_vring_state *ring)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> > +}
> > +
> > +static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> > +                                     struct vhost_vring_state *ring)
> > +{
> > +
> > +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
> > +                                     struct vhost_vring_file *file)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
> > +                                     struct vhost_vring_file *file)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
> > +}
> > +
> > +static int vhost_vdpa_set_features(struct vhost_dev *dev,
> > +                                   uint64_t features)
> > +{
> > +
> > +    features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
>
>
> This seems tricky, I don't think we need this actually.
>
>
> I will double check for this problem

> > +    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
> > +
> > +}
> > +
> > +static int vhost_vdpa_get_features(struct vhost_dev *dev,
> > +                                   uint64_t *features)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
> > +}
> > +
> > +static int vhost_vdpa_set_owner(struct vhost_dev *dev)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
> > +}
> > +
> > +static int vhost_vdpa_reset_device(struct vhost_dev *dev)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
> > +}
> > +
> > +static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
> > +{
> > +    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
> > +
> > +    return idx - dev->vq_index;
> > +}
> > +
> > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int
> enable)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < dev->nvqs; ++i) {
> > +        struct vhost_vring_state state = {
> > +            .index = dev->vq_index + i,
> > +            .num   = enable,
> > +        };
> > +
> > +        state.num = 1;
> > +
> > +        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
>
>
> Please make sure patch 4 comes first then we don't need to fix this in
> patch 4.
>
> will do

>
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
> > +}
> > +
> > +
> > +const VhostOps vdpa_ops = {
> > +        .backend_type = VHOST_BACKEND_TYPE_VDPA,
> > +        .vhost_backend_init = vhost_vdpa_init,
> > +        .vhost_backend_cleanup = vhost_vdpa_cleanup,
> > +        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
> > +        .vhost_set_log_base = vhost_vdpa_set_log_base,
> > +        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
> > +        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
> > +        .vhost_set_vring_endian = NULL,
> > +        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
> > +        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
> > +        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
> > +        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
> > +        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
> > +        .vhost_set_features = vhost_vdpa_set_features,
> > +        .vhost_get_features = vhost_vdpa_get_features,
> > +        .vhost_set_owner = vhost_vdpa_set_owner,
> > +        .vhost_reset_device = vhost_vdpa_reset_device,
> > +        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > +        .vhost_requires_shm_log = NULL,
> > +        .vhost_migration_done = NULL,
> > +        .vhost_backend_can_merge = NULL,
> > +        .vhost_net_set_mtu = NULL,
> > +        .vhost_set_iotlb_callback = NULL,
> > +        .vhost_send_device_iotlb_msg = NULL,
> > +        .vhost_set_state = vhost_vdpa_set_state,
> > +};
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 4da0d5a6c5..d1f2c4add7 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct
> vhost_dev *dev,
> >           .log_guest_addr = vq->used_phys,
> >           .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
> >       };
> > +    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
> > +        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
> > +        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
> > +        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
> > +    }
>
>
> Comment is needed to explain why vDPA differs from others.
>
> Thanks
>
> will do

>
> >       int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
> >       if (r < 0) {
> >           VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
> > diff --git a/include/hw/virtio/vhost-backend.h
> b/include/hw/virtio/vhost-backend.h
> > index 6f6670783f..d81bd9885f 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
> >       VHOST_BACKEND_TYPE_NONE = 0,
> >       VHOST_BACKEND_TYPE_KERNEL = 1,
> >       VHOST_BACKEND_TYPE_USER = 2,
> > -    VHOST_BACKEND_TYPE_MAX = 3,
> > +    VHOST_BACKEND_TYPE_VDPA = 3,
> > +    VHOST_BACKEND_TYPE_MAX = 4,
> >   } VhostBackendType;
> >
> >   typedef enum VhostSetConfigType {
> > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct
> vhost_dev *dev,
> >   typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
> >                                           struct vhost_inflight
> *inflight);
> >
> > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
> >   typedef struct VhostOps {
> >       VhostBackendType backend_type;
> >       vhost_backend_init vhost_backend_init;
> > @@ -152,9 +154,11 @@ typedef struct VhostOps {
> >       vhost_backend_mem_section_filter_op
> vhost_backend_mem_section_filter;
> >       vhost_get_inflight_fd_op vhost_get_inflight_fd;
> >       vhost_set_inflight_fd_op vhost_set_inflight_fd;
> > +    vhost_set_state_op vhost_set_state;
> >   } VhostOps;
> >
> >   extern const VhostOps user_ops;
> > +extern const VhostOps vdpa_ops;
> >
> >   int vhost_set_backend_type(struct vhost_dev *dev,
> >                              VhostBackendType backend_type);
> > diff --git a/include/hw/virtio/vhost-vdpa.h
> b/include/hw/virtio/vhost-vdpa.h
> > new file mode 100644
> > index 0000000000..889c1a4410
> > --- /dev/null
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -0,0 +1,14 @@
> > +
> > +#ifndef HW_VIRTIO_VHOST_VDPA_H
> > +#define HW_VIRTIO_VHOST_VDPA_H
> > +
> > +#include "hw/virtio/virtio.h"
> > +
> > +typedef struct vhost_vdpa {
> > +    int device_fd;
> > +    MemoryListener listener;
> > +} VhostVDPA;
> > +
> > +extern AddressSpace address_space_memory;
> > +
> > +#endif
>
>
Laurent Vivier April 21, 2020, 3:54 p.m. UTC | #4
On 20/04/2020 11:32, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
> 
> Vhost-vdpa usage:
> 
>   qemu-system-x86_64 -cpu host -enable-kvm \
>     ......
>   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie

Use "git commit --author" to set that.

> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/net/vhost_net.c                |  43 ++++
>  hw/net/virtio-net.c               |   9 +
>  hw/virtio/Makefile.objs           |   2 +-
>  hw/virtio/vhost-backend.c         |   3 +
>  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
>  hw/virtio/vhost.c                 |   5 +
>  include/hw/virtio/vhost-backend.h |   6 +-
>  include/hw/virtio/vhost-vdpa.h    |  14 ++
>  8 files changed, 459 insertions(+), 2 deletions(-)
>  create mode 100644 hw/virtio/vhost-vdpa.c
>  create mode 100644 include/hw/virtio/vhost-vdpa.h
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
...
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>          assert(vhost_net);
>          break;
>  #endif
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> +        assert(vhost_net);
> +        break;

This should be inside a "#ifdef".

Thanks,
Laurent
Cindy Lu April 22, 2020, 9:24 a.m. UTC | #5
On Tue, Apr 21, 2020 at 11:54 PM Laurent Vivier <lvivier@redhat.com> wrote:
>
> On 20/04/2020 11:32, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA purpose,
> > this vDPA device exposes to user space a non-vendor-specific configuration
> > interface for setting up a vhost HW accelerator, this patch set introduces
> > a third vhost backend called vhost-vdpa based on the vDPA interface.
> >
> > Vhost-vdpa usage:
> >
> >   qemu-system-x86_64 -cpu host -enable-kvm \
> >     ......
> >   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> >
> > Author: Tiwei Bie
>
> Use "git commit --author" to set that.
Thanks, I will fix this

> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  hw/net/vhost_net.c                |  43 ++++
> >  hw/net/virtio-net.c               |   9 +
> >  hw/virtio/Makefile.objs           |   2 +-
> >  hw/virtio/vhost-backend.c         |   3 +
> >  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
> >  hw/virtio/vhost.c                 |   5 +
> >  include/hw/virtio/vhost-backend.h |   6 +-
> >  include/hw/virtio/vhost-vdpa.h    |  14 ++
> >  8 files changed, 459 insertions(+), 2 deletions(-)
> >  create mode 100644 hw/virtio/vhost-vdpa.c
> >  create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> ...
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >          assert(vhost_net);
> >          break;
> >  #endif
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +        assert(vhost_net);
> > +        break;
>
> This should be inside a "#ifdef".
>
Thanks Laurent, I will add a new macro for vDPA

> Thanks,
> Laurent
>
Maxime Coquelin May 7, 2020, 3:12 p.m. UTC | #6
On 4/20/20 11:32 AM, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
> 
> Vhost-vdpa usage:
> 
>   qemu-system-x86_64 -cpu host -enable-kvm \
>     ......
>   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/net/vhost_net.c                |  43 ++++
>  hw/net/virtio-net.c               |   9 +
>  hw/virtio/Makefile.objs           |   2 +-
>  hw/virtio/vhost-backend.c         |   3 +
>  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
>  hw/virtio/vhost.c                 |   5 +
>  include/hw/virtio/vhost-backend.h |   6 +-
>  include/hw/virtio/vhost-vdpa.h    |  14 ++
>  8 files changed, 459 insertions(+), 2 deletions(-)
>  create mode 100644 hw/virtio/vhost-vdpa.c
>  create mode 100644 include/hw/virtio/vhost-vdpa.h
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -17,8 +17,10 @@
>  #include "net/net.h"
>  #include "net/tap.h"
>  #include "net/vhost-user.h"
> +#include "net/vhost-vdpa.h"
>  
>  #include "standard-headers/linux/vhost_types.h"
> +#include "linux-headers/linux/vhost.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "qemu/error-report.h"
> @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
>      VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static const int vdpa_feature_bits[] = {
> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> +    VIRTIO_RING_F_INDIRECT_DESC,
> +    VIRTIO_RING_F_EVENT_IDX,
> +    VIRTIO_F_ANY_LAYOUT,
> +    VIRTIO_F_VERSION_1,
> +    VIRTIO_NET_F_CSUM,
> +    VIRTIO_NET_F_GUEST_CSUM,
> +    VIRTIO_NET_F_GSO,
> +    VIRTIO_NET_F_GUEST_TSO4,
> +    VIRTIO_NET_F_GUEST_TSO6,
> +    VIRTIO_NET_F_GUEST_ECN,
> +    VIRTIO_NET_F_GUEST_UFO,
> +    VIRTIO_NET_F_HOST_TSO4,
> +    VIRTIO_NET_F_HOST_TSO6,
> +    VIRTIO_NET_F_HOST_ECN,
> +    VIRTIO_NET_F_HOST_UFO,
> +    VIRTIO_NET_F_MRG_RXBUF,
> +    VIRTIO_NET_F_MTU,
> +    VIRTIO_F_IOMMU_PLATFORM,
> +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> +    VHOST_INVALID_FEATURE_BIT
> +};
>  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>  {
>      const int *feature_bits = 0;
> @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>      case NET_CLIENT_DRIVER_VHOST_USER:
>          feature_bits = user_feature_bits;
>          break;
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        feature_bits = vdpa_feature_bits;
> +        break;
>      default:
>          error_report("Feature bits not defined for this type: %d",
>                  net->nc->info->type);
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>          assert(vhost_net);
>          break;
>  #endif
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> +        assert(vhost_net);
> +        break;
>      default:
>          break;
>      }
> @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
>  
>      return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
>  }
> +int vhost_set_state(NetClientState *nc, int state)
> +{
> +    struct vhost_net *net = get_vhost_net(nc);
> +    struct vhost_dev *hdev = &net->dev;
> +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {

Maybe checking that the vhost_set_state callback is implemented is enough,
and there is no need to restrict this to vhost-vDPA?

> +        if (hdev->vhost_ops->vhost_set_state) {
> +                return hdev->vhost_ops->vhost_set_state(hdev, state);
> +             }
> +        }
> +    return 0;
> +}
Maxime Coquelin May 7, 2020, 3:30 p.m. UTC | #7
On 4/20/20 11:32 AM, Cindy Lu wrote:
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index 6f6670783f..d81bd9885f 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
>      VHOST_BACKEND_TYPE_NONE = 0,
>      VHOST_BACKEND_TYPE_KERNEL = 1,
>      VHOST_BACKEND_TYPE_USER = 2,
> -    VHOST_BACKEND_TYPE_MAX = 3,
> +    VHOST_BACKEND_TYPE_VDPA = 3,
> +    VHOST_BACKEND_TYPE_MAX = 4,
>  } VhostBackendType;
>  
>  typedef enum VhostSetConfigType {
> @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
>  typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
>                                          struct vhost_inflight *inflight);
>  
> +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);

I think state should be of type uint8_t.
Cindy Lu May 7, 2020, 3:56 p.m. UTC | #8
On Thu, May 7, 2020 at 11:13 PM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
>
>
> On 4/20/20 11:32 AM, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA purpose,
> > this vDPA device exposes to user space a non-vendor-specific configuration
> > interface for setting up a vhost HW accelerator, this patch set introduces
> > a third vhost backend called vhost-vdpa based on the vDPA interface.
> >
> > Vhost-vdpa usage:
> >
> >   qemu-system-x86_64 -cpu host -enable-kvm \
> >     ......
> >   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> >
> > Author: Tiwei Bie
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  hw/net/vhost_net.c                |  43 ++++
> >  hw/net/virtio-net.c               |   9 +
> >  hw/virtio/Makefile.objs           |   2 +-
> >  hw/virtio/vhost-backend.c         |   3 +
> >  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
> >  hw/virtio/vhost.c                 |   5 +
> >  include/hw/virtio/vhost-backend.h |   6 +-
> >  include/hw/virtio/vhost-vdpa.h    |  14 ++
> >  8 files changed, 459 insertions(+), 2 deletions(-)
> >  create mode 100644 hw/virtio/vhost-vdpa.c
> >  create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -17,8 +17,10 @@
> >  #include "net/net.h"
> >  #include "net/tap.h"
> >  #include "net/vhost-user.h"
> > +#include "net/vhost-vdpa.h"
> >
> >  #include "standard-headers/linux/vhost_types.h"
> > +#include "linux-headers/linux/vhost.h"
> >  #include "hw/virtio/virtio-net.h"
> >  #include "net/vhost_net.h"
> >  #include "qemu/error-report.h"
> > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
> >      VHOST_INVALID_FEATURE_BIT
> >  };
> >
> > +static const int vdpa_feature_bits[] = {
> > +    VIRTIO_F_NOTIFY_ON_EMPTY,
> > +    VIRTIO_RING_F_INDIRECT_DESC,
> > +    VIRTIO_RING_F_EVENT_IDX,
> > +    VIRTIO_F_ANY_LAYOUT,
> > +    VIRTIO_F_VERSION_1,
> > +    VIRTIO_NET_F_CSUM,
> > +    VIRTIO_NET_F_GUEST_CSUM,
> > +    VIRTIO_NET_F_GSO,
> > +    VIRTIO_NET_F_GUEST_TSO4,
> > +    VIRTIO_NET_F_GUEST_TSO6,
> > +    VIRTIO_NET_F_GUEST_ECN,
> > +    VIRTIO_NET_F_GUEST_UFO,
> > +    VIRTIO_NET_F_HOST_TSO4,
> > +    VIRTIO_NET_F_HOST_TSO6,
> > +    VIRTIO_NET_F_HOST_ECN,
> > +    VIRTIO_NET_F_HOST_UFO,
> > +    VIRTIO_NET_F_MRG_RXBUF,
> > +    VIRTIO_NET_F_MTU,
> > +    VIRTIO_F_IOMMU_PLATFORM,
> > +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +    VHOST_INVALID_FEATURE_BIT
> > +};
> >  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >  {
> >      const int *feature_bits = 0;
> > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >      case NET_CLIENT_DRIVER_VHOST_USER:
> >          feature_bits = user_feature_bits;
> >          break;
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        feature_bits = vdpa_feature_bits;
> > +        break;
> >      default:
> >          error_report("Feature bits not defined for this type: %d",
> >                  net->nc->info->type);
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >          assert(vhost_net);
> >          break;
> >  #endif
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +        assert(vhost_net);
> > +        break;
> >      default:
> >          break;
> >      }
> > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
> >
> >      return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
> >  }
> > +int vhost_set_state(NetClientState *nc, int state)
> > +{
> > +    struct vhost_net *net = get_vhost_net(nc);
> > +    struct vhost_dev *hdev = &net->dev;
> > +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>
> > Maybe checking that the vhost_set_state callback is implemented is enough,
> > and there is no need to restrict this to vhost-vDPA?
Sure, Will remove this

> > +        if (hdev->vhost_ops->vhost_set_state) {
> > +                return hdev->vhost_ops->vhost_set_state(hdev, state);
> > +             }
> > +        }
> > +    return 0;
> > +}
>
Cindy Lu May 7, 2020, 4:02 p.m. UTC | #9
On Thu, May 7, 2020 at 11:30 PM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
>
>
> On 4/20/20 11:32 AM, Cindy Lu wrote:
> > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> > index 6f6670783f..d81bd9885f 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
> >      VHOST_BACKEND_TYPE_NONE = 0,
> >      VHOST_BACKEND_TYPE_KERNEL = 1,
> >      VHOST_BACKEND_TYPE_USER = 2,
> > -    VHOST_BACKEND_TYPE_MAX = 3,
> > +    VHOST_BACKEND_TYPE_VDPA = 3,
> > +    VHOST_BACKEND_TYPE_MAX = 4,
> >  } VhostBackendType;
> >
> >  typedef enum VhostSetConfigType {
> > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
> >  typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
> >                                          struct vhost_inflight *inflight);
> >
> > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
>
> I think state should be of type uint8_t.
>
ok, I will change this to uint8_t
diff mbox series

Patch

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 4096d64aaf..0d13fda2fc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -17,8 +17,10 @@ 
 #include "net/net.h"
 #include "net/tap.h"
 #include "net/vhost-user.h"
+#include "net/vhost-vdpa.h"
 
 #include "standard-headers/linux/vhost_types.h"
+#include "linux-headers/linux/vhost.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "qemu/error-report.h"
@@ -85,6 +87,29 @@  static const int user_feature_bits[] = {
     VHOST_INVALID_FEATURE_BIT
 };
 
+/*
+ * Feature bits listed for vhost-vdpa net clients.  The list is terminated
+ * by VHOST_INVALID_FEATURE_BIT.
+ */
+static const int vdpa_feature_bits[] = {
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_F_ANY_LAYOUT,
+    VIRTIO_F_VERSION_1,
+    VIRTIO_NET_F_CSUM,
+    VIRTIO_NET_F_GUEST_CSUM,
+    VIRTIO_NET_F_GSO,
+    VIRTIO_NET_F_GUEST_TSO4,
+    VIRTIO_NET_F_GUEST_TSO6,
+    VIRTIO_NET_F_GUEST_ECN,
+    VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_HOST_TSO4,
+    VIRTIO_NET_F_HOST_TSO6,
+    VIRTIO_NET_F_HOST_ECN,
+    VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_MRG_RXBUF,
+    VIRTIO_NET_F_MTU,
+    VIRTIO_F_IOMMU_PLATFORM,
+    VIRTIO_NET_F_GUEST_ANNOUNCE,
+    VHOST_INVALID_FEATURE_BIT
+};
 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
 {
     const int *feature_bits = 0;
@@ -96,6 +121,9 @@  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
     case NET_CLIENT_DRIVER_VHOST_USER:
         feature_bits = user_feature_bits;
         break;
+    case NET_CLIENT_DRIVER_VHOST_VDPA:
+        feature_bits = vdpa_feature_bits;
+        break;
     default:
         error_report("Feature bits not defined for this type: %d",
                 net->nc->info->type);
@@ -434,6 +462,10 @@  VHostNetState *get_vhost_net(NetClientState *nc)
         assert(vhost_net);
         break;
 #endif
+    case NET_CLIENT_DRIVER_VHOST_VDPA:
+        vhost_net = vhost_vdpa_get_vhost_net(nc);
+        assert(vhost_net);
+        break;
     default:
         break;
     }
@@ -465,3 +497,14 @@  int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 
     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
 }
+/*
+ * Forward a device status value to the vhost backend behind @nc.
+ *
+ * The call is made for any backend that implements the vhost_set_state
+ * callback; it is not tied to a particular net client type.  Peers without
+ * a vhost_net, and backends without the callback, are ignored.
+ *
+ * Returns the callback's result, or 0 when there is nothing to call.
+ */
+int vhost_set_state(NetClientState *nc, int state)
+{
+    struct vhost_net *net = get_vhost_net(nc);
+
+    /* get_vhost_net() yields NULL for clients that have no vhost backend */
+    if (!net || !net->dev.vhost_ops ||
+        !net->dev.vhost_ops->vhost_set_state) {
+        return 0;
+    }
+
+    return net->dev.vhost_ops->vhost_set_state(&net->dev, state);
+}
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index db3d7c38e6..bbecd7ab96 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -206,6 +206,9 @@  static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
     NetClientState *nc = qemu_get_queue(n->nic);
     int queues = n->multiqueue ? n->max_queues : 1;
+    NetClientState *peer = nc->peer;
+    uint8_t status_set  = vdev->status ;
+    uint8_t vhost_started_pre = n->vhost_started;
 
     if (!get_vhost_net(nc->peer)) {
         return;
@@ -245,6 +248,7 @@  static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
                 return;
             }
         }
+        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
 
         n->vhost_started = 1;
         r = vhost_net_start(vdev, n->nic->ncs, queues);
@@ -252,11 +256,16 @@  static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
             n->vhost_started = 0;
+            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
         }
     } else {
         vhost_net_stop(vdev, n->nic->ncs, queues);
+        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
         n->vhost_started = 0;
     }
+    if (vhost_started_pre != n->vhost_started) {
+            vhost_set_state(peer, status_set);
+    }
 }
 
 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index e2f70fbb89..17361d959e 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -2,7 +2,7 @@  ifeq ($(CONFIG_VIRTIO),y)
 common-obj-y += virtio-bus.o
 obj-y += virtio.o
 
-obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o
+obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o vhost-vdpa.o
 common-obj-$(call lnot,$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
 obj-$(CONFIG_VHOST_USER) += vhost-user.o
 
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 48905383f8..935cd9e561 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -286,6 +286,9 @@  int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
         dev->vhost_ops = &user_ops;
         break;
 #endif
+    case VHOST_BACKEND_TYPE_VDPA:
+        dev->vhost_ops = &vdpa_ops;
+        break;
     default:
         error_report("Unknown vhost backend type");
         r = -1;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
new file mode 100644
index 0000000000..213b327600
--- /dev/null
+++ b/hw/virtio/vhost-vdpa.c
@@ -0,0 +1,379 @@ 
+/*
+ * vhost-vdpa
+ *
+ *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
+ *  Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vhost.h>
+#include <linux/vfio.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "qemu/main-loop.h"
+#include <linux/kvm.h>
+#include "sysemu/kvm.h"
+
+
+/*
+ * Decide whether the memory listener should ignore @section.
+ *
+ * Returns true for sections that are neither RAM nor IOMMU regions, and for
+ * sections whose address-space offset has bit 63 set.
+ */
+static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
+{
+    if (!memory_region_is_ram(section->mr) &&
+        !memory_region_is_iommu(section->mr)) {
+        return true;
+    }
+
+    /*
+     * Sizing an enabled 64-bit BAR can cause spurious mappings to
+     * addresses in the upper part of the 64-bit address space.  These
+     * are never accessed by the CPU and beyond the address width of
+     * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
+     */
+    return (section->offset_within_address_space & (1ULL << 63)) != 0;
+}
+
+/*
+ * Ask the vDPA device to map [iova, iova + size) to host memory at @vaddr.
+ *
+ * A VHOST_IOTLB_UPDATE message is written to the device fd.  The mapping is
+ * requested read-only when @readonly is set and read-write otherwise.
+ *
+ * Returns 0 on success, -EIO if the message was not written in full.
+ */
+static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+                              void *vaddr, bool readonly)
+{
+    /*
+     * The whole structure is written to the kernel, so zero it first: any
+     * byte not explicitly assigned below must not be stack garbage.
+     */
+    struct vhost_msg_v2 msg = {};
+    int fd = v->device_fd;
+
+    msg.type = VHOST_IOTLB_MSG_V2;
+    msg.iotlb.iova = iova;
+    msg.iotlb.size = size;
+    /* Convert via uintptr_t so the cast is size-correct on every host */
+    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
+    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
+    msg.iotlb.type = VHOST_IOTLB_UPDATE;
+
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+            fd, errno, strerror(errno));
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Ask the vDPA device to drop the mapping for [iova, iova + size).
+ *
+ * A VHOST_IOTLB_INVALIDATE message is written to the device fd.
+ *
+ * Returns 0 on success, -EIO if the message was not written in full.
+ */
+static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
+                                hwaddr size)
+{
+    /*
+     * uaddr and perm are not used for an invalidate, but the whole
+     * structure is still written to the kernel: zero it so those fields
+     * are not stack garbage.
+     */
+    struct vhost_msg_v2 msg = {};
+    int fd = v->device_fd;
+
+    msg.type = VHOST_IOTLB_MSG_V2;
+    msg.iotlb.iova = iova;
+    msg.iotlb.size = size;
+    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
+
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+            fd, errno, strerror(errno));
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * MemoryListener region_add hook: map a newly added section into the device.
+ *
+ * Nothing is done for sections rejected by
+ * vhost_vdpa_listener_skipped_section(), for sections whose address-space
+ * and region offsets differ in their sub-page bits, or when the aligned
+ * start is not below the aligned end.  Otherwise the aligned range is passed
+ * to vhost_vdpa_dma_map(), read-only when the section is read-only.
+ *
+ * A reference on the memory region is taken before mapping and is kept even
+ * if the mapping fails; failures are only reported, never propagated.
+ */
+static void vhost_vdpa_listener_region_add(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+    void *vaddr;
+    int ret;
+
+    if (vhost_vdpa_listener_skipped_section(section)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    /* Start is aligned with TARGET_PAGE_ALIGN, end is masked to a page */
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    memory_region_ref(section->mr);
+
+    /*
+     * Here we assume that memory_region_is_ram(section->mr)==true.
+     * NOTE(review): vhost_vdpa_listener_skipped_section() also lets IOMMU
+     * regions through, and nothing here checks for them - confirm.
+     */
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
+                             vaddr, section->readonly);
+    if (!ret) {
+        return;
+    }
+
+    error_report("vhost vdpa map fail!");
+    if (memory_region_is_ram_device(section->mr)) {
+        /* Allow unexpected mappings not to be fatal for RAM devices */
+        error_report("map ram fail!");
+        return;
+    }
+
+    /* There is no way to report the failure to the caller from here */
+    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
+}
+
+/*
+ * MemoryListener region_del hook: unmap a removed section from the device.
+ *
+ * Applies the same filtering and alignment as the region_add hook, then
+ * calls vhost_vdpa_dma_unmap() on the aligned range and drops the memory
+ * region reference.  An unmap failure is reported but the reference is
+ * still released.
+ */
+static void vhost_vdpa_listener_region_del(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+
+    if (vhost_vdpa_listener_skipped_section(section)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    /* Start is aligned with TARGET_PAGE_ALIGN, end is masked to a page */
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    if (vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize))) {
+        error_report("vhost_vdpa dma unmap error!");
+    }
+
+    memory_region_unref(section->mr);
+}
+
+/*
+ * Listener template; vhost_vdpa_init() copies it into each vhost_vdpa
+ * before registering the copy.
+ */
+static const MemoryListener vhost_vdpa_memory_listener = {
+    .region_add = vhost_vdpa_listener_region_add,
+    .region_del = vhost_vdpa_listener_region_del,
+};
+
+
+/*
+ * Issue ioctl @request with argument @arg on the vDPA device fd stored in
+ * dev->opaque.  Returns the raw ioctl() result.
+ */
+static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
+                           void *arg)
+{
+    struct vhost_vdpa *vdpa = dev->opaque;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    return ioctl(vdpa->device_fd, request, arg);
+}
+
+
+
+/*
+ * Backend init hook.  @opaque is the struct vhost_vdpa for this device: it
+ * is stored in dev->opaque and its memory listener is registered on
+ * address_space_memory.  Always returns 0.
+ */
+static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
+{
+    struct vhost_vdpa *vdpa = opaque;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    dev->opaque = vdpa;
+
+    vdpa->listener = vhost_vdpa_memory_listener;
+    memory_listener_register(&vdpa->listener, &address_space_memory);
+
+    return 0;
+}
+
+/*
+ * Backend cleanup hook: unregister the memory listener installed by
+ * vhost_vdpa_init() and clear dev->opaque.  Always returns 0.
+ */
+static int vhost_vdpa_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *vdpa;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    vdpa = dev->opaque;
+    memory_listener_unregister(&vdpa->listener);
+    dev->opaque = NULL;
+
+    return 0;
+}
+
+/* Memslot limit hook: always reports INT_MAX. */
+static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
+{
+    return INT_MAX;
+}
+
+/* Stub: ignores @base and @log and reports success. */
+static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                   struct vhost_log *log)
+{
+    return 0;
+}
+
+/*
+ * Memory table hook.  Nothing is sent to the device here; the only check is
+ * that @mem->padding is zero.  Returns -1 if it is not, 0 otherwise.
+ */
+static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+    return mem->padding ? -1 : 0;
+}
+
+/* Issue VHOST_SET_VRING_ADDR with @addr; returns vhost_vdpa_call()'s result. */
+static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
+                                     struct vhost_vring_addr *addr)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+}
+
+/* Issue VHOST_SET_VRING_NUM with @ring; returns vhost_vdpa_call()'s result. */
+static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
+                                    struct vhost_vring_state *ring)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
+}
+
+/*
+ * Issue VHOST_SET_VRING_BASE with @ring; returns vhost_vdpa_call()'s result.
+ * This is the setter, so it must use the SET request, not the GET one.
+ */
+static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+}
+
+/* Issue VHOST_GET_VRING_BASE with @ring; returns vhost_vdpa_call()'s result. */
+static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+
+    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
+}
+
+/* Issue VHOST_SET_VRING_KICK with @file; returns vhost_vdpa_call()'s result. */
+static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+/* Issue VHOST_SET_VRING_CALL with @file; returns vhost_vdpa_call()'s result. */
+static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
+/*
+ * Issue VHOST_SET_FEATURES with @features plus the VIRTIO_F_IOMMU_PLATFORM
+ * bit, which is always forced on.  Returns vhost_vdpa_call()'s result.
+ */
+static int vhost_vdpa_set_features(struct vhost_dev *dev,
+                                   uint64_t features)
+{
+    uint64_t requested = features | (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+
+    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &requested);
+}
+
+/*
+ * Issue VHOST_GET_FEATURES with @features as the ioctl argument; returns
+ * vhost_vdpa_call()'s result.
+ */
+static int vhost_vdpa_get_features(struct vhost_dev *dev,
+                                   uint64_t *features)
+{
+    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
+}
+
+/* Issue VHOST_SET_OWNER (no argument); returns vhost_vdpa_call()'s result. */
+static int vhost_vdpa_set_owner(struct vhost_dev *dev)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+}
+
+/*
+ * Reset hook: issues VHOST_RESET_OWNER (no argument); returns
+ * vhost_vdpa_call()'s result.
+ */
+static int vhost_vdpa_reset_device(struct vhost_dev *dev)
+{
+    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
+}
+
+/*
+ * Convert virtqueue index @idx into an index relative to dev->vq_index.
+ * Asserts that @idx lies in [dev->vq_index, dev->vq_index + dev->nvqs).
+ */
+static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
+{
+    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+
+    return idx - dev->vq_index;
+}
+
+/*
+ * Issue VHOST_VDPA_SET_VRING_ENABLE for each of the dev->nvqs virtqueues,
+ * starting at index dev->vq_index.  Always returns 0; the result of the
+ * individual ioctls is not checked.
+ *
+ * NOTE(review): @enable is effectively ignored - state.num is initialised
+ * from it and then unconditionally overwritten with 1, so every ring is
+ * always enabled.  Confirm whether that is intended.
+ */
+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+    int i;
+
+    for (i = 0; i < dev->nvqs; ++i) {
+        struct vhost_vring_state state = {
+            .index = dev->vq_index + i,
+            .num   = enable,
+        };
+
+        state.num = 1;
+
+        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
+    }
+
+    return 0;
+}
+
+/*
+ * Issue VHOST_VDPA_SET_STATUS with the device status in @state; returns
+ * vhost_vdpa_call()'s result.
+ *
+ * <linux/vhost.h> defines the request on a one-byte (__u8) argument, so the
+ * value is narrowed into a uint8_t first.  Passing the address of the int
+ * itself would hand the kernel the wrong byte on big-endian hosts.
+ */
+static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
+{
+    uint8_t status = state;
+
+    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
+}
+
+
+/*
+ * VhostOps table for the vhost-vdpa backend
+ * (backend_type VHOST_BACKEND_TYPE_VDPA).  Hooks set to NULL are not
+ * provided by this backend.
+ */
+const VhostOps vdpa_ops = {
+        .backend_type = VHOST_BACKEND_TYPE_VDPA,
+        .vhost_backend_init = vhost_vdpa_init,
+        .vhost_backend_cleanup = vhost_vdpa_cleanup,
+        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
+        .vhost_set_log_base = vhost_vdpa_set_log_base,
+        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
+        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
+        .vhost_set_vring_endian = NULL,
+        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
+        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
+        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
+        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
+        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
+        .vhost_set_features = vhost_vdpa_set_features,
+        .vhost_get_features = vhost_vdpa_get_features,
+        .vhost_set_owner = vhost_vdpa_set_owner,
+        .vhost_reset_device = vhost_vdpa_reset_device,
+        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
+        .vhost_requires_shm_log = NULL,
+        .vhost_migration_done = NULL,
+        .vhost_backend_can_merge = NULL,
+        .vhost_net_set_mtu = NULL,
+        .vhost_set_iotlb_callback = NULL,
+        .vhost_send_device_iotlb_msg = NULL,
+        .vhost_set_state = vhost_vdpa_set_state,
+};
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4da0d5a6c5..d1f2c4add7 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -746,6 +746,11 @@  static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
         .log_guest_addr = vq->used_phys,
         .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
     };
+    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
+        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
+        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
+        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
+    }
     int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
     if (r < 0) {
         VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 6f6670783f..d81bd9885f 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -17,7 +17,8 @@  typedef enum VhostBackendType {
     VHOST_BACKEND_TYPE_NONE = 0,
     VHOST_BACKEND_TYPE_KERNEL = 1,
     VHOST_BACKEND_TYPE_USER = 2,
-    VHOST_BACKEND_TYPE_MAX = 3,
+    VHOST_BACKEND_TYPE_VDPA = 3,
+    VHOST_BACKEND_TYPE_MAX = 4,
 } VhostBackendType;
 
 typedef enum VhostSetConfigType {
@@ -112,6 +113,7 @@  typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
 typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
                                         struct vhost_inflight *inflight);
 
+typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
 typedef struct VhostOps {
     VhostBackendType backend_type;
     vhost_backend_init vhost_backend_init;
@@ -152,9 +154,11 @@  typedef struct VhostOps {
     vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
     vhost_get_inflight_fd_op vhost_get_inflight_fd;
     vhost_set_inflight_fd_op vhost_set_inflight_fd;
+    vhost_set_state_op vhost_set_state;
 } VhostOps;
 
 extern const VhostOps user_ops;
+extern const VhostOps vdpa_ops;
 
 int vhost_set_backend_type(struct vhost_dev *dev,
                            VhostBackendType backend_type);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
new file mode 100644
index 0000000000..889c1a4410
--- /dev/null
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -0,0 +1,14 @@ 
+
+#ifndef HW_VIRTIO_VHOST_VDPA_H
+#define HW_VIRTIO_VHOST_VDPA_H
+
+#include "hw/virtio/virtio.h"
+
+/* Per-device state of the vhost-vdpa backend. */
+typedef struct vhost_vdpa {
+    int device_fd; /* fd of the vDPA device node */
+    MemoryListener listener; /* memory listener embedded in this state */
+} VhostVDPA;
+
+extern AddressSpace address_space_memory;
+
+#endif