Message ID | 20200420093241.4238-4-lulu@redhat.com (mailing list archive)
---|---
State | New, archived
Series | vDPA support in qemu
On 4/20/20 4:32 AM, Cindy Lu wrote: > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > vhost-user. The above patch provides a generic device for vDPA purpose, > this vDPA device exposes to user space a non-vendor-specific configuration > interface for setting up a vhost HW accelerator, this patch set introduces > a third vhost backend called vhost-vdpa based on the vDPA interface. > > Vhost-vdpa usage: > > qemu-system-x86_64 -cpu host -enable-kvm \ > ...... > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > Author: Tiwei Bie Another questionable authorship line; should this be Signed-off-by? (Do we have permission from Tiwei Bie to include this code?) > Signed-off-by: Cindy Lu <lulu@redhat.com> > --- > +++ b/hw/virtio/vhost-vdpa.c > @@ -0,0 +1,379 @@ > +/* > + * vhost-vdpa > + * > + * Copyright(c) 2017-2018 Intel Corporation. All rights reserved. > + * Copyright(c) 2020 Red Hat, Inc. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. Another questionable "All rights reserved" > --- /dev/null > +++ b/include/hw/virtio/vhost-vdpa.h > @@ -0,0 +1,14 @@ > + > +#ifndef HW_VIRTIO_VHOST_VDPA_H > +#define HW_VIRTIO_VHOST_VDPA_H > + All new files should include a copyright and license, even if they are short.
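A minimal header for include/hw/virtio/vhost-vdpa.h along those lines, sketched by reusing the license block already present in hw/virtio/vhost-vdpa.c (with the questioned "All rights reserved" dropped); the exact copyright lines are for the author to confirm:

```c
/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#ifndef HW_VIRTIO_VHOST_VDPA_H
#define HW_VIRTIO_VHOST_VDPA_H

#include "hw/virtio/virtio.h"

typedef struct vhost_vdpa {
    int device_fd;
    MemoryListener listener;
} VhostVDPA;

extern AddressSpace address_space_memory;

#endif /* HW_VIRTIO_VHOST_VDPA_H */
```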
On 2020/4/20 下午5:32, Cindy Lu wrote: > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > vhost-user. The above patch provides a generic device for vDPA purpose, > this vDPA device exposes to user space a non-vendor-specific configuration > interface for setting up a vhost HW accelerator, this patch set introduces > a third vhost backend called vhost-vdpa based on the vDPA interface. > > Vhost-vdpa usage: > > qemu-system-x86_64 -cpu host -enable-kvm \ > ...... > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ Actually, this part should belongs to patch 2. And we probably need to add a comment that vIOMMU is not supported right now. > > Author: Tiwei Bie > Signed-off-by: Cindy Lu <lulu@redhat.com> > --- > hw/net/vhost_net.c | 43 ++++ > hw/net/virtio-net.c | 9 + > hw/virtio/Makefile.objs | 2 +- > hw/virtio/vhost-backend.c | 3 + > hw/virtio/vhost-vdpa.c | 379 ++++++++++++++++++++++++++++++ > hw/virtio/vhost.c | 5 + > include/hw/virtio/vhost-backend.h | 6 +- > include/hw/virtio/vhost-vdpa.h | 14 ++ > 8 files changed, 459 insertions(+), 2 deletions(-) > create mode 100644 hw/virtio/vhost-vdpa.c > create mode 100644 include/hw/virtio/vhost-vdpa.h > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > index 4096d64aaf..0d13fda2fc 100644 > --- a/hw/net/vhost_net.c > +++ b/hw/net/vhost_net.c > @@ -17,8 +17,10 @@ > #include "net/net.h" > #include "net/tap.h" > #include "net/vhost-user.h" > +#include "net/vhost-vdpa.h" > > #include "standard-headers/linux/vhost_types.h" > +#include "linux-headers/linux/vhost.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "qemu/error-report.h" > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = { > VHOST_INVALID_FEATURE_BIT > }; > > +static const int vdpa_feature_bits[] = { > + VIRTIO_F_NOTIFY_ON_EMPTY, > + VIRTIO_RING_F_INDIRECT_DESC, > + VIRTIO_RING_F_EVENT_IDX, > + VIRTIO_F_ANY_LAYOUT, > + VIRTIO_F_VERSION_1, > + VIRTIO_NET_F_CSUM, > + VIRTIO_NET_F_GUEST_CSUM, > + VIRTIO_NET_F_GSO, > + VIRTIO_NET_F_GUEST_TSO4, > + VIRTIO_NET_F_GUEST_TSO6, > + VIRTIO_NET_F_GUEST_ECN, > + VIRTIO_NET_F_GUEST_UFO, > + VIRTIO_NET_F_HOST_TSO4, > + VIRTIO_NET_F_HOST_TSO6, > + VIRTIO_NET_F_HOST_ECN, > + VIRTIO_NET_F_HOST_UFO, > + VIRTIO_NET_F_MRG_RXBUF, > + VIRTIO_NET_F_MTU, > + VIRTIO_F_IOMMU_PLATFORM, > + VIRTIO_NET_F_GUEST_ANNOUNCE, > + VHOST_INVALID_FEATURE_BIT > +}; > static const int *vhost_net_get_feature_bits(struct vhost_net *net) > { > const int *feature_bits = 0; > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net) > case NET_CLIENT_DRIVER_VHOST_USER: > feature_bits = user_feature_bits; > break; > + case NET_CLIENT_DRIVER_VHOST_VDPA: > + feature_bits = vdpa_feature_bits; > + break; > default: > error_report("Feature bits not defined for this type: %d", > net->nc->info->type); > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > assert(vhost_net); > break; > #endif > + case NET_CLIENT_DRIVER_VHOST_VDPA: > + vhost_net = vhost_vdpa_get_vhost_net(nc); > + assert(vhost_net); > + break; > default: > break; > } > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) > > return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); > } > +int vhost_set_state(NetClientState *nc, int state) > +{ > + struct vhost_net *net = get_vhost_net(nc); > + struct vhost_dev *hdev = &net->dev; > + if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { > + if 
(hdev->vhost_ops->vhost_set_state) { > + return hdev->vhost_ops->vhost_set_state(hdev, state); > + } > + } > + return 0; > +} > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index db3d7c38e6..bbecd7ab96 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) > VirtIODevice *vdev = VIRTIO_DEVICE(n); > NetClientState *nc = qemu_get_queue(n->nic); > int queues = n->multiqueue ? n->max_queues : 1; > + NetClientState *peer = nc->peer; qemu_get_peer()? > + uint8_t status_set = vdev->status ; > + uint8_t vhost_started_pre = n->vhost_started; > > if (!get_vhost_net(nc->peer)) { > return; > @@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) > return; > } > } > + status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK; > > n->vhost_started = 1; > r = vhost_net_start(vdev, n->nic->ncs, queues); > @@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) > error_report("unable to start vhost net: %d: " > "falling back on userspace virtio", -r); > n->vhost_started = 0; > + status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK; > } > } else { > vhost_net_stop(vdev, n->nic->ncs, queues); > + status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK; > n->vhost_started = 0; > } > + if (vhost_started_pre != n->vhost_started) { > + vhost_set_state(peer, status_set); > + } > } I think this deserves an independent patch. > > static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > index e2f70fbb89..17361d959e 100644 > --- a/hw/virtio/Makefile.objs > +++ b/hw/virtio/Makefile.objs > @@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y) > common-obj-y += virtio-bus.o > obj-y += virtio.o > > -obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o > +obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o vhost-vdpa.o > common-obj-$(call lnot,$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o > obj-$(CONFIG_VHOST_USER) += vhost-user.o > > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c > index 48905383f8..935cd9e561 100644 > --- a/hw/virtio/vhost-backend.c > +++ b/hw/virtio/vhost-backend.c > @@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) > dev->vhost_ops = &user_ops; > break; > #endif > + case VHOST_BACKEND_TYPE_VDPA: > + dev->vhost_ops = &vdpa_ops; > + break; > default: > error_report("Unknown vhost backend type"); > r = -1; > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > new file mode 100644 > index 0000000000..213b327600 > --- /dev/null > +++ b/hw/virtio/vhost-vdpa.c > @@ -0,0 +1,379 @@ > +/* > + * vhost-vdpa > + * > + * Copyright(c) 2017-2018 Intel Corporation. All rights reserved. > + * Copyright(c) 2020 Red Hat, Inc. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. 
> + * > + */ > + > +#include "qemu/osdep.h" > +#include <linux/vhost.h> > +#include <linux/vfio.h> > +#include <sys/eventfd.h> > +#include <sys/ioctl.h> > +#include "hw/virtio/vhost.h" > +#include "hw/virtio/vhost-backend.h" > +#include "hw/virtio/virtio-net.h" > +#include "hw/virtio/vhost-vdpa.h" > +#include "qemu/main-loop.h" > +#include <linux/kvm.h> > +#include "sysemu/kvm.h" > + > + > +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section) > +{ > + return (!memory_region_is_ram(section->mr) && > + !memory_region_is_iommu(section->mr)) || > + /* > + * Sizing an enabled 64-bit BAR can cause spurious mappings to > + * addresses in the upper part of the 64-bit address space. These > + * are never accessed by the CPU and beyond the address width of > + * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width. > + */ > + section->offset_within_address_space & (1ULL << 63); > +} > + > +static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, > + void *vaddr, bool readonly) > +{ > + struct vhost_msg_v2 msg; > + int fd = v->device_fd; > + int ret = 0; > + > + msg.type = VHOST_IOTLB_MSG_V2; Since V2 of the message is used here, I believe we need a kernel patch to allow the querying of backend capability. > + msg.iotlb.iova = iova; > + msg.iotlb.size = size; > + msg.iotlb.uaddr = (uint64_t)vaddr; > + msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; > + msg.iotlb.type = VHOST_IOTLB_UPDATE; > + > + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { > + error_report("failed to write, fd=%d, errno=%d (%s)", > + fd, errno, strerror(errno)); > + return -EIO ; > + } > + > + return ret; > +} > + > +static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, > + hwaddr size) > +{ > + struct vhost_msg_v2 msg; > + int fd = v->device_fd; > + int ret = 0; > + > + msg.type = VHOST_IOTLB_MSG_V2; > + msg.iotlb.iova = iova; > + msg.iotlb.size = size; > + msg.iotlb.type = VHOST_IOTLB_INVALIDATE; > + > + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { > + error_report("failed to write, fd=%d, errno=%d (%s)", > + fd, errno, strerror(errno)); > + return -EIO ; > + } > + > + return ret; > +} > + > +static void vhost_vdpa_listener_region_add(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); > + hwaddr iova; > + Int128 llend, llsize; > + void *vaddr; > + int ret; > + > + if (vhost_vdpa_listener_skipped_section(section)) { > + return; > + } > + > + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + error_report("%s received unaligned region", __func__); > + return; > + } > + > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + llend = int128_make64(section->offset_within_address_space); > + llend = int128_add(llend, section->size); > + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > + > + if (int128_ge(int128_make64(iova), llend)) { > + return; > + } > + > + memory_region_ref(section->mr); > + > + /* Here we assume that memory_region_is_ram(section->mr)==true */ > + > + vaddr = memory_region_get_ram_ptr(section->mr) + > + section->offset_within_region + > + (iova - section->offset_within_address_space); > + > + llsize = int128_sub(llend, int128_make64(iova)); > + > + ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), > + vaddr, section->readonly); > + if (ret) { > + error_report("vhost vdpa map fail!"); > + if 
(memory_region_is_ram_device(section->mr)) { > + /* Allow unexpected mappings not to be fatal for RAM devices */ > + error_report("map ram fail!"); > + return ; > + } > + goto fail; > + } > + > + return; > + > +fail: > + if (memory_region_is_ram_device(section->mr)) { > + error_report("failed to vdpa_dma_map. pci p2p may not work"); > + return; > + > + } > + /* > + * On the initfn path, store the first error in the container so we > + * can gracefully fail. Runtime, there's not much we can do other > + * than throw a hardware error. > + */ > + error_report("vhost-vdpa: DMA mapping failed, unable to continue"); > + return; > + > +} > + > +static void vhost_vdpa_listener_region_del(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); > + hwaddr iova; > + Int128 llend, llsize; > + int ret; > + bool try_unmap = true; > + > + if (vhost_vdpa_listener_skipped_section(section)) { > + return; > + } > + > + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + error_report("%s received unaligned region", __func__); > + return; > + } > + > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + llend = int128_make64(section->offset_within_address_space); > + llend = int128_add(llend, section->size); > + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > + > + if (int128_ge(int128_make64(iova), llend)) { > + return; > + } > + > + llsize = int128_sub(llend, int128_make64(iova)); > + > + if (try_unmap) { > + ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); > + if (ret) { > + error_report("vhost_vdpa dma unmap error!"); > + } > + } > + > + memory_region_unref(section->mr); > +} > + I think it's better to add comment to explain why vhost-vdpa use a different listener other than the one used by other vhost backends (e.g kernel or user). > +static const MemoryListener vhost_vdpa_memory_listener = { > + .region_add = vhost_vdpa_listener_region_add, > + .region_del = vhost_vdpa_listener_region_del, > +}; > + > + > +static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, > + void *arg) > +{ > + struct vhost_vdpa *v = dev->opaque; > + int fd = v->device_fd; > + > + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); > + > + return ioctl(fd, request, arg); > +} > + > + > + > +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque) > +{ > + struct vhost_vdpa *v; > + > + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); > + > + v = opaque; > + dev->opaque = opaque ; > + > + v->listener = vhost_vdpa_memory_listener; > + memory_listener_register(&v->listener, &address_space_memory); > + > + return 0; > +} > + > +static int vhost_vdpa_cleanup(struct vhost_dev *dev) > +{ > + struct vhost_vdpa *v; > + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); > + > + v = dev->opaque; > + memory_listener_unregister(&v->listener); > + > + dev->opaque = NULL; > + return 0; > +} > + A comment here is need to explain why INT_MAX is used. > +static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) > +{ > + return INT_MAX; > +} > + > +static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, > + struct vhost_log *log) > +{ > + return 0; > +} I think we should fail this function since we don't support dirty page tracking now. And it's not guarantee to use dirty page bitmap in the future. 
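A sketch of what failing it could look like; the choice of error code here is illustrative only:

```c
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    /*
     * Dirty page tracking is not supported by vhost-vdpa yet, and it is
     * not guaranteed that a dirty bitmap will be the mechanism used when
     * it is added, so reject the request instead of silently succeeding.
     */
    return -ENOTSUP;
}
```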
> + > +static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, > + struct vhost_memory *mem) > +{ > + > + if (mem->padding) { > + return -1; > + } > + > + return 0; A comment is need to explain why mem table is not used. (E.g we used IOTLB API instead). > +} > + > +static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, > + struct vhost_vring_addr *addr) > +{ > + return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); > +} > + > +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, > + struct vhost_vring_state *ring) > +{ > + return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); > +} > + > +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, > + struct vhost_vring_state *ring) > +{ > + return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); > +} > + > +static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, > + struct vhost_vring_state *ring) > +{ > + > + return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); > +} > + > +static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, > + struct vhost_vring_file *file) > +{ > + return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); > +} > + > +static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, > + struct vhost_vring_file *file) > +{ > + return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); > +} > + > +static int vhost_vdpa_set_features(struct vhost_dev *dev, > + uint64_t features) > +{ > + > + features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM); This seems tricky, I don't think we need this actually. > + return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); > + > +} > + > +static int vhost_vdpa_get_features(struct vhost_dev *dev, > + uint64_t *features) > +{ > + return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); > +} > + > +static int vhost_vdpa_set_owner(struct vhost_dev *dev) > +{ > + return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); > +} > + > +static int vhost_vdpa_reset_device(struct vhost_dev *dev) > +{ > + return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL); > +} > + > +static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) > +{ > + assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); > + > + return idx - dev->vq_index; > +} > + > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) > +{ > + int i; > + > + for (i = 0; i < dev->nvqs; ++i) { > + struct vhost_vring_state state = { > + .index = dev->vq_index + i, > + .num = enable, > + }; > + > + state.num = 1; > + > + vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); Please make sure patch 4 comes first then we don't need to fix this in patch 4. 
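For illustration only, assuming patch 4 is reordered first so the enable semantics are already in place, the workaround would simply disappear and the ioctl result could be propagated:

```c
static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,              /* no hard-coded state.num = 1 */
        };
        int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);

        if (r) {
            return r;
        }
    }

    return 0;
}
```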
> + } > + > + return 0; > +} > + > +static int vhost_vdpa_set_state(struct vhost_dev *dev, int state) > +{ > + return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state); > +} > + > + > +const VhostOps vdpa_ops = { > + .backend_type = VHOST_BACKEND_TYPE_VDPA, > + .vhost_backend_init = vhost_vdpa_init, > + .vhost_backend_cleanup = vhost_vdpa_cleanup, > + .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, > + .vhost_set_log_base = vhost_vdpa_set_log_base, > + .vhost_set_mem_table = vhost_vdpa_set_mem_table, > + .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, > + .vhost_set_vring_endian = NULL, > + .vhost_set_vring_num = vhost_vdpa_set_vring_num, > + .vhost_set_vring_base = vhost_vdpa_set_vring_base, > + .vhost_get_vring_base = vhost_vdpa_get_vring_base, > + .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, > + .vhost_set_vring_call = vhost_vdpa_set_vring_call, > + .vhost_set_features = vhost_vdpa_set_features, > + .vhost_get_features = vhost_vdpa_get_features, > + .vhost_set_owner = vhost_vdpa_set_owner, > + .vhost_reset_device = vhost_vdpa_reset_device, > + .vhost_get_vq_index = vhost_vdpa_get_vq_index, > + .vhost_set_vring_enable = vhost_vdpa_set_vring_enable, > + .vhost_requires_shm_log = NULL, > + .vhost_migration_done = NULL, > + .vhost_backend_can_merge = NULL, > + .vhost_net_set_mtu = NULL, > + .vhost_set_iotlb_callback = NULL, > + .vhost_send_device_iotlb_msg = NULL, > + .vhost_set_state = vhost_vdpa_set_state, > +}; > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index 4da0d5a6c5..d1f2c4add7 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, > .log_guest_addr = vq->used_phys, > .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, > }; > + if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) { > + addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys; > + addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys; > + addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys; > + } Comment is needed to explain why vDPA differs from others. 
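One possible wording for the requested comment (the rationale stated here is an assumption and should be confirmed by the author):

```c
    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
        /*
         * vhost-vdpa devices access guest memory through the IOTLB
         * mappings (GPA -> HVA) installed by the vhost-vdpa memory
         * listener, so the ring addresses passed to the backend are
         * guest physical addresses rather than the QEMU virtual
         * addresses used by vhost-kernel and vhost-user.
         */
        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    }
```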
Thanks > int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr); > if (r < 0) { > VHOST_OPS_DEBUG("vhost_set_vring_addr failed"); > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h > index 6f6670783f..d81bd9885f 100644 > --- a/include/hw/virtio/vhost-backend.h > +++ b/include/hw/virtio/vhost-backend.h > @@ -17,7 +17,8 @@ typedef enum VhostBackendType { > VHOST_BACKEND_TYPE_NONE = 0, > VHOST_BACKEND_TYPE_KERNEL = 1, > VHOST_BACKEND_TYPE_USER = 2, > - VHOST_BACKEND_TYPE_MAX = 3, > + VHOST_BACKEND_TYPE_VDPA = 3, > + VHOST_BACKEND_TYPE_MAX = 4, > } VhostBackendType; > > typedef enum VhostSetConfigType { > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, > typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, > struct vhost_inflight *inflight); > > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state); > typedef struct VhostOps { > VhostBackendType backend_type; > vhost_backend_init vhost_backend_init; > @@ -152,9 +154,11 @@ typedef struct VhostOps { > vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter; > vhost_get_inflight_fd_op vhost_get_inflight_fd; > vhost_set_inflight_fd_op vhost_set_inflight_fd; > + vhost_set_state_op vhost_set_state; > } VhostOps; > > extern const VhostOps user_ops; > +extern const VhostOps vdpa_ops; > > int vhost_set_backend_type(struct vhost_dev *dev, > VhostBackendType backend_type); > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h > new file mode 100644 > index 0000000000..889c1a4410 > --- /dev/null > +++ b/include/hw/virtio/vhost-vdpa.h > @@ -0,0 +1,14 @@ > + > +#ifndef HW_VIRTIO_VHOST_VDPA_H > +#define HW_VIRTIO_VHOST_VDPA_H > + > +#include "hw/virtio/virtio.h" > + > +typedef struct vhost_vdpa { > + int device_fd; > + MemoryListener listener; > +} VhostVDPA; > + > +extern AddressSpace address_space_memory; > + > +#endif
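Sketches of the two comments requested above, for the INT_MAX memslot limit and the unused memory table; both assume the IOTLB-based mapping is the intended explanation:

```c
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    /*
     * vhost-vdpa does not use VHOST_SET_MEM_TABLE, so the kernel imposes
     * no limit on the number of memory slots QEMU may produce.
     */
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    /*
     * The vhost memory table is not used by vhost-vdpa: guest memory is
     * mapped into the device through VHOST_IOTLB_MSG_V2 updates issued
     * by the vhost-vdpa memory listener instead.
     */
    if (mem->padding) {
        return -1;
    }

    return 0;
}
```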
On Tue, Apr 21, 2020 at 11:57 AM Jason Wang <jasowang@redhat.com> wrote: > > On 2020/4/20 下午5:32, Cindy Lu wrote: > > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > > vhost-user. The above patch provides a generic device for vDPA purpose, > > this vDPA device exposes to user space a non-vendor-specific > configuration > > interface for setting up a vhost HW accelerator, this patch set > introduces > > a third vhost backend called vhost-vdpa based on the vDPA interface. > > > > Vhost-vdpa usage: > > > > qemu-system-x86_64 -cpu host -enable-kvm \ > > ...... > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > > Actually, this part should belongs to patch 2. > > And we probably need to add a comment that vIOMMU is not supported right > now. > > > Will fix this problem > > > > Author: Tiwei Bie > > Signed-off-by: Cindy Lu <lulu@redhat.com> > > --- > > hw/net/vhost_net.c | 43 ++++ > > hw/net/virtio-net.c | 9 + > > hw/virtio/Makefile.objs | 2 +- > > hw/virtio/vhost-backend.c | 3 + > > hw/virtio/vhost-vdpa.c | 379 ++++++++++++++++++++++++++++++ > > hw/virtio/vhost.c | 5 + > > include/hw/virtio/vhost-backend.h | 6 +- > > include/hw/virtio/vhost-vdpa.h | 14 ++ > > 8 files changed, 459 insertions(+), 2 deletions(-) > > create mode 100644 hw/virtio/vhost-vdpa.c > > create mode 100644 include/hw/virtio/vhost-vdpa.h > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > > index 4096d64aaf..0d13fda2fc 100644 > > --- a/hw/net/vhost_net.c > > +++ b/hw/net/vhost_net.c > > @@ -17,8 +17,10 @@ > > #include "net/net.h" > > #include "net/tap.h" > > #include "net/vhost-user.h" > > +#include "net/vhost-vdpa.h" > > > > #include "standard-headers/linux/vhost_types.h" > > +#include "linux-headers/linux/vhost.h" > > #include "hw/virtio/virtio-net.h" > > #include "net/vhost_net.h" > > #include "qemu/error-report.h" > > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = { > > VHOST_INVALID_FEATURE_BIT > > }; > > > > +static const int vdpa_feature_bits[] = { > > + VIRTIO_F_NOTIFY_ON_EMPTY, > > + VIRTIO_RING_F_INDIRECT_DESC, > > + VIRTIO_RING_F_EVENT_IDX, > > + VIRTIO_F_ANY_LAYOUT, > > + VIRTIO_F_VERSION_1, > > + VIRTIO_NET_F_CSUM, > > + VIRTIO_NET_F_GUEST_CSUM, > > + VIRTIO_NET_F_GSO, > > + VIRTIO_NET_F_GUEST_TSO4, > > + VIRTIO_NET_F_GUEST_TSO6, > > + VIRTIO_NET_F_GUEST_ECN, > > + VIRTIO_NET_F_GUEST_UFO, > > + VIRTIO_NET_F_HOST_TSO4, > > + VIRTIO_NET_F_HOST_TSO6, > > + VIRTIO_NET_F_HOST_ECN, > > + VIRTIO_NET_F_HOST_UFO, > > + VIRTIO_NET_F_MRG_RXBUF, > > + VIRTIO_NET_F_MTU, > > + VIRTIO_F_IOMMU_PLATFORM, > > + VIRTIO_NET_F_GUEST_ANNOUNCE, > > + VHOST_INVALID_FEATURE_BIT > > +}; > > static const int *vhost_net_get_feature_bits(struct vhost_net *net) > > { > > const int *feature_bits = 0; > > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct > vhost_net *net) > > case NET_CLIENT_DRIVER_VHOST_USER: > > feature_bits = user_feature_bits; > > break; > > + case NET_CLIENT_DRIVER_VHOST_VDPA: > > + feature_bits = vdpa_feature_bits; > > + break; > > default: > > error_report("Feature bits not defined for this type: %d", > > net->nc->info->type); > > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > > assert(vhost_net); > > break; > > #endif > > + case NET_CLIENT_DRIVER_VHOST_VDPA: > > + vhost_net = vhost_vdpa_get_vhost_net(nc); > > + assert(vhost_net); > > + break; > > default: > > break; > > } > > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, 
> uint16_t mtu) > > > > return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); > > } > > +int vhost_set_state(NetClientState *nc, int state) > > +{ > > + struct vhost_net *net = get_vhost_net(nc); > > + struct vhost_dev *hdev = &net->dev; > > + if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { > > + if (hdev->vhost_ops->vhost_set_state) { > > + return hdev->vhost_ops->vhost_set_state(hdev, state); > > + } > > + } > > + return 0; > > +} > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > > index db3d7c38e6..bbecd7ab96 100644 > > --- a/hw/net/virtio-net.c > > +++ b/hw/net/virtio-net.c > > @@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n, > uint8_t status) > > VirtIODevice *vdev = VIRTIO_DEVICE(n); > > NetClientState *nc = qemu_get_queue(n->nic); > > int queues = n->multiqueue ? n->max_queues : 1; > > + NetClientState *peer = nc->peer; > > > qemu_get_peer()? > > will fix it > > > + uint8_t status_set = vdev->status ; > > + uint8_t vhost_started_pre = n->vhost_started; > > > > if (!get_vhost_net(nc->peer)) { > > return; > > @@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n, > uint8_t status) > > return; > > } > > } > > + status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK; > > > > n->vhost_started = 1; > > r = vhost_net_start(vdev, n->nic->ncs, queues); > > @@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n, > uint8_t status) > > error_report("unable to start vhost net: %d: " > > "falling back on userspace virtio", -r); > > n->vhost_started = 0; > > + status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK; > > } > > } else { > > vhost_net_stop(vdev, n->nic->ncs, queues); > > + status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK; > > n->vhost_started = 0; > > } > > + if (vhost_started_pre != n->vhost_started) { > > + vhost_set_state(peer, status_set); > > + } > > } > > > I think this deserves an independent patch. > > > will fix it > > > > > static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, > > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > > index e2f70fbb89..17361d959e 100644 > > --- a/hw/virtio/Makefile.objs > > +++ b/hw/virtio/Makefile.objs > > @@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y) > > common-obj-y += virtio-bus.o > > obj-y += virtio.o > > > > -obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o > vhost-backend.o > > +obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o > vhost-backend.o vhost-vdpa.o > > common-obj-$(call lnot,$(call > lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o > > obj-$(CONFIG_VHOST_USER) += vhost-user.o > > > > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c > > index 48905383f8..935cd9e561 100644 > > --- a/hw/virtio/vhost-backend.c > > +++ b/hw/virtio/vhost-backend.c > > @@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, > VhostBackendType backend_type) > > dev->vhost_ops = &user_ops; > > break; > > #endif > > + case VHOST_BACKEND_TYPE_VDPA: > > + dev->vhost_ops = &vdpa_ops; > > + break; > > default: > > error_report("Unknown vhost backend type"); > > r = -1; > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > > new file mode 100644 > > index 0000000000..213b327600 > > --- /dev/null > > +++ b/hw/virtio/vhost-vdpa.c > > @@ -0,0 +1,379 @@ > > +/* > > + * vhost-vdpa > > + * > > + * Copyright(c) 2017-2018 Intel Corporation. All rights reserved. > > + * Copyright(c) 2020 Red Hat, Inc. 
> > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > + > > +#include "qemu/osdep.h" > > +#include <linux/vhost.h> > > +#include <linux/vfio.h> > > +#include <sys/eventfd.h> > > +#include <sys/ioctl.h> > > +#include "hw/virtio/vhost.h" > > +#include "hw/virtio/vhost-backend.h" > > +#include "hw/virtio/virtio-net.h" > > +#include "hw/virtio/vhost-vdpa.h" > > +#include "qemu/main-loop.h" > > +#include <linux/kvm.h> > > +#include "sysemu/kvm.h" > > + > > + > > +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection > *section) > > +{ > > + return (!memory_region_is_ram(section->mr) && > > + !memory_region_is_iommu(section->mr)) || > > + /* > > + * Sizing an enabled 64-bit BAR can cause spurious mappings > to > > + * addresses in the upper part of the 64-bit address space. > These > > + * are never accessed by the CPU and beyond the address > width of > > + * some IOMMU hardware. TODO: VDPA should tell us the IOMMU > width. > > + */ > > + section->offset_within_address_space & (1ULL << 63); > > +} > > + > > +static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr > size, > > + void *vaddr, bool readonly) > > +{ > > + struct vhost_msg_v2 msg; > > + int fd = v->device_fd; > > + int ret = 0; > > + > > + msg.type = VHOST_IOTLB_MSG_V2; > > > Since V2 of the message is used here, I believe we need a kernel patch > to allow the querying of backend capability. > > Sure, I will provide another patch for kernel > > > + msg.iotlb.iova = iova; > > + msg.iotlb.size = size; > > + msg.iotlb.uaddr = (uint64_t)vaddr; > > + msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; > > + msg.iotlb.type = VHOST_IOTLB_UPDATE; > > + > > + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { > > + error_report("failed to write, fd=%d, errno=%d (%s)", > > + fd, errno, strerror(errno)); > > + return -EIO ; > > + } > > + > > + return ret; > > +} > > + > > +static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, > > + hwaddr size) > > +{ > > + struct vhost_msg_v2 msg; > > + int fd = v->device_fd; > > + int ret = 0; > > + > > + msg.type = VHOST_IOTLB_MSG_V2; > > + msg.iotlb.iova = iova; > > + msg.iotlb.size = size; > > + msg.iotlb.type = VHOST_IOTLB_INVALIDATE; > > + > > + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { > > + error_report("failed to write, fd=%d, errno=%d (%s)", > > + fd, errno, strerror(errno)); > > + return -EIO ; > > + } > > + > > + return ret; > > +} > > + > > +static void vhost_vdpa_listener_region_add(MemoryListener *listener, > > + MemoryRegionSection *section) > > +{ > > + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, > listener); > > + hwaddr iova; > > + Int128 llend, llsize; > > + void *vaddr; > > + int ret; > > + > > + if (vhost_vdpa_listener_skipped_section(section)) { > > + return; > > + } > > + > > + if (unlikely((section->offset_within_address_space & > ~TARGET_PAGE_MASK) != > > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > > + error_report("%s received unaligned region", __func__); > > + return; > > + } > > + > > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > > + llend = int128_make64(section->offset_within_address_space); > > + llend = int128_add(llend, section->size); > > + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > > + > > + if (int128_ge(int128_make64(iova), llend)) { > > + return; > > + } > > + > > + memory_region_ref(section->mr); > > + > > + /* Here 
we assume that memory_region_is_ram(section->mr)==true */ > > + > > + vaddr = memory_region_get_ram_ptr(section->mr) + > > + section->offset_within_region + > > + (iova - section->offset_within_address_space); > > + > > + llsize = int128_sub(llend, int128_make64(iova)); > > + > > + ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), > > + vaddr, section->readonly); > > + if (ret) { > > + error_report("vhost vdpa map fail!"); > > + if (memory_region_is_ram_device(section->mr)) { > > + /* Allow unexpected mappings not to be fatal for RAM > devices */ > > + error_report("map ram fail!"); > > + return ; > > + } > > + goto fail; > > + } > > + > > + return; > > + > > +fail: > > + if (memory_region_is_ram_device(section->mr)) { > > + error_report("failed to vdpa_dma_map. pci p2p may not work"); > > + return; > > + > > + } > > + /* > > + * On the initfn path, store the first error in the container so we > > + * can gracefully fail. Runtime, there's not much we can do other > > + * than throw a hardware error. > > + */ > > + error_report("vhost-vdpa: DMA mapping failed, unable to continue"); > > + return; > > + > > +} > > + > > +static void vhost_vdpa_listener_region_del(MemoryListener *listener, > > + MemoryRegionSection *section) > > +{ > > + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, > listener); > > + hwaddr iova; > > + Int128 llend, llsize; > > + int ret; > > + bool try_unmap = true; > > + > > + if (vhost_vdpa_listener_skipped_section(section)) { > > + return; > > + } > > + > > + if (unlikely((section->offset_within_address_space & > ~TARGET_PAGE_MASK) != > > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > > + error_report("%s received unaligned region", __func__); > > + return; > > + } > > + > > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > > + llend = int128_make64(section->offset_within_address_space); > > + llend = int128_add(llend, section->size); > > + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); > > + > > + if (int128_ge(int128_make64(iova), llend)) { > > + return; > > + } > > + > > + llsize = int128_sub(llend, int128_make64(iova)); > > + > > + if (try_unmap) { > > + ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); > > + if (ret) { > > + error_report("vhost_vdpa dma unmap error!"); > > + } > > + } > > + > > + memory_region_unref(section->mr); > > +} > > + > > > I think it's better to add comment to explain why vhost-vdpa use a > different listener other than the one used by other vhost backends (e.g > kernel or user). 
> > will fix it > > > +static const MemoryListener vhost_vdpa_memory_listener = { > > + .region_add = vhost_vdpa_listener_region_add, > > + .region_del = vhost_vdpa_listener_region_del, > > +}; > > + > > + > > +static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int > request, > > + void *arg) > > +{ > > + struct vhost_vdpa *v = dev->opaque; > > + int fd = v->device_fd; > > + > > + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); > > + > > + return ioctl(fd, request, arg); > > +} > > + > > + > > + > > +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque) > > +{ > > + struct vhost_vdpa *v; > > + > > + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); > > + > > + v = opaque; > > + dev->opaque = opaque ; > > + > > + v->listener = vhost_vdpa_memory_listener; > > + memory_listener_register(&v->listener, &address_space_memory); > > + > > + return 0; > > +} > > + > > +static int vhost_vdpa_cleanup(struct vhost_dev *dev) > > +{ > > + struct vhost_vdpa *v; > > + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); > > + > > + v = dev->opaque; > > + memory_listener_unregister(&v->listener); > > + > > + dev->opaque = NULL; > > + return 0; > > +} > > + > > > A comment here is need to explain why INT_MAX is used. > > > will do > > +static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) > > +{ > > + return INT_MAX; > > +} > > + > > +static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, > > + struct vhost_log *log) > > +{ > > + return 0; > > +} > > > I think we should fail this function since we don't support dirty page > tracking now. > > And it's not guarantee to use dirty page bitmap in the future. > > > > + > > +static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, > > + struct vhost_memory *mem) > > +{ > > + > > + if (mem->padding) { > > + return -1; > > + } > > + > > + return 0; > > > A comment is need to explain why mem table is not used. (E.g we used > IOTLB API instead). > > will do > > > +} > > + > > +static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, > > + struct vhost_vring_addr *addr) > > +{ > > + return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); > > +} > > + > > +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, > > + struct vhost_vring_state *ring) > > +{ > > + return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); > > +} > > + > > +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, > > + struct vhost_vring_state *ring) > > +{ > > + return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); > > +} > > + > > +static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, > > + struct vhost_vring_state *ring) > > +{ > > + > > + return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); > > +} > > + > > +static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, > > + struct vhost_vring_file *file) > > +{ > > + return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); > > +} > > + > > +static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, > > + struct vhost_vring_file *file) > > +{ > > + return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); > > +} > > + > > +static int vhost_vdpa_set_features(struct vhost_dev *dev, > > + uint64_t features) > > +{ > > + > > + features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM); > > > This seems tricky, I don't think we need this actually. 
> > > I will double check for this problem > > + return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); > > + > > +} > > + > > +static int vhost_vdpa_get_features(struct vhost_dev *dev, > > + uint64_t *features) > > +{ > > + return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); > > +} > > + > > +static int vhost_vdpa_set_owner(struct vhost_dev *dev) > > +{ > > + return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); > > +} > > + > > +static int vhost_vdpa_reset_device(struct vhost_dev *dev) > > +{ > > + return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL); > > +} > > + > > +static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) > > +{ > > + assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); > > + > > + return idx - dev->vq_index; > > +} > > + > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int > enable) > > +{ > > + int i; > > + > > + for (i = 0; i < dev->nvqs; ++i) { > > + struct vhost_vring_state state = { > > + .index = dev->vq_index + i, > > + .num = enable, > > + }; > > + > > + state.num = 1; > > + > > + vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); > > > Please make sure patch 4 comes first then we don't need to fix this in > patch 4. > > will do > > > + } > > + > > + return 0; > > +} > > + > > +static int vhost_vdpa_set_state(struct vhost_dev *dev, int state) > > +{ > > + return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state); > > +} > > + > > + > > +const VhostOps vdpa_ops = { > > + .backend_type = VHOST_BACKEND_TYPE_VDPA, > > + .vhost_backend_init = vhost_vdpa_init, > > + .vhost_backend_cleanup = vhost_vdpa_cleanup, > > + .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, > > + .vhost_set_log_base = vhost_vdpa_set_log_base, > > + .vhost_set_mem_table = vhost_vdpa_set_mem_table, > > + .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, > > + .vhost_set_vring_endian = NULL, > > + .vhost_set_vring_num = vhost_vdpa_set_vring_num, > > + .vhost_set_vring_base = vhost_vdpa_set_vring_base, > > + .vhost_get_vring_base = vhost_vdpa_get_vring_base, > > + .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, > > + .vhost_set_vring_call = vhost_vdpa_set_vring_call, > > + .vhost_set_features = vhost_vdpa_set_features, > > + .vhost_get_features = vhost_vdpa_get_features, > > + .vhost_set_owner = vhost_vdpa_set_owner, > > + .vhost_reset_device = vhost_vdpa_reset_device, > > + .vhost_get_vq_index = vhost_vdpa_get_vq_index, > > + .vhost_set_vring_enable = vhost_vdpa_set_vring_enable, > > + .vhost_requires_shm_log = NULL, > > + .vhost_migration_done = NULL, > > + .vhost_backend_can_merge = NULL, > > + .vhost_net_set_mtu = NULL, > > + .vhost_set_iotlb_callback = NULL, > > + .vhost_send_device_iotlb_msg = NULL, > > + .vhost_set_state = vhost_vdpa_set_state, > > +}; > > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > > index 4da0d5a6c5..d1f2c4add7 100644 > > --- a/hw/virtio/vhost.c > > +++ b/hw/virtio/vhost.c > > @@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct > vhost_dev *dev, > > .log_guest_addr = vq->used_phys, > > .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, > > }; > > + if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) { > > + addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys; > > + addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys; > > + addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys; > > + } > > > Comment is needed to explain why vDPA differs from others. 
> > Thanks > > will do > > > int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr); > > if (r < 0) { > > VHOST_OPS_DEBUG("vhost_set_vring_addr failed"); > > diff --git a/include/hw/virtio/vhost-backend.h > b/include/hw/virtio/vhost-backend.h > > index 6f6670783f..d81bd9885f 100644 > > --- a/include/hw/virtio/vhost-backend.h > > +++ b/include/hw/virtio/vhost-backend.h > > @@ -17,7 +17,8 @@ typedef enum VhostBackendType { > > VHOST_BACKEND_TYPE_NONE = 0, > > VHOST_BACKEND_TYPE_KERNEL = 1, > > VHOST_BACKEND_TYPE_USER = 2, > > - VHOST_BACKEND_TYPE_MAX = 3, > > + VHOST_BACKEND_TYPE_VDPA = 3, > > + VHOST_BACKEND_TYPE_MAX = 4, > > } VhostBackendType; > > > > typedef enum VhostSetConfigType { > > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct > vhost_dev *dev, > > typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, > > struct vhost_inflight > *inflight); > > > > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state); > > typedef struct VhostOps { > > VhostBackendType backend_type; > > vhost_backend_init vhost_backend_init; > > @@ -152,9 +154,11 @@ typedef struct VhostOps { > > vhost_backend_mem_section_filter_op > vhost_backend_mem_section_filter; > > vhost_get_inflight_fd_op vhost_get_inflight_fd; > > vhost_set_inflight_fd_op vhost_set_inflight_fd; > > + vhost_set_state_op vhost_set_state; > > } VhostOps; > > > > extern const VhostOps user_ops; > > +extern const VhostOps vdpa_ops; > > > > int vhost_set_backend_type(struct vhost_dev *dev, > > VhostBackendType backend_type); > > diff --git a/include/hw/virtio/vhost-vdpa.h > b/include/hw/virtio/vhost-vdpa.h > > new file mode 100644 > > index 0000000000..889c1a4410 > > --- /dev/null > > +++ b/include/hw/virtio/vhost-vdpa.h > > @@ -0,0 +1,14 @@ > > + > > +#ifndef HW_VIRTIO_VHOST_VDPA_H > > +#define HW_VIRTIO_VHOST_VDPA_H > > + > > +#include "hw/virtio/virtio.h" > > + > > +typedef struct vhost_vdpa { > > + int device_fd; > > + MemoryListener listener; > > +} VhostVDPA; > > + > > +extern AddressSpace address_space_memory; > > + > > +#endif > >
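For the listener comment requested above, a possible sketch (the wording of the rationale is an assumption):

```c
/*
 * vhost-vdpa needs its own MemoryListener instead of the generic vhost
 * listener: the generic one collects RAM sections into a vhost memory
 * table for VHOST_SET_MEM_TABLE, whereas vhost-vdpa must translate each
 * section into IOTLB updates (VHOST_IOTLB_UPDATE / VHOST_IOTLB_INVALIDATE)
 * written to the device fd.
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};
```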
On 20/04/2020 11:32, Cindy Lu wrote: > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > vhost-user. The above patch provides a generic device for vDPA purpose, > this vDPA device exposes to user space a non-vendor-specific configuration > interface for setting up a vhost HW accelerator, this patch set introduces > a third vhost backend called vhost-vdpa based on the vDPA interface. > > Vhost-vdpa usage: > > qemu-system-x86_64 -cpu host -enable-kvm \ > ...... > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > Author: Tiwei Bie Use "git commit --author" to set that. > Signed-off-by: Cindy Lu <lulu@redhat.com> > --- > hw/net/vhost_net.c | 43 ++++ > hw/net/virtio-net.c | 9 + > hw/virtio/Makefile.objs | 2 +- > hw/virtio/vhost-backend.c | 3 + > hw/virtio/vhost-vdpa.c | 379 ++++++++++++++++++++++++++++++ > hw/virtio/vhost.c | 5 + > include/hw/virtio/vhost-backend.h | 6 +- > include/hw/virtio/vhost-vdpa.h | 14 ++ > 8 files changed, 459 insertions(+), 2 deletions(-) > create mode 100644 hw/virtio/vhost-vdpa.c > create mode 100644 include/hw/virtio/vhost-vdpa.h > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > index 4096d64aaf..0d13fda2fc 100644 > --- a/hw/net/vhost_net.c > +++ b/hw/net/vhost_net.c ... > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > assert(vhost_net); > break; > #endif > + case NET_CLIENT_DRIVER_VHOST_VDPA: > + vhost_net = vhost_vdpa_get_vhost_net(nc); > + assert(vhost_net); > + break; This should be inside a "#ifdef". Thanks, Laurent
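Roughly what that would look like in get_vhost_net(); CONFIG_VHOST_VDPA is a placeholder name for whatever config macro the next revision introduces:

```c
#ifdef CONFIG_VHOST_VDPA            /* placeholder macro name */
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        vhost_net = vhost_vdpa_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
```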
On Tue, Apr 21, 2020 at 11:54 PM Laurent Vivier <lvivier@redhat.com> wrote: > > On 20/04/2020 11:32, Cindy Lu wrote: > > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > > vhost-user. The above patch provides a generic device for vDPA purpose, > > this vDPA device exposes to user space a non-vendor-specific configuration > > interface for setting up a vhost HW accelerator, this patch set introduces > > a third vhost backend called vhost-vdpa based on the vDPA interface. > > > > Vhost-vdpa usage: > > > > qemu-system-x86_64 -cpu host -enable-kvm \ > > ...... > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > > > Author: Tiwei Bie > > Use "git commit --author" to set that. Thanks, I will fix this > > Signed-off-by: Cindy Lu <lulu@redhat.com> > > --- > > hw/net/vhost_net.c | 43 ++++ > > hw/net/virtio-net.c | 9 + > > hw/virtio/Makefile.objs | 2 +- > > hw/virtio/vhost-backend.c | 3 + > > hw/virtio/vhost-vdpa.c | 379 ++++++++++++++++++++++++++++++ > > hw/virtio/vhost.c | 5 + > > include/hw/virtio/vhost-backend.h | 6 +- > > include/hw/virtio/vhost-vdpa.h | 14 ++ > > 8 files changed, 459 insertions(+), 2 deletions(-) > > create mode 100644 hw/virtio/vhost-vdpa.c > > create mode 100644 include/hw/virtio/vhost-vdpa.h > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > > index 4096d64aaf..0d13fda2fc 100644 > > --- a/hw/net/vhost_net.c > > +++ b/hw/net/vhost_net.c > ... > > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > > assert(vhost_net); > > break; > > #endif > > + case NET_CLIENT_DRIVER_VHOST_VDPA: > > + vhost_net = vhost_vdpa_get_vhost_net(nc); > > + assert(vhost_net); > > + break; > > This should be inside a "#ifdef". > Thanks Laurent, I will add a new macro for vDPA > Thanks, > Laurent >
On 4/20/20 11:32 AM, Cindy Lu wrote: > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > vhost-user. The above patch provides a generic device for vDPA purpose, > this vDPA device exposes to user space a non-vendor-specific configuration > interface for setting up a vhost HW accelerator, this patch set introduces > a third vhost backend called vhost-vdpa based on the vDPA interface. > > Vhost-vdpa usage: > > qemu-system-x86_64 -cpu host -enable-kvm \ > ...... > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > Author: Tiwei Bie > Signed-off-by: Cindy Lu <lulu@redhat.com> > --- > hw/net/vhost_net.c | 43 ++++ > hw/net/virtio-net.c | 9 + > hw/virtio/Makefile.objs | 2 +- > hw/virtio/vhost-backend.c | 3 + > hw/virtio/vhost-vdpa.c | 379 ++++++++++++++++++++++++++++++ > hw/virtio/vhost.c | 5 + > include/hw/virtio/vhost-backend.h | 6 +- > include/hw/virtio/vhost-vdpa.h | 14 ++ > 8 files changed, 459 insertions(+), 2 deletions(-) > create mode 100644 hw/virtio/vhost-vdpa.c > create mode 100644 include/hw/virtio/vhost-vdpa.h > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > index 4096d64aaf..0d13fda2fc 100644 > --- a/hw/net/vhost_net.c > +++ b/hw/net/vhost_net.c > @@ -17,8 +17,10 @@ > #include "net/net.h" > #include "net/tap.h" > #include "net/vhost-user.h" > +#include "net/vhost-vdpa.h" > > #include "standard-headers/linux/vhost_types.h" > +#include "linux-headers/linux/vhost.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "qemu/error-report.h" > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = { > VHOST_INVALID_FEATURE_BIT > }; > > +static const int vdpa_feature_bits[] = { > + VIRTIO_F_NOTIFY_ON_EMPTY, > + VIRTIO_RING_F_INDIRECT_DESC, > + VIRTIO_RING_F_EVENT_IDX, > + VIRTIO_F_ANY_LAYOUT, > + VIRTIO_F_VERSION_1, > + VIRTIO_NET_F_CSUM, > + VIRTIO_NET_F_GUEST_CSUM, > + VIRTIO_NET_F_GSO, > + VIRTIO_NET_F_GUEST_TSO4, > + VIRTIO_NET_F_GUEST_TSO6, > + VIRTIO_NET_F_GUEST_ECN, > + VIRTIO_NET_F_GUEST_UFO, > + VIRTIO_NET_F_HOST_TSO4, > + VIRTIO_NET_F_HOST_TSO6, > + VIRTIO_NET_F_HOST_ECN, > + VIRTIO_NET_F_HOST_UFO, > + VIRTIO_NET_F_MRG_RXBUF, > + VIRTIO_NET_F_MTU, > + VIRTIO_F_IOMMU_PLATFORM, > + VIRTIO_NET_F_GUEST_ANNOUNCE, > + VHOST_INVALID_FEATURE_BIT > +}; > static const int *vhost_net_get_feature_bits(struct vhost_net *net) > { > const int *feature_bits = 0; > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net) > case NET_CLIENT_DRIVER_VHOST_USER: > feature_bits = user_feature_bits; > break; > + case NET_CLIENT_DRIVER_VHOST_VDPA: > + feature_bits = vdpa_feature_bits; > + break; > default: > error_report("Feature bits not defined for this type: %d", > net->nc->info->type); > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > assert(vhost_net); > break; > #endif > + case NET_CLIENT_DRIVER_VHOST_VDPA: > + vhost_net = vhost_vdpa_get_vhost_net(nc); > + assert(vhost_net); > + break; > default: > break; > } > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) > > return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); > } > +int vhost_set_state(NetClientState *nc, int state) > +{ > + struct vhost_net *net = get_vhost_net(nc); > + struct vhost_dev *hdev = &net->dev; > + if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { Maybe checking the vhost_set_state callback is implemented is enough, and it is not need to restrict that to Vhost-vDPA? 
> + if (hdev->vhost_ops->vhost_set_state) { > + return hdev->vhost_ops->vhost_set_state(hdev, state); > + } > + } > + return 0; > +}
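A sketch of the simplified helper keyed only on whether the backend implements the callback, as suggested:

```c
int vhost_set_state(NetClientState *nc, int state)
{
    struct vhost_net *net = get_vhost_net(nc);
    struct vhost_dev *hdev = &net->dev;

    if (hdev->vhost_ops->vhost_set_state) {
        return hdev->vhost_ops->vhost_set_state(hdev, state);
    }

    return 0;
}
```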
On 4/20/20 11:32 AM, Cindy Lu wrote: > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h > index 6f6670783f..d81bd9885f 100644 > --- a/include/hw/virtio/vhost-backend.h > +++ b/include/hw/virtio/vhost-backend.h > @@ -17,7 +17,8 @@ typedef enum VhostBackendType { > VHOST_BACKEND_TYPE_NONE = 0, > VHOST_BACKEND_TYPE_KERNEL = 1, > VHOST_BACKEND_TYPE_USER = 2, > - VHOST_BACKEND_TYPE_MAX = 3, > + VHOST_BACKEND_TYPE_VDPA = 3, > + VHOST_BACKEND_TYPE_MAX = 4, > } VhostBackendType; > > typedef enum VhostSetConfigType { > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, > typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, > struct vhost_inflight *inflight); > > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state); I think state should be of type uint8_t.
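That is, something along these lines (a sketch; the caller in vhost_net.c would be adjusted to match, and this assumes VHOST_VDPA_SET_STATUS takes a u8 status):

```c
typedef int (*vhost_set_state_op)(struct vhost_dev *dev, uint8_t state);

/* ...and correspondingly in hw/virtio/vhost-vdpa.c */
static int vhost_vdpa_set_state(struct vhost_dev *dev, uint8_t state)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
}
```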
On Thu, May 7, 2020 at 11:13 PM Maxime Coquelin <maxime.coquelin@redhat.com> wrote: > > > > On 4/20/20 11:32 AM, Cindy Lu wrote: > > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > > vhost-user. The above patch provides a generic device for vDPA purpose, > > this vDPA device exposes to user space a non-vendor-specific configuration > > interface for setting up a vhost HW accelerator, this patch set introduces > > a third vhost backend called vhost-vdpa based on the vDPA interface. > > > > Vhost-vdpa usage: > > > > qemu-system-x86_64 -cpu host -enable-kvm \ > > ...... > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > > > Author: Tiwei Bie > > Signed-off-by: Cindy Lu <lulu@redhat.com> > > --- > > hw/net/vhost_net.c | 43 ++++ > > hw/net/virtio-net.c | 9 + > > hw/virtio/Makefile.objs | 2 +- > > hw/virtio/vhost-backend.c | 3 + > > hw/virtio/vhost-vdpa.c | 379 ++++++++++++++++++++++++++++++ > > hw/virtio/vhost.c | 5 + > > include/hw/virtio/vhost-backend.h | 6 +- > > include/hw/virtio/vhost-vdpa.h | 14 ++ > > 8 files changed, 459 insertions(+), 2 deletions(-) > > create mode 100644 hw/virtio/vhost-vdpa.c > > create mode 100644 include/hw/virtio/vhost-vdpa.h > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > > index 4096d64aaf..0d13fda2fc 100644 > > --- a/hw/net/vhost_net.c > > +++ b/hw/net/vhost_net.c > > @@ -17,8 +17,10 @@ > > #include "net/net.h" > > #include "net/tap.h" > > #include "net/vhost-user.h" > > +#include "net/vhost-vdpa.h" > > > > #include "standard-headers/linux/vhost_types.h" > > +#include "linux-headers/linux/vhost.h" > > #include "hw/virtio/virtio-net.h" > > #include "net/vhost_net.h" > > #include "qemu/error-report.h" > > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = { > > VHOST_INVALID_FEATURE_BIT > > }; > > > > +static const int vdpa_feature_bits[] = { > > + VIRTIO_F_NOTIFY_ON_EMPTY, > > + VIRTIO_RING_F_INDIRECT_DESC, > > + VIRTIO_RING_F_EVENT_IDX, > > + VIRTIO_F_ANY_LAYOUT, > > + VIRTIO_F_VERSION_1, > > + VIRTIO_NET_F_CSUM, > > + VIRTIO_NET_F_GUEST_CSUM, > > + VIRTIO_NET_F_GSO, > > + VIRTIO_NET_F_GUEST_TSO4, > > + VIRTIO_NET_F_GUEST_TSO6, > > + VIRTIO_NET_F_GUEST_ECN, > > + VIRTIO_NET_F_GUEST_UFO, > > + VIRTIO_NET_F_HOST_TSO4, > > + VIRTIO_NET_F_HOST_TSO6, > > + VIRTIO_NET_F_HOST_ECN, > > + VIRTIO_NET_F_HOST_UFO, > > + VIRTIO_NET_F_MRG_RXBUF, > > + VIRTIO_NET_F_MTU, > > + VIRTIO_F_IOMMU_PLATFORM, > > + VIRTIO_NET_F_GUEST_ANNOUNCE, > > + VHOST_INVALID_FEATURE_BIT > > +}; > > static const int *vhost_net_get_feature_bits(struct vhost_net *net) > > { > > const int *feature_bits = 0; > > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net) > > case NET_CLIENT_DRIVER_VHOST_USER: > > feature_bits = user_feature_bits; > > break; > > + case NET_CLIENT_DRIVER_VHOST_VDPA: > > + feature_bits = vdpa_feature_bits; > > + break; > > default: > > error_report("Feature bits not defined for this type: %d", > > net->nc->info->type); > > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > > assert(vhost_net); > > break; > > #endif > > + case NET_CLIENT_DRIVER_VHOST_VDPA: > > + vhost_net = vhost_vdpa_get_vhost_net(nc); > > + assert(vhost_net); > > + break; > > default: > > break; > > } > > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) > > > > return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); > > } > > +int vhost_set_state(NetClientState *nc, int state) > > +{ > > + struct 
vhost_net *net = get_vhost_net(nc); > > + struct vhost_dev *hdev = &net->dev; > > + if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { > > Maybe checking the vhost_set_state callback is implemented is enough, > and it is not need to restrict that to Vhost-vDPA? Sure, Will remove this > > + if (hdev->vhost_ops->vhost_set_state) { > > + return hdev->vhost_ops->vhost_set_state(hdev, state); > > + } > > + } > > + return 0; > > +} >
On Thu, May 7, 2020 at 11:30 PM Maxime Coquelin <maxime.coquelin@redhat.com> wrote: > > > > On 4/20/20 11:32 AM, Cindy Lu wrote: > > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h > > index 6f6670783f..d81bd9885f 100644 > > --- a/include/hw/virtio/vhost-backend.h > > +++ b/include/hw/virtio/vhost-backend.h > > @@ -17,7 +17,8 @@ typedef enum VhostBackendType { > > VHOST_BACKEND_TYPE_NONE = 0, > > VHOST_BACKEND_TYPE_KERNEL = 1, > > VHOST_BACKEND_TYPE_USER = 2, > > - VHOST_BACKEND_TYPE_MAX = 3, > > + VHOST_BACKEND_TYPE_VDPA = 3, > > + VHOST_BACKEND_TYPE_MAX = 4, > > } VhostBackendType; > > > > typedef enum VhostSetConfigType { > > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, > > typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, > > struct vhost_inflight *inflight); > > > > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state); > > I think state should be of type uint8_t. > ok, I will change this to uint8_t
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 4096d64aaf..0d13fda2fc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -17,8 +17,10 @@
 #include "net/net.h"
 #include "net/tap.h"
 #include "net/vhost-user.h"
+#include "net/vhost-vdpa.h"
 
 #include "standard-headers/linux/vhost_types.h"
+#include "linux-headers/linux/vhost.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "qemu/error-report.h"
@@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
     VHOST_INVALID_FEATURE_BIT
 };
 
+static const int vdpa_feature_bits[] = {
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_F_ANY_LAYOUT,
+    VIRTIO_F_VERSION_1,
+    VIRTIO_NET_F_CSUM,
+    VIRTIO_NET_F_GUEST_CSUM,
+    VIRTIO_NET_F_GSO,
+    VIRTIO_NET_F_GUEST_TSO4,
+    VIRTIO_NET_F_GUEST_TSO6,
+    VIRTIO_NET_F_GUEST_ECN,
+    VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_HOST_TSO4,
+    VIRTIO_NET_F_HOST_TSO6,
+    VIRTIO_NET_F_HOST_ECN,
+    VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_MRG_RXBUF,
+    VIRTIO_NET_F_MTU,
+    VIRTIO_F_IOMMU_PLATFORM,
+    VIRTIO_NET_F_GUEST_ANNOUNCE,
+    VHOST_INVALID_FEATURE_BIT
+};
 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
 {
     const int *feature_bits = 0;
@@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
     case NET_CLIENT_DRIVER_VHOST_USER:
         feature_bits = user_feature_bits;
         break;
+    case NET_CLIENT_DRIVER_VHOST_VDPA:
+        feature_bits = vdpa_feature_bits;
+        break;
     default:
         error_report("Feature bits not defined for this type: %d",
             net->nc->info->type);
@@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
         assert(vhost_net);
         break;
 #endif
+    case NET_CLIENT_DRIVER_VHOST_VDPA:
+        vhost_net = vhost_vdpa_get_vhost_net(nc);
+        assert(vhost_net);
+        break;
     default:
         break;
     }
@@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 
     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
 }
+int vhost_set_state(NetClientState *nc, int state)
+{
+    struct vhost_net *net = get_vhost_net(nc);
+    struct vhost_dev *hdev = &net->dev;
+    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+        if (hdev->vhost_ops->vhost_set_state) {
+            return hdev->vhost_ops->vhost_set_state(hdev, state);
+        }
+    }
+    return 0;
+}
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index db3d7c38e6..bbecd7ab96 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
     NetClientState *nc = qemu_get_queue(n->nic);
     int queues = n->multiqueue ? n->max_queues : 1;
+    NetClientState *peer = nc->peer;
+    uint8_t status_set = vdev->status ;
+    uint8_t vhost_started_pre = n->vhost_started;
 
     if (!get_vhost_net(nc->peer)) {
         return;
@@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
                 return;
             }
         }
+        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
 
         n->vhost_started = 1;
         r = vhost_net_start(vdev, n->nic->ncs, queues);
@@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
             n->vhost_started = 0;
+            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
         }
     } else {
         vhost_net_stop(vdev, n->nic->ncs, queues);
+        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
         n->vhost_started = 0;
     }
+    if (vhost_started_pre != n->vhost_started) {
+        vhost_set_state(peer, status_set);
+    }
 }
 
 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index e2f70fbb89..17361d959e 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y)
 common-obj-y += virtio-bus.o
 obj-y += virtio.o
 
-obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o
+obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o vhost-vdpa.o
 common-obj-$(call lnot,$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
 obj-$(CONFIG_VHOST_USER) += vhost-user.o
 
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 48905383f8..935cd9e561 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
         dev->vhost_ops = &user_ops;
         break;
 #endif
+    case VHOST_BACKEND_TYPE_VDPA:
+        dev->vhost_ops = &vdpa_ops;
+        break;
     default:
         error_report("Unknown vhost backend type");
         r = -1;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
new file mode 100644
index 0000000000..213b327600
--- /dev/null
+++ b/hw/virtio/vhost-vdpa.c
@@ -0,0 +1,379 @@
+/*
+ * vhost-vdpa
+ *
+ * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
+ * Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vhost.h>
+#include <linux/vfio.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "qemu/main-loop.h"
+#include <linux/kvm.h>
+#include "sysemu/kvm.h"
+
+
+static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
+{
+    return (!memory_region_is_ram(section->mr) &&
+            !memory_region_is_iommu(section->mr)) ||
+           /*
+            * Sizing an enabled 64-bit BAR can cause spurious mappings to
+            * addresses in the upper part of the 64-bit address space. These
+            * are never accessed by the CPU and beyond the address width of
+            * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width.
+            */
+           section->offset_within_address_space & (1ULL << 63);
+}
+
+static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+                              void *vaddr, bool readonly)
+{
+    struct vhost_msg_v2 msg;
+    int fd = v->device_fd;
+    int ret = 0;
+
+    msg.type = VHOST_IOTLB_MSG_V2;
+    msg.iotlb.iova = iova;
+    msg.iotlb.size = size;
+    msg.iotlb.uaddr = (uint64_t)vaddr;
+    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
+    msg.iotlb.type = VHOST_IOTLB_UPDATE;
+
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+                     fd, errno, strerror(errno));
+        return -EIO ;
+    }
+
+    return ret;
+}
+
+static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
+                                hwaddr size)
+{
+    struct vhost_msg_v2 msg;
+    int fd = v->device_fd;
+    int ret = 0;
+
+    msg.type = VHOST_IOTLB_MSG_V2;
+    msg.iotlb.iova = iova;
+    msg.iotlb.size = size;
+    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
+
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+                     fd, errno, strerror(errno));
+        return -EIO ;
+    }
+
+    return ret;
+}
+
+static void vhost_vdpa_listener_region_add(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+    void *vaddr;
+    int ret;
+
+    if (vhost_vdpa_listener_skipped_section(section)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    memory_region_ref(section->mr);
+
+    /* Here we assume that memory_region_is_ram(section->mr)==true */
+
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
+                             vaddr, section->readonly);
+    if (ret) {
+        error_report("vhost vdpa map fail!");
+        if (memory_region_is_ram_device(section->mr)) {
+            /* Allow unexpected mappings not to be fatal for RAM devices */
+            error_report("map ram fail!");
+            return ;
+        }
+        goto fail;
+    }
+
+    return;
+
+fail:
+    if (memory_region_is_ram_device(section->mr)) {
+        error_report("failed to vdpa_dma_map. pci p2p may not work");
+        return;
+
+    }
+    /*
+     * On the initfn path, store the first error in the container so we
+     * can gracefully fail.  Runtime, there's not much we can do other
+     * than throw a hardware error.
+     */
+    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
+    return;
+
+}
+
+static void vhost_vdpa_listener_region_del(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+    int ret;
+    bool try_unmap = true;
+
+    if (vhost_vdpa_listener_skipped_section(section)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    if (try_unmap) {
+        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
+        if (ret) {
+            error_report("vhost_vdpa dma unmap error!");
+        }
+    }
+
+    memory_region_unref(section->mr);
+}
+
+static const MemoryListener vhost_vdpa_memory_listener = {
+    .region_add = vhost_vdpa_listener_region_add,
+    .region_del = vhost_vdpa_listener_region_del,
+};
+
+
+static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
+                           void *arg)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int fd = v->device_fd;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    return ioctl(fd, request, arg);
+}
+
+
+
+static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
+{
+    struct vhost_vdpa *v;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    v = opaque;
+    dev->opaque = opaque ;
+
+    v->listener = vhost_vdpa_memory_listener;
+    memory_listener_register(&v->listener, &address_space_memory);
+
+    return 0;
+}
+
+static int vhost_vdpa_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v;
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    v = dev->opaque;
+    memory_listener_unregister(&v->listener);
+
+    dev->opaque = NULL;
+    return 0;
+}
+
+static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
+{
+    return INT_MAX;
+}
+
+static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                   struct vhost_log *log)
+{
+    return 0;
+}
+
+static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+
+    if (mem->padding) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
+                                     struct vhost_vring_addr *addr)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+}
+
+static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
+                                    struct vhost_vring_state *ring)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
+}
+
+static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
+}
+
+static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+
+    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
+}
+
+static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
+static int vhost_vdpa_set_features(struct vhost_dev *dev,
+                                   uint64_t features)
+{
+
+    features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
+
+}
+
+static int vhost_vdpa_get_features(struct vhost_dev *dev,
+                                   uint64_t *features)
+{
+    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
+}
+
+static int vhost_vdpa_set_owner(struct vhost_dev *dev)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+}
+
+static int vhost_vdpa_reset_device(struct vhost_dev *dev)
+{
+    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
+}
+
+static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
+{
+    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+
+    return idx - dev->vq_index;
+}
+
+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+    int i;
+
+    for (i = 0; i < dev->nvqs; ++i) {
+        struct vhost_vring_state state = {
+            .index = dev->vq_index + i,
+            .num = enable,
+        };
+
+        state.num = 1;
+
+        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
+    }
+
+    return 0;
+}
+
+static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
+{
+    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
+}
+
+
+const VhostOps vdpa_ops = {
+        .backend_type = VHOST_BACKEND_TYPE_VDPA,
+        .vhost_backend_init = vhost_vdpa_init,
+        .vhost_backend_cleanup = vhost_vdpa_cleanup,
+        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
+        .vhost_set_log_base = vhost_vdpa_set_log_base,
+        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
+        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
+        .vhost_set_vring_endian = NULL,
+        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
+        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
+        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
+        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
+        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
+        .vhost_set_features = vhost_vdpa_set_features,
+        .vhost_get_features = vhost_vdpa_get_features,
+        .vhost_set_owner = vhost_vdpa_set_owner,
+        .vhost_reset_device = vhost_vdpa_reset_device,
+        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
+        .vhost_requires_shm_log = NULL,
+        .vhost_migration_done = NULL,
+        .vhost_backend_can_merge = NULL,
+        .vhost_net_set_mtu = NULL,
+        .vhost_set_iotlb_callback = NULL,
+        .vhost_send_device_iotlb_msg = NULL,
+        .vhost_set_state = vhost_vdpa_set_state,
+};
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4da0d5a6c5..d1f2c4add7 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
         .log_guest_addr = vq->used_phys,
         .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
     };
+    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
+        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
+        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
+        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
+    }
     int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
     if (r < 0) {
         VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 6f6670783f..d81bd9885f 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -17,7 +17,8 @@ typedef enum VhostBackendType {
     VHOST_BACKEND_TYPE_NONE = 0,
     VHOST_BACKEND_TYPE_KERNEL = 1,
     VHOST_BACKEND_TYPE_USER = 2,
-    VHOST_BACKEND_TYPE_MAX = 3,
+    VHOST_BACKEND_TYPE_VDPA = 3,
+    VHOST_BACKEND_TYPE_MAX = 4,
 } VhostBackendType;
 
 typedef enum VhostSetConfigType {
@@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
 typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
                                         struct vhost_inflight *inflight);
 
+typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
 typedef struct VhostOps {
     VhostBackendType backend_type;
     vhost_backend_init vhost_backend_init;
@@ -152,9 +154,11 @@ typedef struct VhostOps {
     vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
     vhost_get_inflight_fd_op vhost_get_inflight_fd;
     vhost_set_inflight_fd_op vhost_set_inflight_fd;
+    vhost_set_state_op vhost_set_state;
 } VhostOps;
 
 extern const VhostOps user_ops;
+extern const VhostOps vdpa_ops;
 
 int vhost_set_backend_type(struct vhost_dev *dev,
                            VhostBackendType backend_type);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
new file mode 100644
index 0000000000..889c1a4410
--- /dev/null
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -0,0 +1,14 @@
+
+#ifndef HW_VIRTIO_VHOST_VDPA_H
+#define HW_VIRTIO_VHOST_VDPA_H
+
+#include "hw/virtio/virtio.h"
+
+typedef struct vhost_vdpa {
+    int device_fd;
+    MemoryListener listener;
+} VhostVDPA;
+
+extern AddressSpace address_space_memory;
+
+#endif
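[Editor's note] To make the ioctl plumbing above easier to follow, here is a hedged, standalone sketch of the kernel interface that vhost_vdpa_call() wraps. It is not part of this series: the device node name /dev/vhost-vdpa-0 is an assumption, and it only builds against kernel headers that already carry the vhost-vdpa uAPI (VHOST_VDPA_SET_STATUS); the requests used are the same ones the patch issues (VHOST_SET_OWNER, VHOST_GET_FEATURES, VHOST_VDPA_SET_STATUS).

    /* Hedged sketch: talk to a vhost-vdpa character device directly. */
    #include <fcntl.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/vhost.h>

    int main(void)
    {
        /* Device node path is an assumption; it depends on how the host
         * enumerates vdpa devices. */
        int fd = open("/dev/vhost-vdpa-0", O_RDWR);
        uint64_t features = 0;
        uint8_t status = 0;

        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (ioctl(fd, VHOST_SET_OWNER, NULL) < 0 ||
            ioctl(fd, VHOST_GET_FEATURES, &features) < 0) {
            perror("ioctl");
            close(fd);
            return 1;
        }
        printf("device features: 0x%" PRIx64 "\n", features);

        /* Writing 0 resets the device status; this is the same request that
         * vhost_vdpa_set_state() in the patch sends with the virtio status
         * byte propagated from virtio_net_vhost_status(). */
        if (ioctl(fd, VHOST_VDPA_SET_STATUS, &status) < 0) {
            perror("VHOST_VDPA_SET_STATUS");
        }
        close(fd);
        return 0;
    }

The backend above issues the same requests through vhost_vdpa_call(), using the device_fd stored in struct vhost_vdpa; vhost_vdpa_init() additionally registers the memory listener so that guest RAM is mapped through the IOTLB messages shown earlier.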