diff mbox series

[3/3] vhost-vdpa-blk: Introduce vhost-vdpa-blk host device

Message ID 20210408101252.552-4-xieyongji@bytedance.com (mailing list archive)
State New, archived
Headers show
Series Introduce vhost-vdpa block device | expand

Commit Message

Yongji Xie April 8, 2021, 10:12 a.m. UTC
This commit introduces a new vhost-vdpa block device, which
will set up a vDPA device specified by a "vdpa-dev" parameter,
something like:

qemu-system-x86_64 \
    -device vhost-vdpa-blk-pci,vdpa-dev=/dev/vhost-vdpa-0

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 hw/block/Kconfig                   |   5 +
 hw/block/meson.build               |   1 +
 hw/block/vhost-vdpa-blk.c          | 227 +++++++++++++++++++++++++++++
 hw/virtio/meson.build              |   1 +
 hw/virtio/vhost-vdpa-blk-pci.c     | 101 +++++++++++++
 include/hw/virtio/vhost-vdpa-blk.h |  30 ++++
 6 files changed, 365 insertions(+)
 create mode 100644 hw/block/vhost-vdpa-blk.c
 create mode 100644 hw/virtio/vhost-vdpa-blk-pci.c
 create mode 100644 include/hw/virtio/vhost-vdpa-blk.h

Comments

Jason Wang April 9, 2021, 6:02 a.m. UTC | #1
在 2021/4/8 下午6:12, Xie Yongji 写道:
> This commit introduces a new vhost-vdpa block device, which
> will set up a vDPA device specified by a "vdpa-dev" parameter,
> something like:
>
> qemu-system-x86_64 \
>      -device vhost-vdpa-blk-pci,vdpa-dev=/dev/vhost-vdpa-0
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
>   hw/block/Kconfig                   |   5 +
>   hw/block/meson.build               |   1 +
>   hw/block/vhost-vdpa-blk.c          | 227 +++++++++++++++++++++++++++++
>   hw/virtio/meson.build              |   1 +
>   hw/virtio/vhost-vdpa-blk-pci.c     | 101 +++++++++++++
>   include/hw/virtio/vhost-vdpa-blk.h |  30 ++++
>   6 files changed, 365 insertions(+)
>   create mode 100644 hw/block/vhost-vdpa-blk.c
>   create mode 100644 hw/virtio/vhost-vdpa-blk-pci.c
>   create mode 100644 include/hw/virtio/vhost-vdpa-blk.h
>
> diff --git a/hw/block/Kconfig b/hw/block/Kconfig
> index 4fcd152166..4615a2c116 100644
> --- a/hw/block/Kconfig
> +++ b/hw/block/Kconfig
> @@ -41,5 +41,10 @@ config VHOST_USER_BLK
>       default y if VIRTIO_PCI
>       depends on VIRTIO && VHOST_USER && LINUX
>   
> +config VHOST_VDPA_BLK
> +    bool
> +    default y if VIRTIO_PCI
> +    depends on VIRTIO && VHOST_VDPA && LINUX
> +
>   config SWIM
>       bool
> diff --git a/hw/block/meson.build b/hw/block/meson.build
> index 5862bda4cb..98f1fc330a 100644
> --- a/hw/block/meson.build
> +++ b/hw/block/meson.build
> @@ -17,5 +17,6 @@ softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'n
>   
>   specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
>   specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-blk-common.c', 'vhost-user-blk.c'))
> +specific_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-blk-common.c', 'vhost-vdpa-blk.c'))
>   
>   subdir('dataplane')
> diff --git a/hw/block/vhost-vdpa-blk.c b/hw/block/vhost-vdpa-blk.c
> new file mode 100644
> index 0000000000..d5cbbbba10
> --- /dev/null
> +++ b/hw/block/vhost-vdpa-blk.c
> @@ -0,0 +1,227 @@
> +/*
> + * vhost-vdpa-blk host device
> + *
> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> + *
> + * Author:
> + *   Xie Yongji <xieyongji@bytedance.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +#include "qemu/cutils.h"
> +#include "hw/qdev-core.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/qdev-properties-system.h"
> +#include "hw/virtio/vhost.h"
> +#include "hw/virtio/vhost-vdpa-blk.h"
> +#include "hw/virtio/virtio.h"
> +#include "hw/virtio/virtio-bus.h"
> +#include "hw/virtio/virtio-access.h"
> +#include "sysemu/sysemu.h"
> +#include "sysemu/runstate.h"
> +
> +static const int vdpa_feature_bits[] = {
> +    VIRTIO_BLK_F_SIZE_MAX,
> +    VIRTIO_BLK_F_SEG_MAX,
> +    VIRTIO_BLK_F_GEOMETRY,
> +    VIRTIO_BLK_F_BLK_SIZE,
> +    VIRTIO_BLK_F_TOPOLOGY,
> +    VIRTIO_BLK_F_MQ,
> +    VIRTIO_BLK_F_RO,
> +    VIRTIO_BLK_F_FLUSH,
> +    VIRTIO_BLK_F_CONFIG_WCE,
> +    VIRTIO_BLK_F_DISCARD,
> +    VIRTIO_BLK_F_WRITE_ZEROES,
> +    VIRTIO_F_VERSION_1,
> +    VIRTIO_RING_F_INDIRECT_DESC,
> +    VIRTIO_RING_F_EVENT_IDX,
> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> +    VHOST_INVALID_FEATURE_BIT
> +};
> +
> +static void vhost_vdpa_blk_set_status(VirtIODevice *vdev, uint8_t status)
> +{
> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> +    bool should_start = virtio_device_started(vdev, status);
> +    int ret;
> +
> +    if (!vdev->vm_running) {
> +        should_start = false;
> +    }
> +
> +    if (vbc->dev.started == should_start) {
> +        return;
> +    }
> +
> +    if (should_start) {
> +        ret = vhost_blk_common_start(vbc);
> +        if (ret < 0) {
> +            error_report("vhost-vdpa-blk: vhost start failed: %s",
> +                         strerror(-ret));
> +        }
> +    } else {
> +        vhost_blk_common_stop(vbc);
> +    }
> +
> +}
> +
> +static void vhost_vdpa_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> +    int i, ret;


I believe we should never reach here. Shouldn't the backend poll the
notifier and trigger the vq handler there after DRIVER_OK?


> +
> +    if (!vdev->start_on_kick) {
> +        return;
> +    }
> +
> +    if (vbc->dev.started) {
> +        return;
> +    }
> +
> +    ret = vhost_blk_common_start(vbc);
> +    if (ret < 0) {
> +        error_report("vhost-vdpa-blk: vhost start failed: %s",
> +                     strerror(-ret));
> +        return;
> +    }
> +
> +    /* Kick right away to begin processing requests already in vring */
> +    for (i = 0; i < vbc->dev.nvqs; i++) {
> +        VirtQueue *kick_vq = virtio_get_queue(vdev, i);
> +
> +        if (!virtio_queue_get_desc_addr(vdev, i)) {
> +            continue;
> +        }
> +        event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
> +    }
> +}
> +
> +static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> +    Error *err = NULL;
> +    int ret;
> +
> +    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
> +    if (s->vdpa.device_fd == -1) {
> +        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
> +                   s->vdpa_dev, strerror(errno));
> +        return;
> +    }
> +
> +    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
> +    if (err != NULL) {
> +        error_propagate(errp, err);
> +        goto blk_err;
> +    }
> +
> +    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);
> +    vbc->dev.nvqs = vbc->num_queues;
> +    vbc->dev.vqs = vbc->vhost_vqs;
> +    vbc->dev.vq_index = 0;
> +    vbc->dev.backend_features = 0;
> +    vbc->started = false;
> +
> +    vhost_dev_set_config_notifier(&vbc->dev, &blk_ops);
> +
> +    ret = vhost_dev_init(&vbc->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0);
> +    if (ret < 0) {
> +        error_setg(errp, "vhost-vdpa-blk: vhost initialization failed: %s",
> +                   strerror(-ret));
> +        goto init_err;
> +    }
> +
> +    ret = vhost_dev_get_config(&vbc->dev, (uint8_t *)&vbc->blkcfg,
> +                               sizeof(struct virtio_blk_config));
> +    if (ret < 0) {
> +        error_setg(errp, "vhost-vdpa-blk: get block config failed");
> +        goto config_err;
> +    }
> +
> +    return;
> +config_err:
> +    vhost_dev_cleanup(&vbc->dev);
> +init_err:
> +    vhost_blk_common_unrealize(vbc);
> +blk_err:
> +    close(s->vdpa.device_fd);
> +}
> +
> +static void vhost_vdpa_blk_device_unrealize(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(dev);
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> +
> +    virtio_set_status(vdev, 0);
> +    vhost_dev_cleanup(&vbc->dev);
> +    vhost_blk_common_unrealize(vbc);
> +    close(s->vdpa.device_fd);
> +}
> +
> +static void vhost_vdpa_blk_instance_init(Object *obj)
> +{
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(obj);
> +
> +    vbc->feature_bits = vdpa_feature_bits;
> +
> +    device_add_bootindex_property(obj, &vbc->bootindex, "bootindex",
> +                                  "/disk@0,0", DEVICE(obj));
> +}
> +
> +static const VMStateDescription vmstate_vhost_vdpa_blk = {
> +    .name = "vhost-vdpa-blk",
> +    .minimum_version_id = 1,
> +    .version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_VIRTIO_DEVICE,
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +static Property vhost_vdpa_blk_properties[] = {
> +    DEFINE_PROP_STRING("vdpa-dev", VHostVdpaBlk, vdpa_dev),
> +    DEFINE_PROP_UINT16("num-queues", VHostBlkCommon, num_queues,
> +                       VHOST_BLK_AUTO_NUM_QUEUES),
> +    DEFINE_PROP_UINT32("queue-size", VHostBlkCommon, queue_size, 256),
> +    DEFINE_PROP_BIT("config-wce", VHostBlkCommon, config_wce, 0, true),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void vhost_vdpa_blk_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
> +
> +    device_class_set_props(dc, vhost_vdpa_blk_properties);
> +    dc->vmsd = &vmstate_vhost_vdpa_blk;
> +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> +    vdc->realize = vhost_vdpa_blk_device_realize;
> +    vdc->unrealize = vhost_vdpa_blk_device_unrealize;
> +    vdc->set_status = vhost_vdpa_blk_set_status;
> +}
> +
> +static const TypeInfo vhost_vdpa_blk_info = {
> +    .name = TYPE_VHOST_VDPA_BLK,
> +    .parent = TYPE_VHOST_BLK_COMMON,
> +    .instance_size = sizeof(VHostVdpaBlk),
> +    .instance_init = vhost_vdpa_blk_instance_init,
> +    .class_init = vhost_vdpa_blk_class_init,
> +};
> +
> +static void virtio_register_types(void)
> +{
> +    type_register_static(&vhost_vdpa_blk_info);
> +}
> +
> +type_init(virtio_register_types)
> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> index fbff9bc9d4..f02bea65a2 100644
> --- a/hw/virtio/meson.build
> +++ b/hw/virtio/meson.build
> @@ -30,6 +30,7 @@ virtio_pci_ss = ss.source_set()
>   virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
>   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c'))
>   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c'))
> +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-vdpa-blk-pci.c'))
>   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c'))
>   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
>   virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
> diff --git a/hw/virtio/vhost-vdpa-blk-pci.c b/hw/virtio/vhost-vdpa-blk-pci.c
> new file mode 100644
> index 0000000000..976c47fb4f
> --- /dev/null
> +++ b/hw/virtio/vhost-vdpa-blk-pci.c
> @@ -0,0 +1,101 @@
> +/*
> + * vhost-vdpa-blk PCI Bindings
> + *
> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> + *
> + * Author:
> + *   Xie Yongji <xieyongji@bytedance.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "standard-headers/linux/virtio_pci.h"
> +#include "hw/virtio/virtio.h"
> +#include "hw/virtio/vhost-vdpa-blk.h"
> +#include "hw/pci/pci.h"
> +#include "hw/qdev-properties.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +#include "qemu/module.h"
> +#include "virtio-pci.h"
> +#include "qom/object.h"
> +
> +typedef struct VHostVdpaBlkPCI VHostVdpaBlkPCI;
> +
> +#define TYPE_VHOST_VDPA_BLK_PCI "vhost-vdpa-blk-pci-base"
> +DECLARE_INSTANCE_CHECKER(VHostVdpaBlkPCI, VHOST_VDPA_BLK_PCI,
> +                         TYPE_VHOST_VDPA_BLK_PCI)
> +
> +struct VHostVdpaBlkPCI {
> +    VirtIOPCIProxy parent_obj;
> +    VHostVdpaBlk vdev;
> +};
> +
> +static Property vhost_vdpa_blk_pci_properties[] = {
> +    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
> +    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
> +                       DEV_NVECTORS_UNSPECIFIED),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void vhost_vdpa_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
> +{
> +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(vpci_dev);
> +    DeviceState *vdev = DEVICE(&dev->vdev);
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(&dev->vdev);
> +
> +    if (vbc->num_queues == VHOST_BLK_AUTO_NUM_QUEUES) {
> +        vbc->num_queues = virtio_pci_optimal_num_queues(0);
> +    }
> +
> +    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> +        vpci_dev->nvectors = vbc->num_queues + 1;
> +    }
> +
> +    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
> +}
> +
> +static void vhost_vdpa_blk_pci_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
> +    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
> +
> +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> +    device_class_set_props(dc, vhost_vdpa_blk_pci_properties);
> +    k->realize = vhost_vdpa_blk_pci_realize;
> +    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> +    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
> +    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
> +    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
> +}
> +
> +static void vhost_vdpa_blk_pci_instance_init(Object *obj)
> +{
> +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(obj);
> +
> +    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
> +                                TYPE_VHOST_VDPA_BLK);
> +    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
> +                              "bootindex");
> +}
> +
> +static const VirtioPCIDeviceTypeInfo vhost_vdpa_blk_pci_info = {
> +    .base_name               = TYPE_VHOST_VDPA_BLK_PCI,
> +    .generic_name            = "vhost-vdpa-blk-pci",
> +    .transitional_name       = "vhost-vdpa-blk-pci-transitional",
> +    .non_transitional_name   = "vhost-vdpa-blk-pci-non-transitional",
> +    .instance_size  = sizeof(VHostVdpaBlkPCI),
> +    .instance_init  = vhost_vdpa_blk_pci_instance_init,
> +    .class_init     = vhost_vdpa_blk_pci_class_init,
> +};
> +
> +static void vhost_vdpa_blk_pci_register(void)
> +{
> +    virtio_pci_types_register(&vhost_vdpa_blk_pci_info);
> +}


I wonder how we could use virtio-mmio for vDPA block here.

Thanks


> +
> +type_init(vhost_vdpa_blk_pci_register)
> diff --git a/include/hw/virtio/vhost-vdpa-blk.h b/include/hw/virtio/vhost-vdpa-blk.h
> new file mode 100644
> index 0000000000..80712f6dae
> --- /dev/null
> +++ b/include/hw/virtio/vhost-vdpa-blk.h
> @@ -0,0 +1,30 @@
> +/*
> + * vhost-vdpa-blk host device
> + *
> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> + *
> + * Author:
> + *   Xie Yongji <xieyongji@bytedance.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef VHOST_VDPA_BLK_H
> +#define VHOST_VDPA_BLK_H
> +
> +#include "hw/virtio/vhost-vdpa.h"
> +#include "hw/virtio/vhost-blk-common.h"
> +#include "qom/object.h"
> +
> +#define TYPE_VHOST_VDPA_BLK "vhost-vdpa-blk"
> +OBJECT_DECLARE_SIMPLE_TYPE(VHostVdpaBlk, VHOST_VDPA_BLK)
> +
> +struct VHostVdpaBlk {
> +    VHostBlkCommon parent_obj;
> +    char *vdpa_dev;
> +    struct vhost_vdpa vdpa;
> +};
> +
> +#endif
Yongji Xie April 9, 2021, 8:17 a.m. UTC | #2
On Fri, Apr 9, 2021 at 2:02 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2021/4/8 下午6:12, Xie Yongji 写道:
> > This commit introduces a new vhost-vdpa block device, which
> > will set up a vDPA device specified by a "vdpa-dev" parameter,
> > something like:
> >
> > qemu-system-x86_64 \
> >      -device vhost-vdpa-blk-pci,vdpa-dev=/dev/vhost-vdpa-0
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> >   hw/block/Kconfig                   |   5 +
> >   hw/block/meson.build               |   1 +
> >   hw/block/vhost-vdpa-blk.c          | 227 +++++++++++++++++++++++++++++
> >   hw/virtio/meson.build              |   1 +
> >   hw/virtio/vhost-vdpa-blk-pci.c     | 101 +++++++++++++
> >   include/hw/virtio/vhost-vdpa-blk.h |  30 ++++
> >   6 files changed, 365 insertions(+)
> >   create mode 100644 hw/block/vhost-vdpa-blk.c
> >   create mode 100644 hw/virtio/vhost-vdpa-blk-pci.c
> >   create mode 100644 include/hw/virtio/vhost-vdpa-blk.h
> >
> > diff --git a/hw/block/Kconfig b/hw/block/Kconfig
> > index 4fcd152166..4615a2c116 100644
> > --- a/hw/block/Kconfig
> > +++ b/hw/block/Kconfig
> > @@ -41,5 +41,10 @@ config VHOST_USER_BLK
> >       default y if VIRTIO_PCI
> >       depends on VIRTIO && VHOST_USER && LINUX
> >
> > +config VHOST_VDPA_BLK
> > +    bool
> > +    default y if VIRTIO_PCI
> > +    depends on VIRTIO && VHOST_VDPA && LINUX
> > +
> >   config SWIM
> >       bool
> > diff --git a/hw/block/meson.build b/hw/block/meson.build
> > index 5862bda4cb..98f1fc330a 100644
> > --- a/hw/block/meson.build
> > +++ b/hw/block/meson.build
> > @@ -17,5 +17,6 @@ softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'n
> >
> >   specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
> >   specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-blk-common.c', 'vhost-user-blk.c'))
> > +specific_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-blk-common.c', 'vhost-vdpa-blk.c'))
> >
> >   subdir('dataplane')
> > diff --git a/hw/block/vhost-vdpa-blk.c b/hw/block/vhost-vdpa-blk.c
> > new file mode 100644
> > index 0000000000..d5cbbbba10
> > --- /dev/null
> > +++ b/hw/block/vhost-vdpa-blk.c
> > @@ -0,0 +1,227 @@
> > +/*
> > + * vhost-vdpa-blk host device
> > + *
> > + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> > + *
> > + * Author:
> > + *   Xie Yongji <xieyongji@bytedance.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qapi/error.h"
> > +#include "qemu/error-report.h"
> > +#include "qemu/cutils.h"
> > +#include "hw/qdev-core.h"
> > +#include "hw/qdev-properties.h"
> > +#include "hw/qdev-properties-system.h"
> > +#include "hw/virtio/vhost.h"
> > +#include "hw/virtio/vhost-vdpa-blk.h"
> > +#include "hw/virtio/virtio.h"
> > +#include "hw/virtio/virtio-bus.h"
> > +#include "hw/virtio/virtio-access.h"
> > +#include "sysemu/sysemu.h"
> > +#include "sysemu/runstate.h"
> > +
> > +static const int vdpa_feature_bits[] = {
> > +    VIRTIO_BLK_F_SIZE_MAX,
> > +    VIRTIO_BLK_F_SEG_MAX,
> > +    VIRTIO_BLK_F_GEOMETRY,
> > +    VIRTIO_BLK_F_BLK_SIZE,
> > +    VIRTIO_BLK_F_TOPOLOGY,
> > +    VIRTIO_BLK_F_MQ,
> > +    VIRTIO_BLK_F_RO,
> > +    VIRTIO_BLK_F_FLUSH,
> > +    VIRTIO_BLK_F_CONFIG_WCE,
> > +    VIRTIO_BLK_F_DISCARD,
> > +    VIRTIO_BLK_F_WRITE_ZEROES,
> > +    VIRTIO_F_VERSION_1,
> > +    VIRTIO_RING_F_INDIRECT_DESC,
> > +    VIRTIO_RING_F_EVENT_IDX,
> > +    VIRTIO_F_NOTIFY_ON_EMPTY,
> > +    VHOST_INVALID_FEATURE_BIT
> > +};
> > +
> > +static void vhost_vdpa_blk_set_status(VirtIODevice *vdev, uint8_t status)
> > +{
> > +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> > +    bool should_start = virtio_device_started(vdev, status);
> > +    int ret;
> > +
> > +    if (!vdev->vm_running) {
> > +        should_start = false;
> > +    }
> > +
> > +    if (vbc->dev.started == should_start) {
> > +        return;
> > +    }
> > +
> > +    if (should_start) {
> > +        ret = vhost_blk_common_start(vbc);
> > +        if (ret < 0) {
> > +            error_report("vhost-vdpa-blk: vhost start failed: %s",
> > +                         strerror(-ret));
> > +        }
> > +    } else {
> > +        vhost_blk_common_stop(vbc);
> > +    }
> > +
> > +}
> > +
> > +static void vhost_vdpa_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > +{
> > +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> > +    int i, ret;
>
>
> I believe we should never reach here, the backend should poll the
> notifier and trigger vq handler there after DRIVER_OK?
>

Some legacy virtio-blk drivers (virtio 0.9) will do that: they kick
before setting DRIVER_OK.

>
> > +
> > +    if (!vdev->start_on_kick) {
> > +        return;
> > +    }
> > +
> > +    if (vbc->dev.started) {
> > +        return;
> > +    }
> > +
> > +    ret = vhost_blk_common_start(vbc);
> > +    if (ret < 0) {
> > +        error_report("vhost-vdpa-blk: vhost start failed: %s",
> > +                     strerror(-ret));
> > +        return;
> > +    }
> > +
> > +    /* Kick right away to begin processing requests already in vring */
> > +    for (i = 0; i < vbc->dev.nvqs; i++) {
> > +        VirtQueue *kick_vq = virtio_get_queue(vdev, i);
> > +
> > +        if (!virtio_queue_get_desc_addr(vdev, i)) {
> > +            continue;
> > +        }
> > +        event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
> > +    }
> > +}
> > +
> > +static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> > +    Error *err = NULL;
> > +    int ret;
> > +
> > +    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
> > +    if (s->vdpa.device_fd == -1) {
> > +        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
> > +                   s->vdpa_dev, strerror(errno));
> > +        return;
> > +    }
> > +
> > +    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
> > +    if (err != NULL) {
> > +        error_propagate(errp, err);
> > +        goto blk_err;
> > +    }
> > +
> > +    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);
> > +    vbc->dev.nvqs = vbc->num_queues;
> > +    vbc->dev.vqs = vbc->vhost_vqs;
> > +    vbc->dev.vq_index = 0;
> > +    vbc->dev.backend_features = 0;
> > +    vbc->started = false;
> > +
> > +    vhost_dev_set_config_notifier(&vbc->dev, &blk_ops);
> > +
> > +    ret = vhost_dev_init(&vbc->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0);
> > +    if (ret < 0) {
> > +        error_setg(errp, "vhost-vdpa-blk: vhost initialization failed: %s",
> > +                   strerror(-ret));
> > +        goto init_err;
> > +    }
> > +
> > +    ret = vhost_dev_get_config(&vbc->dev, (uint8_t *)&vbc->blkcfg,
> > +                               sizeof(struct virtio_blk_config));
> > +    if (ret < 0) {
> > +        error_setg(errp, "vhost-vdpa-blk: get block config failed");
> > +        goto config_err;
> > +    }
> > +
> > +    return;
> > +config_err:
> > +    vhost_dev_cleanup(&vbc->dev);
> > +init_err:
> > +    vhost_blk_common_unrealize(vbc);
> > +blk_err:
> > +    close(s->vdpa.device_fd);
> > +}
> > +
> > +static void vhost_vdpa_blk_device_unrealize(DeviceState *dev)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostVdpaBlk *s = VHOST_VDPA_BLK(dev);
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> > +
> > +    virtio_set_status(vdev, 0);
> > +    vhost_dev_cleanup(&vbc->dev);
> > +    vhost_blk_common_unrealize(vbc);
> > +    close(s->vdpa.device_fd);
> > +}
> > +
> > +static void vhost_vdpa_blk_instance_init(Object *obj)
> > +{
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(obj);
> > +
> > +    vbc->feature_bits = vdpa_feature_bits;
> > +
> > +    device_add_bootindex_property(obj, &vbc->bootindex, "bootindex",
> > +                                  "/disk@0,0", DEVICE(obj));
> > +}
> > +
> > +static const VMStateDescription vmstate_vhost_vdpa_blk = {
> > +    .name = "vhost-vdpa-blk",
> > +    .minimum_version_id = 1,
> > +    .version_id = 1,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_VIRTIO_DEVICE,
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +};
> > +
> > +static Property vhost_vdpa_blk_properties[] = {
> > +    DEFINE_PROP_STRING("vdpa-dev", VHostVdpaBlk, vdpa_dev),
> > +    DEFINE_PROP_UINT16("num-queues", VHostBlkCommon, num_queues,
> > +                       VHOST_BLK_AUTO_NUM_QUEUES),
> > +    DEFINE_PROP_UINT32("queue-size", VHostBlkCommon, queue_size, 256),
> > +    DEFINE_PROP_BIT("config-wce", VHostBlkCommon, config_wce, 0, true),
> > +    DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> > +static void vhost_vdpa_blk_class_init(ObjectClass *klass, void *data)
> > +{
> > +    DeviceClass *dc = DEVICE_CLASS(klass);
> > +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
> > +
> > +    device_class_set_props(dc, vhost_vdpa_blk_properties);
> > +    dc->vmsd = &vmstate_vhost_vdpa_blk;
> > +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> > +    vdc->realize = vhost_vdpa_blk_device_realize;
> > +    vdc->unrealize = vhost_vdpa_blk_device_unrealize;
> > +    vdc->set_status = vhost_vdpa_blk_set_status;
> > +}
> > +
> > +static const TypeInfo vhost_vdpa_blk_info = {
> > +    .name = TYPE_VHOST_VDPA_BLK,
> > +    .parent = TYPE_VHOST_BLK_COMMON,
> > +    .instance_size = sizeof(VHostVdpaBlk),
> > +    .instance_init = vhost_vdpa_blk_instance_init,
> > +    .class_init = vhost_vdpa_blk_class_init,
> > +};
> > +
> > +static void virtio_register_types(void)
> > +{
> > +    type_register_static(&vhost_vdpa_blk_info);
> > +}
> > +
> > +type_init(virtio_register_types)
> > diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> > index fbff9bc9d4..f02bea65a2 100644
> > --- a/hw/virtio/meson.build
> > +++ b/hw/virtio/meson.build
> > @@ -30,6 +30,7 @@ virtio_pci_ss = ss.source_set()
> >   virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
> >   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c'))
> >   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c'))
> > +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-vdpa-blk-pci.c'))
> >   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c'))
> >   virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
> >   virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
> > diff --git a/hw/virtio/vhost-vdpa-blk-pci.c b/hw/virtio/vhost-vdpa-blk-pci.c
> > new file mode 100644
> > index 0000000000..976c47fb4f
> > --- /dev/null
> > +++ b/hw/virtio/vhost-vdpa-blk-pci.c
> > @@ -0,0 +1,101 @@
> > +/*
> > + * vhost-vdpa-blk PCI Bindings
> > + *
> > + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> > + *
> > + * Author:
> > + *   Xie Yongji <xieyongji@bytedance.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "standard-headers/linux/virtio_pci.h"
> > +#include "hw/virtio/virtio.h"
> > +#include "hw/virtio/vhost-vdpa-blk.h"
> > +#include "hw/pci/pci.h"
> > +#include "hw/qdev-properties.h"
> > +#include "qapi/error.h"
> > +#include "qemu/error-report.h"
> > +#include "qemu/module.h"
> > +#include "virtio-pci.h"
> > +#include "qom/object.h"
> > +
> > +typedef struct VHostVdpaBlkPCI VHostVdpaBlkPCI;
> > +
> > +#define TYPE_VHOST_VDPA_BLK_PCI "vhost-vdpa-blk-pci-base"
> > +DECLARE_INSTANCE_CHECKER(VHostVdpaBlkPCI, VHOST_VDPA_BLK_PCI,
> > +                         TYPE_VHOST_VDPA_BLK_PCI)
> > +
> > +struct VHostVdpaBlkPCI {
> > +    VirtIOPCIProxy parent_obj;
> > +    VHostVdpaBlk vdev;
> > +};
> > +
> > +static Property vhost_vdpa_blk_pci_properties[] = {
> > +    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
> > +    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
> > +                       DEV_NVECTORS_UNSPECIFIED),
> > +    DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> > +static void vhost_vdpa_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
> > +{
> > +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(vpci_dev);
> > +    DeviceState *vdev = DEVICE(&dev->vdev);
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(&dev->vdev);
> > +
> > +    if (vbc->num_queues == VHOST_BLK_AUTO_NUM_QUEUES) {
> > +        vbc->num_queues = virtio_pci_optimal_num_queues(0);
> > +    }
> > +
> > +    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> > +        vpci_dev->nvectors = vbc->num_queues + 1;
> > +    }
> > +
> > +    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
> > +}
> > +
> > +static void vhost_vdpa_blk_pci_class_init(ObjectClass *klass, void *data)
> > +{
> > +    DeviceClass *dc = DEVICE_CLASS(klass);
> > +    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
> > +    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
> > +
> > +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> > +    device_class_set_props(dc, vhost_vdpa_blk_pci_properties);
> > +    k->realize = vhost_vdpa_blk_pci_realize;
> > +    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> > +    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
> > +    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
> > +    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
> > +}
> > +
> > +static void vhost_vdpa_blk_pci_instance_init(Object *obj)
> > +{
> > +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(obj);
> > +
> > +    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
> > +                                TYPE_VHOST_VDPA_BLK);
> > +    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
> > +                              "bootindex");
> > +}
> > +
> > +static const VirtioPCIDeviceTypeInfo vhost_vdpa_blk_pci_info = {
> > +    .base_name               = TYPE_VHOST_VDPA_BLK_PCI,
> > +    .generic_name            = "vhost-vdpa-blk-pci",
> > +    .transitional_name       = "vhost-vdpa-blk-pci-transitional",
> > +    .non_transitional_name   = "vhost-vdpa-blk-pci-non-transitional",
> > +    .instance_size  = sizeof(VHostVdpaBlkPCI),
> > +    .instance_init  = vhost_vdpa_blk_pci_instance_init,
> > +    .class_init     = vhost_vdpa_blk_pci_class_init,
> > +};
> > +
> > +static void vhost_vdpa_blk_pci_register(void)
> > +{
> > +    virtio_pci_types_register(&vhost_vdpa_blk_pci_info);
> > +}
>
>
> I wonder how could we use virtio-mmio for vDPA block here.
>

Use something like:

-device vhost-vdpa-blk,vdpa-dev=/dev/vhost-vdpa-0 ?

Thanks,
Yongji
Jason Wang April 12, 2021, 7:14 a.m. UTC | #3
在 2021/4/9 下午4:17, Yongji Xie 写道:
> On Fri, Apr 9, 2021 at 2:02 PM Jason Wang <jasowang@redhat.com> wrote:
>>
>> 在 2021/4/8 下午6:12, Xie Yongji 写道:
>>> This commit introduces a new vhost-vdpa block device, which
>>> will set up a vDPA device specified by a "vdpa-dev" parameter,
>>> something like:
>>>
>>> qemu-system-x86_64 \
>>>       -device vhost-vdpa-blk-pci,vdpa-dev=/dev/vhost-vdpa-0
>>>
>>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
>>> ---
>>>    hw/block/Kconfig                   |   5 +
>>>    hw/block/meson.build               |   1 +
>>>    hw/block/vhost-vdpa-blk.c          | 227 +++++++++++++++++++++++++++++
>>>    hw/virtio/meson.build              |   1 +
>>>    hw/virtio/vhost-vdpa-blk-pci.c     | 101 +++++++++++++
>>>    include/hw/virtio/vhost-vdpa-blk.h |  30 ++++
>>>    6 files changed, 365 insertions(+)
>>>    create mode 100644 hw/block/vhost-vdpa-blk.c
>>>    create mode 100644 hw/virtio/vhost-vdpa-blk-pci.c
>>>    create mode 100644 include/hw/virtio/vhost-vdpa-blk.h
>>>
>>> diff --git a/hw/block/Kconfig b/hw/block/Kconfig
>>> index 4fcd152166..4615a2c116 100644
>>> --- a/hw/block/Kconfig
>>> +++ b/hw/block/Kconfig
>>> @@ -41,5 +41,10 @@ config VHOST_USER_BLK
>>>        default y if VIRTIO_PCI
>>>        depends on VIRTIO && VHOST_USER && LINUX
>>>
>>> +config VHOST_VDPA_BLK
>>> +    bool
>>> +    default y if VIRTIO_PCI
>>> +    depends on VIRTIO && VHOST_VDPA && LINUX
>>> +
>>>    config SWIM
>>>        bool
>>> diff --git a/hw/block/meson.build b/hw/block/meson.build
>>> index 5862bda4cb..98f1fc330a 100644
>>> --- a/hw/block/meson.build
>>> +++ b/hw/block/meson.build
>>> @@ -17,5 +17,6 @@ softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'n
>>>
>>>    specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
>>>    specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-blk-common.c', 'vhost-user-blk.c'))
>>> +specific_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-blk-common.c', 'vhost-vdpa-blk.c'))
>>>
>>>    subdir('dataplane')
>>> diff --git a/hw/block/vhost-vdpa-blk.c b/hw/block/vhost-vdpa-blk.c
>>> new file mode 100644
>>> index 0000000000..d5cbbbba10
>>> --- /dev/null
>>> +++ b/hw/block/vhost-vdpa-blk.c
>>> @@ -0,0 +1,227 @@
>>> +/*
>>> + * vhost-vdpa-blk host device
>>> + *
>>> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
>>> + *
>>> + * Author:
>>> + *   Xie Yongji <xieyongji@bytedance.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>>> + * the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "qapi/error.h"
>>> +#include "qemu/error-report.h"
>>> +#include "qemu/cutils.h"
>>> +#include "hw/qdev-core.h"
>>> +#include "hw/qdev-properties.h"
>>> +#include "hw/qdev-properties-system.h"
>>> +#include "hw/virtio/vhost.h"
>>> +#include "hw/virtio/vhost-vdpa-blk.h"
>>> +#include "hw/virtio/virtio.h"
>>> +#include "hw/virtio/virtio-bus.h"
>>> +#include "hw/virtio/virtio-access.h"
>>> +#include "sysemu/sysemu.h"
>>> +#include "sysemu/runstate.h"
>>> +
>>> +static const int vdpa_feature_bits[] = {
>>> +    VIRTIO_BLK_F_SIZE_MAX,
>>> +    VIRTIO_BLK_F_SEG_MAX,
>>> +    VIRTIO_BLK_F_GEOMETRY,
>>> +    VIRTIO_BLK_F_BLK_SIZE,
>>> +    VIRTIO_BLK_F_TOPOLOGY,
>>> +    VIRTIO_BLK_F_MQ,
>>> +    VIRTIO_BLK_F_RO,
>>> +    VIRTIO_BLK_F_FLUSH,
>>> +    VIRTIO_BLK_F_CONFIG_WCE,
>>> +    VIRTIO_BLK_F_DISCARD,
>>> +    VIRTIO_BLK_F_WRITE_ZEROES,
>>> +    VIRTIO_F_VERSION_1,
>>> +    VIRTIO_RING_F_INDIRECT_DESC,
>>> +    VIRTIO_RING_F_EVENT_IDX,
>>> +    VIRTIO_F_NOTIFY_ON_EMPTY,
>>> +    VHOST_INVALID_FEATURE_BIT
>>> +};
>>> +
>>> +static void vhost_vdpa_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>> +{
>>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
>>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
>>> +    bool should_start = virtio_device_started(vdev, status);
>>> +    int ret;
>>> +
>>> +    if (!vdev->vm_running) {
>>> +        should_start = false;
>>> +    }
>>> +
>>> +    if (vbc->dev.started == should_start) {
>>> +        return;
>>> +    }
>>> +
>>> +    if (should_start) {
>>> +        ret = vhost_blk_common_start(vbc);
>>> +        if (ret < 0) {
>>> +            error_report("vhost-vdpa-blk: vhost start failed: %s",
>>> +                         strerror(-ret));
>>> +        }
>>> +    } else {
>>> +        vhost_blk_common_stop(vbc);
>>> +    }
>>> +
>>> +}
>>> +
>>> +static void vhost_vdpa_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>>> +{
>>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
>>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
>>> +    int i, ret;
>>
>> I believe we should never reach here, the backend should poll the
>> notifier and trigger vq handler there after DRIVER_OK?
>>
> Some legacy virtio-blk driver (virtio 0.9) will do that. Kick before
> set DRIVER_OK.


OK, I see, but is there any reason why:

1) we need to start vhost-blk here, and
2) the relay is done per device rather than per vq?


>
>>> +
>>> +    if (!vdev->start_on_kick) {
>>> +        return;
>>> +    }
>>> +
>>> +    if (vbc->dev.started) {
>>> +        return;
>>> +    }
>>> +
>>> +    ret = vhost_blk_common_start(vbc);
>>> +    if (ret < 0) {
>>> +        error_report("vhost-vdpa-blk: vhost start failed: %s",
>>> +                     strerror(-ret));
>>> +        return;
>>> +    }
>>> +
>>> +    /* Kick right away to begin processing requests already in vring */
>>> +    for (i = 0; i < vbc->dev.nvqs; i++) {
>>> +        VirtQueue *kick_vq = virtio_get_queue(vdev, i);
>>> +
>>> +        if (!virtio_queue_get_desc_addr(vdev, i)) {
>>> +            continue;
>>> +        }
>>> +        event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
>>> +    }
>>> +}
>>> +
>>> +static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
>>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
>>> +    Error *err = NULL;
>>> +    int ret;
>>> +
>>> +    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
>>> +    if (s->vdpa.device_fd == -1) {
>>> +        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
>>> +                   s->vdpa_dev, strerror(errno));
>>> +        return;
>>> +    }
>>> +
>>> +    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
>>> +    if (err != NULL) {
>>> +        error_propagate(errp, err);
>>> +        goto blk_err;
>>> +    }
>>> +
>>> +    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);
>>> +    vbc->dev.nvqs = vbc->num_queues;
>>> +    vbc->dev.vqs = vbc->vhost_vqs;
>>> +    vbc->dev.vq_index = 0;
>>> +    vbc->dev.backend_features = 0;
>>> +    vbc->started = false;
>>> +
>>> +    vhost_dev_set_config_notifier(&vbc->dev, &blk_ops);
>>> +
>>> +    ret = vhost_dev_init(&vbc->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0);
>>> +    if (ret < 0) {
>>> +        error_setg(errp, "vhost-vdpa-blk: vhost initialization failed: %s",
>>> +                   strerror(-ret));
>>> +        goto init_err;
>>> +    }
>>> +
>>> +    ret = vhost_dev_get_config(&vbc->dev, (uint8_t *)&vbc->blkcfg,
>>> +                               sizeof(struct virtio_blk_config));
>>> +    if (ret < 0) {
>>> +        error_setg(errp, "vhost-vdpa-blk: get block config failed");
>>> +        goto config_err;
>>> +    }
>>> +
>>> +    return;
>>> +config_err:
>>> +    vhost_dev_cleanup(&vbc->dev);
>>> +init_err:
>>> +    vhost_blk_common_unrealize(vbc);
>>> +blk_err:
>>> +    close(s->vdpa.device_fd);
>>> +}
>>> +
>>> +static void vhost_vdpa_blk_device_unrealize(DeviceState *dev)
>>> +{
>>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(dev);
>>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
>>> +
>>> +    virtio_set_status(vdev, 0);
>>> +    vhost_dev_cleanup(&vbc->dev);
>>> +    vhost_blk_common_unrealize(vbc);
>>> +    close(s->vdpa.device_fd);
>>> +}
>>> +
>>> +static void vhost_vdpa_blk_instance_init(Object *obj)
>>> +{
>>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(obj);
>>> +
>>> +    vbc->feature_bits = vdpa_feature_bits;
>>> +
>>> +    device_add_bootindex_property(obj, &vbc->bootindex, "bootindex",
>>> +                                  "/disk@0,0", DEVICE(obj));
>>> +}
>>> +
>>> +static const VMStateDescription vmstate_vhost_vdpa_blk = {
>>> +    .name = "vhost-vdpa-blk",
>>> +    .minimum_version_id = 1,
>>> +    .version_id = 1,
>>> +    .fields = (VMStateField[]) {
>>> +        VMSTATE_VIRTIO_DEVICE,
>>> +        VMSTATE_END_OF_LIST()
>>> +    },
>>> +};
>>> +
>>> +static Property vhost_vdpa_blk_properties[] = {
>>> +    DEFINE_PROP_STRING("vdpa-dev", VHostVdpaBlk, vdpa_dev),
>>> +    DEFINE_PROP_UINT16("num-queues", VHostBlkCommon, num_queues,
>>> +                       VHOST_BLK_AUTO_NUM_QUEUES),
>>> +    DEFINE_PROP_UINT32("queue-size", VHostBlkCommon, queue_size, 256),
>>> +    DEFINE_PROP_BIT("config-wce", VHostBlkCommon, config_wce, 0, true),
>>> +    DEFINE_PROP_END_OF_LIST(),
>>> +};
>>> +
>>> +static void vhost_vdpa_blk_class_init(ObjectClass *klass, void *data)
>>> +{
>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>> +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
>>> +
>>> +    device_class_set_props(dc, vhost_vdpa_blk_properties);
>>> +    dc->vmsd = &vmstate_vhost_vdpa_blk;
>>> +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
>>> +    vdc->realize = vhost_vdpa_blk_device_realize;
>>> +    vdc->unrealize = vhost_vdpa_blk_device_unrealize;
>>> +    vdc->set_status = vhost_vdpa_blk_set_status;
>>> +}
>>> +
>>> +static const TypeInfo vhost_vdpa_blk_info = {
>>> +    .name = TYPE_VHOST_VDPA_BLK,
>>> +    .parent = TYPE_VHOST_BLK_COMMON,
>>> +    .instance_size = sizeof(VHostVdpaBlk),
>>> +    .instance_init = vhost_vdpa_blk_instance_init,
>>> +    .class_init = vhost_vdpa_blk_class_init,
>>> +};
>>> +
>>> +static void virtio_register_types(void)
>>> +{
>>> +    type_register_static(&vhost_vdpa_blk_info);
>>> +}
>>> +
>>> +type_init(virtio_register_types)
>>> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
>>> index fbff9bc9d4..f02bea65a2 100644
>>> --- a/hw/virtio/meson.build
>>> +++ b/hw/virtio/meson.build
>>> @@ -30,6 +30,7 @@ virtio_pci_ss = ss.source_set()
>>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
>>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c'))
>>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c'))
>>> +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-vdpa-blk-pci.c'))
>>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c'))
>>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
>>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
>>> diff --git a/hw/virtio/vhost-vdpa-blk-pci.c b/hw/virtio/vhost-vdpa-blk-pci.c
>>> new file mode 100644
>>> index 0000000000..976c47fb4f
>>> --- /dev/null
>>> +++ b/hw/virtio/vhost-vdpa-blk-pci.c
>>> @@ -0,0 +1,101 @@
>>> +/*
>>> + * vhost-vdpa-blk PCI Bindings
>>> + *
>>> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
>>> + *
>>> + * Author:
>>> + *   Xie Yongji <xieyongji@bytedance.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>>> + * the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "standard-headers/linux/virtio_pci.h"
>>> +#include "hw/virtio/virtio.h"
>>> +#include "hw/virtio/vhost-vdpa-blk.h"
>>> +#include "hw/pci/pci.h"
>>> +#include "hw/qdev-properties.h"
>>> +#include "qapi/error.h"
>>> +#include "qemu/error-report.h"
>>> +#include "qemu/module.h"
>>> +#include "virtio-pci.h"
>>> +#include "qom/object.h"
>>> +
>>> +typedef struct VHostVdpaBlkPCI VHostVdpaBlkPCI;
>>> +
>>> +#define TYPE_VHOST_VDPA_BLK_PCI "vhost-vdpa-blk-pci-base"
>>> +DECLARE_INSTANCE_CHECKER(VHostVdpaBlkPCI, VHOST_VDPA_BLK_PCI,
>>> +                         TYPE_VHOST_VDPA_BLK_PCI)
>>> +
>>> +struct VHostVdpaBlkPCI {
>>> +    VirtIOPCIProxy parent_obj;
>>> +    VHostVdpaBlk vdev;
>>> +};
>>> +
>>> +static Property vhost_vdpa_blk_pci_properties[] = {
>>> +    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
>>> +    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
>>> +                       DEV_NVECTORS_UNSPECIFIED),
>>> +    DEFINE_PROP_END_OF_LIST(),
>>> +};
>>> +
>>> +static void vhost_vdpa_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
>>> +{
>>> +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(vpci_dev);
>>> +    DeviceState *vdev = DEVICE(&dev->vdev);
>>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(&dev->vdev);
>>> +
>>> +    if (vbc->num_queues == VHOST_BLK_AUTO_NUM_QUEUES) {
>>> +        vbc->num_queues = virtio_pci_optimal_num_queues(0);
>>> +    }
>>> +
>>> +    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
>>> +        vpci_dev->nvectors = vbc->num_queues + 1;
>>> +    }
>>> +
>>> +    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
>>> +}
>>> +
>>> +static void vhost_vdpa_blk_pci_class_init(ObjectClass *klass, void *data)
>>> +{
>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>> +    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
>>> +    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
>>> +
>>> +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
>>> +    device_class_set_props(dc, vhost_vdpa_blk_pci_properties);
>>> +    k->realize = vhost_vdpa_blk_pci_realize;
>>> +    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
>>> +    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
>>> +    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
>>> +    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
>>> +}
>>> +
>>> +static void vhost_vdpa_blk_pci_instance_init(Object *obj)
>>> +{
>>> +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(obj);
>>> +
>>> +    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
>>> +                                TYPE_VHOST_VDPA_BLK);
>>> +    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
>>> +                              "bootindex");
>>> +}
>>> +
>>> +static const VirtioPCIDeviceTypeInfo vhost_vdpa_blk_pci_info = {
>>> +    .base_name               = TYPE_VHOST_VDPA_BLK_PCI,
>>> +    .generic_name            = "vhost-vdpa-blk-pci",
>>> +    .transitional_name       = "vhost-vdpa-blk-pci-transitional",
>>> +    .non_transitional_name   = "vhost-vdpa-blk-pci-non-transitional",
>>> +    .instance_size  = sizeof(VHostVdpaBlkPCI),
>>> +    .instance_init  = vhost_vdpa_blk_pci_instance_init,
>>> +    .class_init     = vhost_vdpa_blk_pci_class_init,
>>> +};
>>> +
>>> +static void vhost_vdpa_blk_pci_register(void)
>>> +{
>>> +    virtio_pci_types_register(&vhost_vdpa_blk_pci_info);
>>> +}
>>
>> I wonder how could we use virtio-mmio for vDPA block here.
>>
> Use something like:
>
> -device vhost-vdpa-blk,vdpa-dev=/dev/vhost-vdpa-0 ?


Something like this; making vDPA independent of a specific bus is a
great advantage.

Thanks


>
> Thanks,
> Yongji
>
Yongji Xie April 12, 2021, 7:51 a.m. UTC | #4
On Mon, Apr 12, 2021 at 3:14 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2021/4/9 下午4:17, Yongji Xie 写道:
> > On Fri, Apr 9, 2021 at 2:02 PM Jason Wang <jasowang@redhat.com> wrote:
> >>
> >> 在 2021/4/8 下午6:12, Xie Yongji 写道:
> >>> This commit introduces a new vhost-vdpa block device, which
> >>> will set up a vDPA device specified by a "vdpa-dev" parameter,
> >>> something like:
> >>>
> >>> qemu-system-x86_64 \
> >>>       -device vhost-vdpa-blk-pci,vdpa-dev=/dev/vhost-vdpa-0
> >>>
> >>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> >>> ---
> >>>    hw/block/Kconfig                   |   5 +
> >>>    hw/block/meson.build               |   1 +
> >>>    hw/block/vhost-vdpa-blk.c          | 227 +++++++++++++++++++++++++++++
> >>>    hw/virtio/meson.build              |   1 +
> >>>    hw/virtio/vhost-vdpa-blk-pci.c     | 101 +++++++++++++
> >>>    include/hw/virtio/vhost-vdpa-blk.h |  30 ++++
> >>>    6 files changed, 365 insertions(+)
> >>>    create mode 100644 hw/block/vhost-vdpa-blk.c
> >>>    create mode 100644 hw/virtio/vhost-vdpa-blk-pci.c
> >>>    create mode 100644 include/hw/virtio/vhost-vdpa-blk.h
> >>>
> >>> diff --git a/hw/block/Kconfig b/hw/block/Kconfig
> >>> index 4fcd152166..4615a2c116 100644
> >>> --- a/hw/block/Kconfig
> >>> +++ b/hw/block/Kconfig
> >>> @@ -41,5 +41,10 @@ config VHOST_USER_BLK
> >>>        default y if VIRTIO_PCI
> >>>        depends on VIRTIO && VHOST_USER && LINUX
> >>>
> >>> +config VHOST_VDPA_BLK
> >>> +    bool
> >>> +    default y if VIRTIO_PCI
> >>> +    depends on VIRTIO && VHOST_VDPA && LINUX
> >>> +
> >>>    config SWIM
> >>>        bool
> >>> diff --git a/hw/block/meson.build b/hw/block/meson.build
> >>> index 5862bda4cb..98f1fc330a 100644
> >>> --- a/hw/block/meson.build
> >>> +++ b/hw/block/meson.build
> >>> @@ -17,5 +17,6 @@ softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'n
> >>>
> >>>    specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
> >>>    specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-blk-common.c', 'vhost-user-blk.c'))
> >>> +specific_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-blk-common.c', 'vhost-vdpa-blk.c'))
> >>>
> >>>    subdir('dataplane')
> >>> diff --git a/hw/block/vhost-vdpa-blk.c b/hw/block/vhost-vdpa-blk.c
> >>> new file mode 100644
> >>> index 0000000000..d5cbbbba10
> >>> --- /dev/null
> >>> +++ b/hw/block/vhost-vdpa-blk.c
> >>> @@ -0,0 +1,227 @@
> >>> +/*
> >>> + * vhost-vdpa-blk host device
> >>> + *
> >>> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> >>> + *
> >>> + * Author:
> >>> + *   Xie Yongji <xieyongji@bytedance.com>
> >>> + *
> >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> >>> + * the COPYING file in the top-level directory.
> >>> + *
> >>> + */
> >>> +
> >>> +#include "qemu/osdep.h"
> >>> +#include "qapi/error.h"
> >>> +#include "qemu/error-report.h"
> >>> +#include "qemu/cutils.h"
> >>> +#include "hw/qdev-core.h"
> >>> +#include "hw/qdev-properties.h"
> >>> +#include "hw/qdev-properties-system.h"
> >>> +#include "hw/virtio/vhost.h"
> >>> +#include "hw/virtio/vhost-vdpa-blk.h"
> >>> +#include "hw/virtio/virtio.h"
> >>> +#include "hw/virtio/virtio-bus.h"
> >>> +#include "hw/virtio/virtio-access.h"
> >>> +#include "sysemu/sysemu.h"
> >>> +#include "sysemu/runstate.h"
> >>> +
> >>> +static const int vdpa_feature_bits[] = {
> >>> +    VIRTIO_BLK_F_SIZE_MAX,
> >>> +    VIRTIO_BLK_F_SEG_MAX,
> >>> +    VIRTIO_BLK_F_GEOMETRY,
> >>> +    VIRTIO_BLK_F_BLK_SIZE,
> >>> +    VIRTIO_BLK_F_TOPOLOGY,
> >>> +    VIRTIO_BLK_F_MQ,
> >>> +    VIRTIO_BLK_F_RO,
> >>> +    VIRTIO_BLK_F_FLUSH,
> >>> +    VIRTIO_BLK_F_CONFIG_WCE,
> >>> +    VIRTIO_BLK_F_DISCARD,
> >>> +    VIRTIO_BLK_F_WRITE_ZEROES,
> >>> +    VIRTIO_F_VERSION_1,
> >>> +    VIRTIO_RING_F_INDIRECT_DESC,
> >>> +    VIRTIO_RING_F_EVENT_IDX,
> >>> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> >>> +    VHOST_INVALID_FEATURE_BIT
> >>> +};
> >>> +
> >>> +static void vhost_vdpa_blk_set_status(VirtIODevice *vdev, uint8_t status)
> >>> +{
> >>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> >>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> >>> +    bool should_start = virtio_device_started(vdev, status);
> >>> +    int ret;
> >>> +
> >>> +    if (!vdev->vm_running) {
> >>> +        should_start = false;
> >>> +    }
> >>> +
> >>> +    if (vbc->dev.started == should_start) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (should_start) {
> >>> +        ret = vhost_blk_common_start(vbc);
> >>> +        if (ret < 0) {
> >>> +            error_report("vhost-vdpa-blk: vhost start failed: %s",
> >>> +                         strerror(-ret));
> >>> +        }
> >>> +    } else {
> >>> +        vhost_blk_common_stop(vbc);
> >>> +    }
> >>> +
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> >>> +{
> >>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> >>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> >>> +    int i, ret;
> >>
> >> I believe we should never reach here, the backend should poll the
> >> notifier and trigger vq handler there after DRIVER_OK?
> >>
> > Some legacy virtio-blk driver (virtio 0.9) will do that. Kick before
> > set DRIVER_OK.
>
>
> Ok, I see, but any reason:
>
> 1) we need start vhost-blk
> 2) the relay is not done per vq but per device?
>

We need to make the vhost backend process I/Os. Otherwise, the guest will
hang if we don't start vhost-blk here.

>
> >
> >>> +
> >>> +    if (!vdev->start_on_kick) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (vbc->dev.started) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    ret = vhost_blk_common_start(vbc);
> >>> +    if (ret < 0) {
> >>> +        error_report("vhost-vdpa-blk: vhost start failed: %s",
> >>> +                     strerror(-ret));
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    /* Kick right away to begin processing requests already in vring */
> >>> +    for (i = 0; i < vbc->dev.nvqs; i++) {
> >>> +        VirtQueue *kick_vq = virtio_get_queue(vdev, i);
> >>> +
> >>> +        if (!virtio_queue_get_desc_addr(vdev, i)) {
> >>> +            continue;
> >>> +        }
> >>> +        event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
> >>> +    }
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
> >>> +{
> >>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> >>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> >>> +    Error *err = NULL;
> >>> +    int ret;
> >>> +
> >>> +    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
> >>> +    if (s->vdpa.device_fd == -1) {
> >>> +        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
> >>> +                   s->vdpa_dev, strerror(errno));
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
> >>> +    if (err != NULL) {
> >>> +        error_propagate(errp, err);
> >>> +        goto blk_err;
> >>> +    }
> >>> +
> >>> +    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);
> >>> +    vbc->dev.nvqs = vbc->num_queues;
> >>> +    vbc->dev.vqs = vbc->vhost_vqs;
> >>> +    vbc->dev.vq_index = 0;
> >>> +    vbc->dev.backend_features = 0;
> >>> +    vbc->started = false;
> >>> +
> >>> +    vhost_dev_set_config_notifier(&vbc->dev, &blk_ops);
> >>> +
> >>> +    ret = vhost_dev_init(&vbc->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0);
> >>> +    if (ret < 0) {
> >>> +        error_setg(errp, "vhost-vdpa-blk: vhost initialization failed: %s",
> >>> +                   strerror(-ret));
> >>> +        goto init_err;
> >>> +    }
> >>> +
> >>> +    ret = vhost_dev_get_config(&vbc->dev, (uint8_t *)&vbc->blkcfg,
> >>> +                               sizeof(struct virtio_blk_config));
> >>> +    if (ret < 0) {
> >>> +        error_setg(errp, "vhost-vdpa-blk: get block config failed");
> >>> +        goto config_err;
> >>> +    }
> >>> +
> >>> +    return;
> >>> +config_err:
> >>> +    vhost_dev_cleanup(&vbc->dev);
> >>> +init_err:
> >>> +    vhost_blk_common_unrealize(vbc);
> >>> +blk_err:
> >>> +    close(s->vdpa.device_fd);
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_blk_device_unrealize(DeviceState *dev)
> >>> +{
> >>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(dev);
> >>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> >>> +
> >>> +    virtio_set_status(vdev, 0);
> >>> +    vhost_dev_cleanup(&vbc->dev);
> >>> +    vhost_blk_common_unrealize(vbc);
> >>> +    close(s->vdpa.device_fd);
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_blk_instance_init(Object *obj)
> >>> +{
> >>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(obj);
> >>> +
> >>> +    vbc->feature_bits = vdpa_feature_bits;
> >>> +
> >>> +    device_add_bootindex_property(obj, &vbc->bootindex, "bootindex",
> >>> +                                  "/disk@0,0", DEVICE(obj));
> >>> +}
> >>> +
> >>> +static const VMStateDescription vmstate_vhost_vdpa_blk = {
> >>> +    .name = "vhost-vdpa-blk",
> >>> +    .minimum_version_id = 1,
> >>> +    .version_id = 1,
> >>> +    .fields = (VMStateField[]) {
> >>> +        VMSTATE_VIRTIO_DEVICE,
> >>> +        VMSTATE_END_OF_LIST()
> >>> +    },
> >>> +};
> >>> +
> >>> +static Property vhost_vdpa_blk_properties[] = {
> >>> +    DEFINE_PROP_STRING("vdpa-dev", VHostVdpaBlk, vdpa_dev),
> >>> +    DEFINE_PROP_UINT16("num-queues", VHostBlkCommon, num_queues,
> >>> +                       VHOST_BLK_AUTO_NUM_QUEUES),
> >>> +    DEFINE_PROP_UINT32("queue-size", VHostBlkCommon, queue_size, 256),
> >>> +    DEFINE_PROP_BIT("config-wce", VHostBlkCommon, config_wce, 0, true),
> >>> +    DEFINE_PROP_END_OF_LIST(),
> >>> +};
> >>> +
> >>> +static void vhost_vdpa_blk_class_init(ObjectClass *klass, void *data)
> >>> +{
> >>> +    DeviceClass *dc = DEVICE_CLASS(klass);
> >>> +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
> >>> +
> >>> +    device_class_set_props(dc, vhost_vdpa_blk_properties);
> >>> +    dc->vmsd = &vmstate_vhost_vdpa_blk;
> >>> +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> >>> +    vdc->realize = vhost_vdpa_blk_device_realize;
> >>> +    vdc->unrealize = vhost_vdpa_blk_device_unrealize;
> >>> +    vdc->set_status = vhost_vdpa_blk_set_status;
> >>> +}
> >>> +
> >>> +static const TypeInfo vhost_vdpa_blk_info = {
> >>> +    .name = TYPE_VHOST_VDPA_BLK,
> >>> +    .parent = TYPE_VHOST_BLK_COMMON,
> >>> +    .instance_size = sizeof(VHostVdpaBlk),
> >>> +    .instance_init = vhost_vdpa_blk_instance_init,
> >>> +    .class_init = vhost_vdpa_blk_class_init,
> >>> +};
> >>> +
> >>> +static void virtio_register_types(void)
> >>> +{
> >>> +    type_register_static(&vhost_vdpa_blk_info);
> >>> +}
> >>> +
> >>> +type_init(virtio_register_types)
> >>> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> >>> index fbff9bc9d4..f02bea65a2 100644
> >>> --- a/hw/virtio/meson.build
> >>> +++ b/hw/virtio/meson.build
> >>> @@ -30,6 +30,7 @@ virtio_pci_ss = ss.source_set()
> >>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
> >>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c'))
> >>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c'))
> >>> +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-vdpa-blk-pci.c'))
> >>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c'))
> >>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
> >>>    virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
> >>> diff --git a/hw/virtio/vhost-vdpa-blk-pci.c b/hw/virtio/vhost-vdpa-blk-pci.c
> >>> new file mode 100644
> >>> index 0000000000..976c47fb4f
> >>> --- /dev/null
> >>> +++ b/hw/virtio/vhost-vdpa-blk-pci.c
> >>> @@ -0,0 +1,101 @@
> >>> +/*
> >>> + * vhost-vdpa-blk PCI Bindings
> >>> + *
> >>> + * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
> >>> + *
> >>> + * Author:
> >>> + *   Xie Yongji <xieyongji@bytedance.com>
> >>> + *
> >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> >>> + * the COPYING file in the top-level directory.
> >>> + *
> >>> + */
> >>> +
> >>> +#include "qemu/osdep.h"
> >>> +#include "standard-headers/linux/virtio_pci.h"
> >>> +#include "hw/virtio/virtio.h"
> >>> +#include "hw/virtio/vhost-vdpa-blk.h"
> >>> +#include "hw/pci/pci.h"
> >>> +#include "hw/qdev-properties.h"
> >>> +#include "qapi/error.h"
> >>> +#include "qemu/error-report.h"
> >>> +#include "qemu/module.h"
> >>> +#include "virtio-pci.h"
> >>> +#include "qom/object.h"
> >>> +
> >>> +typedef struct VHostVdpaBlkPCI VHostVdpaBlkPCI;
> >>> +
> >>> +#define TYPE_VHOST_VDPA_BLK_PCI "vhost-vdpa-blk-pci-base"
> >>> +DECLARE_INSTANCE_CHECKER(VHostVdpaBlkPCI, VHOST_VDPA_BLK_PCI,
> >>> +                         TYPE_VHOST_VDPA_BLK_PCI)
> >>> +
> >>> +struct VHostVdpaBlkPCI {
> >>> +    VirtIOPCIProxy parent_obj;
> >>> +    VHostVdpaBlk vdev;
> >>> +};
> >>> +
> >>> +static Property vhost_vdpa_blk_pci_properties[] = {
> >>> +    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
> >>> +    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
> >>> +                       DEV_NVECTORS_UNSPECIFIED),
> >>> +    DEFINE_PROP_END_OF_LIST(),
> >>> +};
> >>> +
> >>> +static void vhost_vdpa_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
> >>> +{
> >>> +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(vpci_dev);
> >>> +    DeviceState *vdev = DEVICE(&dev->vdev);
> >>> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(&dev->vdev);
> >>> +
> >>> +    if (vbc->num_queues == VHOST_BLK_AUTO_NUM_QUEUES) {
> >>> +        vbc->num_queues = virtio_pci_optimal_num_queues(0);
> >>> +    }
> >>> +
> >>> +    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
> >>> +        vpci_dev->nvectors = vbc->num_queues + 1;
> >>> +    }
> >>> +
> >>> +    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_blk_pci_class_init(ObjectClass *klass, void *data)
> >>> +{
> >>> +    DeviceClass *dc = DEVICE_CLASS(klass);
> >>> +    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
> >>> +    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
> >>> +
> >>> +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> >>> +    device_class_set_props(dc, vhost_vdpa_blk_pci_properties);
> >>> +    k->realize = vhost_vdpa_blk_pci_realize;
> >>> +    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> >>> +    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
> >>> +    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
> >>> +    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_blk_pci_instance_init(Object *obj)
> >>> +{
> >>> +    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(obj);
> >>> +
> >>> +    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
> >>> +                                TYPE_VHOST_VDPA_BLK);
> >>> +    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
> >>> +                              "bootindex");
> >>> +}
> >>> +
> >>> +static const VirtioPCIDeviceTypeInfo vhost_vdpa_blk_pci_info = {
> >>> +    .base_name               = TYPE_VHOST_VDPA_BLK_PCI,
> >>> +    .generic_name            = "vhost-vdpa-blk-pci",
> >>> +    .transitional_name       = "vhost-vdpa-blk-pci-transitional",
> >>> +    .non_transitional_name   = "vhost-vdpa-blk-pci-non-transitional",
> >>> +    .instance_size  = sizeof(VHostVdpaBlkPCI),
> >>> +    .instance_init  = vhost_vdpa_blk_pci_instance_init,
> >>> +    .class_init     = vhost_vdpa_blk_pci_class_init,
> >>> +};
> >>> +
> >>> +static void vhost_vdpa_blk_pci_register(void)
> >>> +{
> >>> +    virtio_pci_types_register(&vhost_vdpa_blk_pci_info);
> >>> +}
> >>
> >> I wonder how could we use virtio-mmio for vDPA block here.
> >>
> > Use something like:
> >
> > -device vhost-vdpa-blk,vdpa-dev=/dev/vhost-vdpa-0 ?
>
>
> Something like this; making vDPA independent of a specific bus is a
> great advantage.
>

Yes, and I think we already support that.

Thanks,
Yongji
Stefan Hajnoczi April 26, 2021, 3:05 p.m. UTC | #5
On Thu, Apr 08, 2021 at 06:12:52PM +0800, Xie Yongji wrote:
> +static const int vdpa_feature_bits[] = {
> +    VIRTIO_BLK_F_SIZE_MAX,
> +    VIRTIO_BLK_F_SEG_MAX,
> +    VIRTIO_BLK_F_GEOMETRY,
> +    VIRTIO_BLK_F_BLK_SIZE,
> +    VIRTIO_BLK_F_TOPOLOGY,
> +    VIRTIO_BLK_F_MQ,
> +    VIRTIO_BLK_F_RO,
> +    VIRTIO_BLK_F_FLUSH,
> +    VIRTIO_BLK_F_CONFIG_WCE,
> +    VIRTIO_BLK_F_DISCARD,
> +    VIRTIO_BLK_F_WRITE_ZEROES,
> +    VIRTIO_F_VERSION_1,
> +    VIRTIO_RING_F_INDIRECT_DESC,
> +    VIRTIO_RING_F_EVENT_IDX,
> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> +    VHOST_INVALID_FEATURE_BIT
> +};

Please add VIRTIO_F_RING_PACKED so it can be implemented in vDPA in the
future without changes to the QEMU vhost-vdpa-blk.c code.

> +static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> +    Error *err = NULL;
> +    int ret;
> +
> +    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
> +    if (s->vdpa.device_fd == -1) {
> +        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
> +                   s->vdpa_dev, strerror(errno));
> +        return;
> +    }
> +
> +    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
> +    if (err != NULL) {
> +        error_propagate(errp, err);
> +        goto blk_err;
> +    }
> +
> +    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);

This is already done by vhost_blk_common_realize(). The old pointer is
overwritten and leaked here.

> +static const VMStateDescription vmstate_vhost_vdpa_blk = {
> +    .name = "vhost-vdpa-blk",
> +    .minimum_version_id = 1,
> +    .version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_VIRTIO_DEVICE,
> +        VMSTATE_END_OF_LIST()
> +    },
> +};

vdpa-blk does not support live migration yet. Please remove this.

Does hw/virtio/vhost.c automatically register a migration
blocker? If not, please register one.

> +#define TYPE_VHOST_VDPA_BLK "vhost-vdpa-blk"

At this stage vdpa-blk is still very new and in development. I suggest
naming it x-vhost-vdpa-blk so that incompatible changes can still be
made to the command-line/APIs. "x-" can be removed later when the
feature has matured.
Yongji Xie April 27, 2021, 10:33 a.m. UTC | #6
On Mon, Apr 26, 2021 at 11:05 PM Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Thu, Apr 08, 2021 at 06:12:52PM +0800, Xie Yongji wrote:
> > +static const int vdpa_feature_bits[] = {
> > +    VIRTIO_BLK_F_SIZE_MAX,
> > +    VIRTIO_BLK_F_SEG_MAX,
> > +    VIRTIO_BLK_F_GEOMETRY,
> > +    VIRTIO_BLK_F_BLK_SIZE,
> > +    VIRTIO_BLK_F_TOPOLOGY,
> > +    VIRTIO_BLK_F_MQ,
> > +    VIRTIO_BLK_F_RO,
> > +    VIRTIO_BLK_F_FLUSH,
> > +    VIRTIO_BLK_F_CONFIG_WCE,
> > +    VIRTIO_BLK_F_DISCARD,
> > +    VIRTIO_BLK_F_WRITE_ZEROES,
> > +    VIRTIO_F_VERSION_1,
> > +    VIRTIO_RING_F_INDIRECT_DESC,
> > +    VIRTIO_RING_F_EVENT_IDX,
> > +    VIRTIO_F_NOTIFY_ON_EMPTY,
> > +    VHOST_INVALID_FEATURE_BIT
> > +};
>
> Please add VIRTIO_F_RING_PACKED so it can be implemented in vDPA in the
> future without changes to the QEMU vhost-vdpa-blk.c code.
>

Sure.

> > +static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
> > +    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
> > +    Error *err = NULL;
> > +    int ret;
> > +
> > +    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
> > +    if (s->vdpa.device_fd == -1) {
> > +        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
> > +                   s->vdpa_dev, strerror(errno));
> > +        return;
> > +    }
> > +
> > +    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
> > +    if (err != NULL) {
> > +        error_propagate(errp, err);
> > +        goto blk_err;
> > +    }
> > +
> > +    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);
>
> This is already done by vhost_blk_common_realize(). The old pointer is
> overwritten and leaked here.
>

Will fix it.

> > +static const VMStateDescription vmstate_vhost_vdpa_blk = {
> > +    .name = "vhost-vdpa-blk",
> > +    .minimum_version_id = 1,
> > +    .version_id = 1,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_VIRTIO_DEVICE,
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +};
>
> vdpa-blk does not support live migration yet. Please remove this.
>
> Does hw/virtio/vhost.c automatically register a migration
> blocker? If not, please register one.
>

Will do it.

> > +#define TYPE_VHOST_VDPA_BLK "vhost-vdpa-blk"
>
> At this stage vdpa-blk is still very new and in development. I suggest
> naming it x-vhost-vdpa-blk so that incompatible changes can still be
> made to the command-line/APIs. "x-" can be removed later when the
> feature has matured.

OK.

Thanks,
Yongji
diff mbox series

Patch

diff --git a/hw/block/Kconfig b/hw/block/Kconfig
index 4fcd152166..4615a2c116 100644
--- a/hw/block/Kconfig
+++ b/hw/block/Kconfig
@@ -41,5 +41,10 @@  config VHOST_USER_BLK
     default y if VIRTIO_PCI
     depends on VIRTIO && VHOST_USER && LINUX
 
+config VHOST_VDPA_BLK
+    bool
+    default y if VIRTIO_PCI
+    depends on VIRTIO && VHOST_VDPA && LINUX
+
 config SWIM
     bool
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 5862bda4cb..98f1fc330a 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -17,5 +17,6 @@  softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'n
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-blk-common.c', 'vhost-user-blk.c'))
+specific_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-blk-common.c', 'vhost-vdpa-blk.c'))
 
 subdir('dataplane')
diff --git a/hw/block/vhost-vdpa-blk.c b/hw/block/vhost-vdpa-blk.c
new file mode 100644
index 0000000000..d5cbbbba10
--- /dev/null
+++ b/hw/block/vhost-vdpa-blk.c
@@ -0,0 +1,227 @@ 
+/*
+ * vhost-vdpa-blk host device
+ *
+ * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/cutils.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-vdpa-blk.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+
+static const int vdpa_feature_bits[] = {
+    VIRTIO_BLK_F_SIZE_MAX,
+    VIRTIO_BLK_F_SEG_MAX,
+    VIRTIO_BLK_F_GEOMETRY,
+    VIRTIO_BLK_F_BLK_SIZE,
+    VIRTIO_BLK_F_TOPOLOGY,
+    VIRTIO_BLK_F_MQ,
+    VIRTIO_BLK_F_RO,
+    VIRTIO_BLK_F_FLUSH,
+    VIRTIO_BLK_F_CONFIG_WCE,
+    VIRTIO_BLK_F_DISCARD,
+    VIRTIO_BLK_F_WRITE_ZEROES,
+    VIRTIO_F_VERSION_1,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VHOST_INVALID_FEATURE_BIT
+};
+
+static void vhost_vdpa_blk_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
+    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
+    bool should_start = virtio_device_started(vdev, status);
+    int ret;
+
+    if (!vdev->vm_running) {
+        should_start = false;
+    }
+
+    if (vbc->dev.started == should_start) {
+        return;
+    }
+
+    if (should_start) {
+        ret = vhost_blk_common_start(vbc);
+        if (ret < 0) {
+            error_report("vhost-vdpa-blk: vhost start failed: %s",
+                         strerror(-ret));
+        }
+    } else {
+        vhost_blk_common_stop(vbc);
+    }
+
+}
+
+static void vhost_vdpa_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
+    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
+    int i, ret;
+
+    if (!vdev->start_on_kick) {
+        return;
+    }
+
+    if (vbc->dev.started) {
+        return;
+    }
+
+    ret = vhost_blk_common_start(vbc);
+    if (ret < 0) {
+        error_report("vhost-vdpa-blk: vhost start failed: %s",
+                     strerror(-ret));
+        return;
+    }
+
+    /* Kick right away to begin processing requests already in vring */
+    for (i = 0; i < vbc->dev.nvqs; i++) {
+        VirtQueue *kick_vq = virtio_get_queue(vdev, i);
+
+        if (!virtio_queue_get_desc_addr(vdev, i)) {
+            continue;
+        }
+        event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
+    }
+}
+
+static void vhost_vdpa_blk_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostVdpaBlk *s = VHOST_VDPA_BLK(vdev);
+    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
+    Error *err = NULL;
+    int ret;
+
+    s->vdpa.device_fd = qemu_open_old(s->vdpa_dev, O_RDWR);
+    if (s->vdpa.device_fd == -1) {
+        error_setg(errp, "vhost-vdpa-blk: open %s failed: %s",
+                   s->vdpa_dev, strerror(errno));
+        return;
+    }
+
+    vhost_blk_common_realize(vbc, vhost_vdpa_blk_handle_output, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        goto blk_err;
+    }
+
+    vbc->vhost_vqs = g_new0(struct vhost_virtqueue, vbc->num_queues);
+    vbc->dev.nvqs = vbc->num_queues;
+    vbc->dev.vqs = vbc->vhost_vqs;
+    vbc->dev.vq_index = 0;
+    vbc->dev.backend_features = 0;
+    vbc->started = false;
+
+    vhost_dev_set_config_notifier(&vbc->dev, &blk_ops);
+
+    ret = vhost_dev_init(&vbc->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0);
+    if (ret < 0) {
+        error_setg(errp, "vhost-vdpa-blk: vhost initialization failed: %s",
+                   strerror(-ret));
+        goto init_err;
+    }
+
+    ret = vhost_dev_get_config(&vbc->dev, (uint8_t *)&vbc->blkcfg,
+                               sizeof(struct virtio_blk_config));
+    if (ret < 0) {
+        error_setg(errp, "vhost-vdpa-blk: get block config failed");
+        goto config_err;
+    }
+
+    return;
+config_err:
+    vhost_dev_cleanup(&vbc->dev);
+init_err:
+    vhost_blk_common_unrealize(vbc);
+blk_err:
+    close(s->vdpa.device_fd);
+}
+
+static void vhost_vdpa_blk_device_unrealize(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostVdpaBlk *s = VHOST_VDPA_BLK(dev);
+    VHostBlkCommon *vbc = VHOST_BLK_COMMON(s);
+
+    virtio_set_status(vdev, 0);
+    vhost_dev_cleanup(&vbc->dev);
+    vhost_blk_common_unrealize(vbc);
+    close(s->vdpa.device_fd);
+}
+
+static void vhost_vdpa_blk_instance_init(Object *obj)
+{
+    VHostBlkCommon *vbc = VHOST_BLK_COMMON(obj);
+
+    vbc->feature_bits = vdpa_feature_bits;
+
+    device_add_bootindex_property(obj, &vbc->bootindex, "bootindex",
+                                  "/disk@0,0", DEVICE(obj));
+}
+
+static const VMStateDescription vmstate_vhost_vdpa_blk = {
+    .name = "vhost-vdpa-blk",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property vhost_vdpa_blk_properties[] = {
+    DEFINE_PROP_STRING("vdpa-dev", VHostVdpaBlk, vdpa_dev),
+    DEFINE_PROP_UINT16("num-queues", VHostBlkCommon, num_queues,
+                       VHOST_BLK_AUTO_NUM_QUEUES),
+    DEFINE_PROP_UINT32("queue-size", VHostBlkCommon, queue_size, 256),
+    DEFINE_PROP_BIT("config-wce", VHostBlkCommon, config_wce, 0, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_vdpa_blk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, vhost_vdpa_blk_properties);
+    dc->vmsd = &vmstate_vhost_vdpa_blk;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    vdc->realize = vhost_vdpa_blk_device_realize;
+    vdc->unrealize = vhost_vdpa_blk_device_unrealize;
+    vdc->set_status = vhost_vdpa_blk_set_status;
+}
+
+static const TypeInfo vhost_vdpa_blk_info = {
+    .name = TYPE_VHOST_VDPA_BLK,
+    .parent = TYPE_VHOST_BLK_COMMON,
+    .instance_size = sizeof(VHostVdpaBlk),
+    .instance_init = vhost_vdpa_blk_instance_init,
+    .class_init = vhost_vdpa_blk_class_init,
+};
+
+static void virtio_register_types(void)
+{
+    type_register_static(&vhost_vdpa_blk_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index fbff9bc9d4..f02bea65a2 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -30,6 +30,7 @@  virtio_pci_ss = ss.source_set()
 virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_BLK', if_true: files('vhost-vdpa-blk-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
diff --git a/hw/virtio/vhost-vdpa-blk-pci.c b/hw/virtio/vhost-vdpa-blk-pci.c
new file mode 100644
index 0000000000..976c47fb4f
--- /dev/null
+++ b/hw/virtio/vhost-vdpa-blk-pci.c
@@ -0,0 +1,101 @@ 
+/*
+ * vhost-vdpa-blk PCI Bindings
+ *
+ * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_pci.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost-vdpa-blk.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VHostVdpaBlkPCI VHostVdpaBlkPCI;
+
+#define TYPE_VHOST_VDPA_BLK_PCI "vhost-vdpa-blk-pci-base"
+DECLARE_INSTANCE_CHECKER(VHostVdpaBlkPCI, VHOST_VDPA_BLK_PCI,
+                         TYPE_VHOST_VDPA_BLK_PCI)
+
+struct VHostVdpaBlkPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostVdpaBlk vdev;
+};
+
+static Property vhost_vdpa_blk_pci_properties[] = {
+    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_vdpa_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    VHostBlkCommon *vbc = VHOST_BLK_COMMON(&dev->vdev);
+
+    if (vbc->num_queues == VHOST_BLK_AUTO_NUM_QUEUES) {
+        vbc->num_queues = virtio_pci_optimal_num_queues(0);
+    }
+
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = vbc->num_queues + 1;
+    }
+
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+
+static void vhost_vdpa_blk_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    device_class_set_props(dc, vhost_vdpa_blk_pci_properties);
+    k->realize = vhost_vdpa_blk_pci_realize;
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
+    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
+}
+
+static void vhost_vdpa_blk_pci_instance_init(Object *obj)
+{
+    VHostVdpaBlkPCI *dev = VHOST_VDPA_BLK_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_VDPA_BLK);
+    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
+                              "bootindex");
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_vdpa_blk_pci_info = {
+    .base_name               = TYPE_VHOST_VDPA_BLK_PCI,
+    .generic_name            = "vhost-vdpa-blk-pci",
+    .transitional_name       = "vhost-vdpa-blk-pci-transitional",
+    .non_transitional_name   = "vhost-vdpa-blk-pci-non-transitional",
+    .instance_size  = sizeof(VHostVdpaBlkPCI),
+    .instance_init  = vhost_vdpa_blk_pci_instance_init,
+    .class_init     = vhost_vdpa_blk_pci_class_init,
+};
+
+static void vhost_vdpa_blk_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_vdpa_blk_pci_info);
+}
+
+type_init(vhost_vdpa_blk_pci_register)
diff --git a/include/hw/virtio/vhost-vdpa-blk.h b/include/hw/virtio/vhost-vdpa-blk.h
new file mode 100644
index 0000000000..80712f6dae
--- /dev/null
+++ b/include/hw/virtio/vhost-vdpa-blk.h
@@ -0,0 +1,30 @@ 
+/*
+ * vhost-vdpa-blk host device
+ *
+ * Copyright (C) 2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_VDPA_BLK_H
+#define VHOST_VDPA_BLK_H
+
+#include "hw/virtio/vhost-vdpa.h"
+#include "hw/virtio/vhost-blk-common.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_VDPA_BLK "vhost-vdpa-blk"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostVdpaBlk, VHOST_VDPA_BLK)
+
+struct VHostVdpaBlk {
+    VHostBlkCommon parent_obj;
+    char *vdpa_dev;
+    struct vhost_vdpa vdpa;
+};
+
+#endif