[RFC,2/2] vhost: IFC VF vdpa layer
diff mbox series

Message ID 20191016013050.3918-3-lingshan.zhu@intel.com
State New
Headers show
Series
  • Intel IFC VF driver for vdpa
Related show

Commit Message

Zhu Lingshan Oct. 16, 2019, 1:30 a.m. UTC
This commit introduced IFC VF operations for vdpa, which complys to
vhost_mdev interfaces, handles IFC VF initialization,
configuration and removal.

Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
---
 drivers/vhost/ifcvf/ifcvf_main.c | 541 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 541 insertions(+)
 create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c

Comments

Jason Wang Oct. 16, 2019, 10:19 a.m. UTC | #1
On 2019/10/16 上午9:30, Zhu Lingshan wrote:
> This commit introduced IFC VF operations for vdpa, which complys to
> vhost_mdev interfaces, handles IFC VF initialization,
> configuration and removal.
>
> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
> ---
>   drivers/vhost/ifcvf/ifcvf_main.c | 541 +++++++++++++++++++++++++++++++++++++++
>   1 file changed, 541 insertions(+)
>   create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c
>
> diff --git a/drivers/vhost/ifcvf/ifcvf_main.c b/drivers/vhost/ifcvf/ifcvf_main.c
> new file mode 100644
> index 000000000000..c48a29969a85
> --- /dev/null
> +++ b/drivers/vhost/ifcvf/ifcvf_main.c
> @@ -0,0 +1,541 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2019 Intel Corporation.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/mdev.h>
> +#include <linux/pci.h>
> +#include <linux/sysfs.h>
> +
> +#include "ifcvf_base.h"
> +
> +#define VERSION_STRING	"0.1"
> +#define DRIVER_AUTHOR	"Intel Corporation"
> +#define IFCVF_DRIVER_NAME	"ifcvf"
> +
> +static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
> +{
> +	struct vring_info *vring = arg;
> +
> +	if (vring->cb.callback)
> +		return vring->cb.callback(vring->cb.private);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
> +{
> +	return IFC_SUPPORTED_FEATURES;


I would expect this should be done by querying the hw. Or IFC VF can't 
get any update through its firmware?


> +}
> +
> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 features)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->req_features = features;
> +
> +	return 0;
> +}
> +
> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 qid)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	return vf->vring[qid].last_avail_idx;


Does this really work? I'd expect it should be fetched from hw since 
it's an internal state.


> +}
> +
> +static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 qid, u64 num)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->vring[qid].last_used_idx = num;


I fail to understand why last_used_idx is needed. It looks to me the 
used idx in the used ring is sufficient.


> +	vf->vring[qid].last_avail_idx = num;


Do we need a synchronization with hw immediately here?


> +
> +	return 0;
> +}
> +
> +static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, u16 idx,
> +				     u64 desc_area, u64 driver_area,
> +				     u64 device_area)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->vring[idx].desc = desc_area;
> +	vf->vring[idx].avail = driver_area;
> +	vf->vring[idx].used = device_area;
> +
> +	return 0;
> +}
> +
> +static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 qid, u32 num)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->vring[qid].size = num;
> +}
> +
> +static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
> +				u16 qid, bool ready)
> +{
> +
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->vring[qid].ready = ready;
> +}
> +
> +static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 qid)
> +{
> +
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	return vf->vring[qid].ready;
> +}
> +
> +static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
> +				 struct virtio_mdev_callback *cb)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->vring[idx].cb = *cb;
> +}
> +
> +static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	ifcvf_notify_queue(vf, idx);
> +}
> +
> +static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	return vf->status;
> +}
> +
> +static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	return vf->generation;
> +}
> +
> +static int ifcvf_mdev_get_version(struct mdev_device *mdev)
> +{
> +	return VIRTIO_MDEV_VERSION;
> +}
> +
> +static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
> +{
> +	return IFCVF_DEVICE_ID;
> +}
> +
> +static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
> +{
> +	return IFCVF_VENDOR_ID;
> +}
> +
> +static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
> +{
> +	return IFCVF_QUEUE_ALIGNMENT;
> +}
> +
> +static int ifcvf_start_datapath(void *private)
> +{
> +	int i, ret;
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
> +
> +	for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
> +		if (!vf->vring[i].ready)
> +			break;


Looks like error should be returned here?


> +
> +		if (!vf->vring[i].size)
> +			break;
> +
> +		if (!vf->vring[i].desc || !vf->vring[i].avail ||
> +			!vf->vring[i].used)
> +			break;
> +	}
> +	vf->nr_vring = i;
> +
> +	ret = ifcvf_start_hw(vf);
> +	return ret;
> +}
> +
> +static int ifcvf_stop_datapath(void *private)
> +{
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
> +	int i;
> +
> +	for (i = 0; i < IFCVF_MAX_QUEUES; i++)
> +		vf->vring[i].cb.callback = NULL;


Any synchronization is needed for the vq irq handler?


> +
> +	ifcvf_stop_hw(vf);
> +
> +	return 0;
> +}
> +
> +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
> +{
> +	int i;
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
> +		vf->vring[i].last_used_idx = 0;
> +		vf->vring[i].last_avail_idx = 0;
> +		vf->vring[i].desc = 0;
> +		vf->vring[i].avail = 0;
> +		vf->vring[i].used = 0;
> +		vf->vring[i].ready = 0;
> +		vf->vring->cb.callback = NULL;
> +		vf->vring->cb.private = NULL;
> +	}
> +}
> +
> +static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 status)
> +{
> +	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +
> +	vf->status = status;
> +
> +	if (status == 0) {
> +		ifcvf_stop_datapath(adapter);
> +		ifcvf_reset_vring(adapter);
> +		return;
> +	}
> +
> +	if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
> +		ifcvf_start_datapath(adapter);
> +		return;
> +	}
> +}
> +
> +static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
> +{
> +	return IFCVF_MAX_QUEUES;


The name is confusing, it was used to return the maximum queue size. In 
new version of virtio-mdev, the callback was renamed as get_vq_num_max().


> +}
> +
> +static struct virtio_mdev_device_ops ifc_mdev_ops = {
> +	.get_features  = ifcvf_mdev_get_features,
> +	.set_features  = ifcvf_mdev_set_features,
> +	.get_status    = ifcvf_mdev_get_status,
> +	.set_status    = ifcvf_mdev_set_status,
> +	.get_queue_max = ifcvf_mdev_get_queue_max,
> +	.get_vq_state   = ifcvf_mdev_get_vq_state,
> +	.set_vq_state   = ifcvf_mdev_set_vq_state,
> +	.set_vq_cb      = ifcvf_mdev_set_vq_cb,
> +	.set_vq_ready   = ifcvf_mdev_set_vq_ready,
> +	.get_vq_ready	= ifcvf_mdev_get_vq_ready,
> +	.set_vq_num     = ifcvf_mdev_set_vq_num,
> +	.set_vq_address = ifcvf_mdev_set_vq_address,
> +	.kick_vq        = ifcvf_mdev_kick_vq,
> +	.get_generation	= ifcvf_mdev_get_generation,
> +	.get_version	= ifcvf_mdev_get_version,
> +	.get_device_id	= ifcvf_mdev_get_device_id,
> +	.get_vendor_id	= ifcvf_mdev_get_vendor_id,
> +	.get_vq_align	= ifcvf_mdev_get_vq_align,
> +};


set_config/get_config is missing. It looks to me they are not hard, just 
implementing the access to dev_cfg. It's key to make kernel virtio 
driver to work.

And in the new version of virito-mdev, features like _F_LOG_ALL should 
be advertised through get_mdev_features.


> +
> +static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
> +{
> +	int vector, i, ret, irq;
> +	struct pci_dev *pdev = to_pci_dev(adapter->dev);
> +	struct ifcvf_hw *vf = &adapter->vf;
> +
> +	ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
> +			IFCVF_MAX_INTR, PCI_IRQ_MSIX);
> +	if (ret < 0) {
> +		IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
> +		return ret;
> +	}
> +
> +	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
> +		vector = i + IFCVF_MSI_QUEUE_OFF;
> +		irq = pci_irq_vector(pdev, vector);
> +		ret = request_irq(irq, ifcvf_intr_handler, 0,
> +				pci_name(pdev), &vf->vring[i]);
> +		if (ret) {
> +			IFC_ERR(adapter->dev,
> +				"Failed to request irq for vq %d.\n", i);
> +			return ret;
> +		}
> +	}


Do we need to provide fallback when we can't do per vq MSIX?


> +
> +	return 0;
> +}
> +
> +static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
> +{
> +	int i, vector, irq;
> +	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
> +	struct pci_dev *pdev = to_pci_dev(adapter->dev);
> +
> +	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
> +		vector = i + IFCVF_MSI_QUEUE_OFF;
> +		irq = pci_irq_vector(pdev, vector);
> +		free_irq(irq, &vf->vring[i]);
> +	}
> +}
> +
> +static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
> +{
> +	const char *name = "vhost accelerator (virtio ring compatible)";
> +
> +	return sprintf(buf, "%s\n", name);
> +}
> +MDEV_TYPE_ATTR_RO(name);
> +
> +static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
> +			       char *buf)
> +{
> +	return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
> +}
> +MDEV_TYPE_ATTR_RO(device_api);
> +
> +static ssize_t available_instances_show(struct kobject *kobj,
> +					struct device *dev, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
> +
> +	return sprintf(buf, "%d\n", adapter->mdev_count);
> +}
> +
> +MDEV_TYPE_ATTR_RO(available_instances);
> +
> +static ssize_t type_show(struct kobject *kobj,
> +			struct device *dev, char *buf)
> +{
> +	return sprintf(buf, "%s\n", "net");
> +}
> +
> +MDEV_TYPE_ATTR_RO(type);
> +
> +
> +static struct attribute *mdev_types_attrs[] = {
> +	&mdev_type_attr_name.attr,
> +	&mdev_type_attr_device_api.attr,
> +	&mdev_type_attr_available_instances.attr,
> +	&mdev_type_attr_type.attr,
> +	NULL,
> +};
> +
> +static struct attribute_group mdev_type_group = {
> +	.name  = "vdpa_virtio",


To be consistent, it should be "vhost" or "virtio".


> +	.attrs = mdev_types_attrs,
> +};
> +
> +static struct attribute_group *mdev_type_groups[] = {
> +	&mdev_type_group,
> +	NULL,
> +};
> +
> +const struct attribute_group *mdev_dev_groups[] = {
> +	NULL,
> +};
> +
> +static int ifcvf_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
> +{
> +	struct device *dev = mdev_parent_dev(mdev);
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
> +	int ret = 0;
> +
> +	mutex_lock(&adapter->mdev_lock);
> +
> +	if (adapter->mdev_count < 1) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	mdev_set_class_id(mdev, MDEV_ID_VHOST);
> +	mdev_set_dev_ops(mdev, &ifc_mdev_ops);
> +
> +	mdev_set_drvdata(mdev, adapter);
> +	mdev_set_iommu_device(mdev_dev(mdev), dev);
> +
> +	INIT_LIST_HEAD(&adapter->dma_maps);
> +	adapter->mdev_count--;
> +
> +out:
> +	mutex_unlock(&adapter->mdev_lock);
> +	return ret;
> +}
> +
> +static int ifcvf_mdev_remove(struct mdev_device *mdev)
> +{
> +	struct device *dev = mdev_parent_dev(mdev);
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
> +
> +	mutex_lock(&adapter->mdev_lock);
> +	adapter->mdev_count++;
> +	mutex_unlock(&adapter->mdev_lock);
> +
> +	return 0;
> +}
> +
> +static struct mdev_parent_ops ifcvf_mdev_fops = {
> +	.owner			= THIS_MODULE,
> +	.supported_type_groups	= mdev_type_groups,
> +	.mdev_attr_groups	= mdev_dev_groups,
> +	.create			= ifcvf_mdev_create,
> +	.remove			= ifcvf_mdev_remove,
> +};
> +
> +static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct ifcvf_adapter *adapter;
> +	struct ifcvf_hw *vf;
> +	int ret, i;
> +
> +	adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
> +	if (adapter == NULL) {
> +		ret = -ENOMEM;
> +		goto fail;
> +	}
> +
> +	mutex_init(&adapter->mdev_lock);
> +	adapter->mdev_count = 1;


So this is per VF based vDPA implementation, which seems not convenient 
for management.  Anyhow we can control the creation in PF?

Thanks


> +	adapter->dev = dev;
> +
> +	pci_set_drvdata(pdev, adapter);
> +
> +	ret = pci_enable_device(pdev);
> +	if (ret) {
> +		IFC_ERR(adapter->dev, "Failed to enable device.\n");
> +		goto free_adapter;
> +	}
> +
> +	ret = pci_request_regions(pdev, IFCVF_DRIVER_NAME);
> +	if (ret) {
> +		IFC_ERR(adapter->dev, "Failed to request MMIO region.\n");
> +		goto disable_device;
> +	}
> +
> +	pci_set_master(pdev);
> +
> +	ret = ifcvf_init_msix(adapter);
> +	if (ret) {
> +		IFC_ERR(adapter->dev, "Failed to initialize MSIX.\n");
> +		goto free_msix;
> +	}
> +
> +	vf = &adapter->vf;
> +	for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
> +		vf->mem_resource[i].phys_addr = pci_resource_start(pdev, i);
> +		vf->mem_resource[i].len = pci_resource_len(pdev, i);
> +		if (!vf->mem_resource[i].len) {
> +			vf->mem_resource[i].addr = NULL;
> +			continue;
> +		}
> +
> +		vf->mem_resource[i].addr = pci_iomap_range(pdev, i, 0,
> +				vf->mem_resource[i].len);
> +		if (!vf->mem_resource[i].addr) {
> +			IFC_ERR(adapter->dev, "Failed to map IO resource %d\n",
> +				i);
> +			return -1;
> +		}
> +	}
> +
> +	if (ifcvf_init_hw(vf, pdev) < 0)
> +		return -1;
> +
> +	ret = mdev_register_device(dev, &ifcvf_mdev_fops);
> +	if (ret) {
> +		IFC_ERR(adapter->dev,  "Failed to register mdev device\n");
> +		goto destroy_adapter;
> +	}
> +
> +	return 0;
> +
> +destroy_adapter:
> +	ifcvf_destroy_adapter(adapter);
> +free_msix:
> +	pci_free_irq_vectors(pdev);
> +	pci_release_regions(pdev);
> +disable_device:
> +	pci_disable_device(pdev);
> +free_adapter:
> +	kfree(adapter);
> +fail:
> +	return ret;
> +}
> +
> +static void ifcvf_remove(struct pci_dev *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
> +	struct ifcvf_hw *vf;
> +	int i;
> +
> +	mdev_unregister_device(dev);
> +
> +	vf = &adapter->vf;
> +	for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
> +		if (vf->mem_resource[i].addr) {
> +			pci_iounmap(pdev, vf->mem_resource[i].addr);
> +			vf->mem_resource[i].addr = NULL;
> +		}
> +	}
> +
> +	ifcvf_destroy_adapter(adapter);
> +	pci_free_irq_vectors(pdev);
> +
> +	pci_release_regions(pdev);
> +	pci_disable_device(pdev);
> +
> +	kfree(adapter);
> +}
> +
> +static struct pci_device_id ifcvf_pci_ids[] = {
> +	{ PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
> +			IFCVF_DEVICE_ID,
> +			IFCVF_SUBSYS_VENDOR_ID,
> +			IFCVF_SUBSYS_DEVICE_ID) },
> +	{ 0 },
> +};
> +MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
> +
> +static struct pci_driver ifcvf_driver = {
> +	.name     = IFCVF_DRIVER_NAME,
> +	.id_table = ifcvf_pci_ids,
> +	.probe    = ifcvf_probe,
> +	.remove   = ifcvf_remove,
> +};
> +
> +static int __init ifcvf_init_module(void)
> +{
> +	int ret;
> +
> +	ret = pci_register_driver(&ifcvf_driver);
> +	return ret;
> +}
> +
> +static void __exit ifcvf_exit_module(void)
> +{
> +	pci_unregister_driver(&ifcvf_driver);
> +}
> +
> +module_init(ifcvf_init_module);
> +module_exit(ifcvf_exit_module);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_VERSION(VERSION_STRING);
> +MODULE_AUTHOR(DRIVER_AUTHOR);
Zhu Lingshan Oct. 18, 2019, 6:36 a.m. UTC | #2
Hello Jason,

Thanks for your comments, I am on a conference travel, I will reply next 
Monday.

Thanks,
BR
Zhu Lingshan
On 10/16/2019 6:19 PM, Jason Wang wrote:
>
> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>> This commit introduced IFC VF operations for vdpa, which complys to
>> vhost_mdev interfaces, handles IFC VF initialization,
>> configuration and removal.
>>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>> ---
>>   drivers/vhost/ifcvf/ifcvf_main.c | 541 
>> +++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 541 insertions(+)
>>   create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c
>>
>> diff --git a/drivers/vhost/ifcvf/ifcvf_main.c 
>> b/drivers/vhost/ifcvf/ifcvf_main.c
>> new file mode 100644
>> index 000000000000..c48a29969a85
>> --- /dev/null
>> +++ b/drivers/vhost/ifcvf/ifcvf_main.c
>> @@ -0,0 +1,541 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (C) 2019 Intel Corporation.
>> + */
>> +
>> +#include <linux/interrupt.h>
>> +#include <linux/module.h>
>> +#include <linux/mdev.h>
>> +#include <linux/pci.h>
>> +#include <linux/sysfs.h>
>> +
>> +#include "ifcvf_base.h"
>> +
>> +#define VERSION_STRING    "0.1"
>> +#define DRIVER_AUTHOR    "Intel Corporation"
>> +#define IFCVF_DRIVER_NAME    "ifcvf"
>> +
>> +static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
>> +{
>> +    struct vring_info *vring = arg;
>> +
>> +    if (vring->cb.callback)
>> +        return vring->cb.callback(vring->cb.private);
>> +
>> +    return IRQ_HANDLED;
>> +}
>> +
>> +static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
>> +{
>> +    return IFC_SUPPORTED_FEATURES;
>
>
> I would expect this should be done by querying the hw. Or IFC VF can't 
> get any update through its firmware?
>
>
>> +}
>> +
>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>> features)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->req_features = features;
>> +
>> +    return 0;
>> +}
>> +
>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 qid)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->vring[qid].last_avail_idx;
>
>
> Does this really work? I'd expect it should be fetched from hw since 
> it's an internal state.
>
>
>> +}
>> +
>> +static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 
>> qid, u64 num)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[qid].last_used_idx = num;
>
>
> I fail to understand why last_used_idx is needed. It looks to me the 
> used idx in the used ring is sufficient.
>
>
>> +    vf->vring[qid].last_avail_idx = num;
>
>
> Do we need a synchronization with hw immediately here?
>
>
>> +
>> +    return 0;
>> +}
>> +
>> +static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, u16 idx,
>> +                     u64 desc_area, u64 driver_area,
>> +                     u64 device_area)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[idx].desc = desc_area;
>> +    vf->vring[idx].avail = driver_area;
>> +    vf->vring[idx].used = device_area;
>> +
>> +    return 0;
>> +}
>> +
>> +static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 qid, 
>> u32 num)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[qid].size = num;
>> +}
>> +
>> +static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
>> +                u16 qid, bool ready)
>> +{
>> +
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[qid].ready = ready;
>> +}
>> +
>> +static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 qid)
>> +{
>> +
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->vring[qid].ready;
>> +}
>> +
>> +static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
>> +                 struct virtio_mdev_callback *cb)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[idx].cb = *cb;
>> +}
>> +
>> +static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    ifcvf_notify_queue(vf, idx);
>> +}
>> +
>> +static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->status;
>> +}
>> +
>> +static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->generation;
>> +}
>> +
>> +static int ifcvf_mdev_get_version(struct mdev_device *mdev)
>> +{
>> +    return VIRTIO_MDEV_VERSION;
>> +}
>> +
>> +static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_DEVICE_ID;
>> +}
>> +
>> +static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_VENDOR_ID;
>> +}
>> +
>> +static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_QUEUE_ALIGNMENT;
>> +}
>> +
>> +static int ifcvf_start_datapath(void *private)
>> +{
>> +    int i, ret;
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>> +
>> +    for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
>> +        if (!vf->vring[i].ready)
>> +            break;
>
>
> Looks like error should be returned here?
>
>
>> +
>> +        if (!vf->vring[i].size)
>> +            break;
>> +
>> +        if (!vf->vring[i].desc || !vf->vring[i].avail ||
>> +            !vf->vring[i].used)
>> +            break;
>> +    }
>> +    vf->nr_vring = i;
>> +
>> +    ret = ifcvf_start_hw(vf);
>> +    return ret;
>> +}
>> +
>> +static int ifcvf_stop_datapath(void *private)
>> +{
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>> +    int i;
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUES; i++)
>> +        vf->vring[i].cb.callback = NULL;
>
>
> Any synchronization is needed for the vq irq handler?
>
>
>> +
>> +    ifcvf_stop_hw(vf);
>> +
>> +    return 0;
>> +}
>> +
>> +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
>> +{
>> +    int i;
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>> +        vf->vring[i].last_used_idx = 0;
>> +        vf->vring[i].last_avail_idx = 0;
>> +        vf->vring[i].desc = 0;
>> +        vf->vring[i].avail = 0;
>> +        vf->vring[i].used = 0;
>> +        vf->vring[i].ready = 0;
>> +        vf->vring->cb.callback = NULL;
>> +        vf->vring->cb.private = NULL;
>> +    }
>> +}
>> +
>> +static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 status)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->status = status;
>> +
>> +    if (status == 0) {
>> +        ifcvf_stop_datapath(adapter);
>> +        ifcvf_reset_vring(adapter);
>> +        return;
>> +    }
>> +
>> +    if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
>> +        ifcvf_start_datapath(adapter);
>> +        return;
>> +    }
>> +}
>> +
>> +static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_MAX_QUEUES;
>
>
> The name is confusing, it was used to return the maximum queue size. 
> In new version of virtio-mdev, the callback was renamed as 
> get_vq_num_max().
>
>
>> +}
>> +
>> +static struct virtio_mdev_device_ops ifc_mdev_ops = {
>> +    .get_features  = ifcvf_mdev_get_features,
>> +    .set_features  = ifcvf_mdev_set_features,
>> +    .get_status    = ifcvf_mdev_get_status,
>> +    .set_status    = ifcvf_mdev_set_status,
>> +    .get_queue_max = ifcvf_mdev_get_queue_max,
>> +    .get_vq_state   = ifcvf_mdev_get_vq_state,
>> +    .set_vq_state   = ifcvf_mdev_set_vq_state,
>> +    .set_vq_cb      = ifcvf_mdev_set_vq_cb,
>> +    .set_vq_ready   = ifcvf_mdev_set_vq_ready,
>> +    .get_vq_ready    = ifcvf_mdev_get_vq_ready,
>> +    .set_vq_num     = ifcvf_mdev_set_vq_num,
>> +    .set_vq_address = ifcvf_mdev_set_vq_address,
>> +    .kick_vq        = ifcvf_mdev_kick_vq,
>> +    .get_generation    = ifcvf_mdev_get_generation,
>> +    .get_version    = ifcvf_mdev_get_version,
>> +    .get_device_id    = ifcvf_mdev_get_device_id,
>> +    .get_vendor_id    = ifcvf_mdev_get_vendor_id,
>> +    .get_vq_align    = ifcvf_mdev_get_vq_align,
>> +};
>
>
> set_config/get_config is missing. It looks to me they are not hard, 
> just implementing the access to dev_cfg. It's key to make kernel 
> virtio driver to work.
>
> And in the new version of virito-mdev, features like _F_LOG_ALL should 
> be advertised through get_mdev_features.
>
>
>> +
>> +static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
>> +{
>> +    int vector, i, ret, irq;
>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>> +    struct ifcvf_hw *vf = &adapter->vf;
>> +
>> +    ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
>> +            IFCVF_MAX_INTR, PCI_IRQ_MSIX);
>> +    if (ret < 0) {
>> +        IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
>> +        return ret;
>> +    }
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>> +        irq = pci_irq_vector(pdev, vector);
>> +        ret = request_irq(irq, ifcvf_intr_handler, 0,
>> +                pci_name(pdev), &vf->vring[i]);
>> +        if (ret) {
>> +            IFC_ERR(adapter->dev,
>> +                "Failed to request irq for vq %d.\n", i);
>> +            return ret;
>> +        }
>> +    }
>
>
> Do we need to provide fallback when we can't do per vq MSIX?
>
>
>> +
>> +    return 0;
>> +}
>> +
>> +static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
>> +{
>> +    int i, vector, irq;
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>> +        irq = pci_irq_vector(pdev, vector);
>> +        free_irq(irq, &vf->vring[i]);
>> +    }
>> +}
>> +
>> +static ssize_t name_show(struct kobject *kobj, struct device *dev, 
>> char *buf)
>> +{
>> +    const char *name = "vhost accelerator (virtio ring compatible)";
>> +
>> +    return sprintf(buf, "%s\n", name);
>> +}
>> +MDEV_TYPE_ATTR_RO(name);
>> +
>> +static ssize_t device_api_show(struct kobject *kobj, struct device 
>> *dev,
>> +                   char *buf)
>> +{
>> +    return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
>> +}
>> +MDEV_TYPE_ATTR_RO(device_api);
>> +
>> +static ssize_t available_instances_show(struct kobject *kobj,
>> +                    struct device *dev, char *buf)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +
>> +    return sprintf(buf, "%d\n", adapter->mdev_count);
>> +}
>> +
>> +MDEV_TYPE_ATTR_RO(available_instances);
>> +
>> +static ssize_t type_show(struct kobject *kobj,
>> +            struct device *dev, char *buf)
>> +{
>> +    return sprintf(buf, "%s\n", "net");
>> +}
>> +
>> +MDEV_TYPE_ATTR_RO(type);
>> +
>> +
>> +static struct attribute *mdev_types_attrs[] = {
>> +    &mdev_type_attr_name.attr,
>> +    &mdev_type_attr_device_api.attr,
>> +    &mdev_type_attr_available_instances.attr,
>> +    &mdev_type_attr_type.attr,
>> +    NULL,
>> +};
>> +
>> +static struct attribute_group mdev_type_group = {
>> +    .name  = "vdpa_virtio",
>
>
> To be consistent, it should be "vhost" or "virtio".
>
>
>> +    .attrs = mdev_types_attrs,
>> +};
>> +
>> +static struct attribute_group *mdev_type_groups[] = {
>> +    &mdev_type_group,
>> +    NULL,
>> +};
>> +
>> +const struct attribute_group *mdev_dev_groups[] = {
>> +    NULL,
>> +};
>> +
>> +static int ifcvf_mdev_create(struct kobject *kobj, struct 
>> mdev_device *mdev)
>> +{
>> +    struct device *dev = mdev_parent_dev(mdev);
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +    int ret = 0;
>> +
>> +    mutex_lock(&adapter->mdev_lock);
>> +
>> +    if (adapter->mdev_count < 1) {
>> +        ret = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    mdev_set_class_id(mdev, MDEV_ID_VHOST);
>> +    mdev_set_dev_ops(mdev, &ifc_mdev_ops);
>> +
>> +    mdev_set_drvdata(mdev, adapter);
>> +    mdev_set_iommu_device(mdev_dev(mdev), dev);
>> +
>> +    INIT_LIST_HEAD(&adapter->dma_maps);
>> +    adapter->mdev_count--;
>> +
>> +out:
>> +    mutex_unlock(&adapter->mdev_lock);
>> +    return ret;
>> +}
>> +
>> +static int ifcvf_mdev_remove(struct mdev_device *mdev)
>> +{
>> +    struct device *dev = mdev_parent_dev(mdev);
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +
>> +    mutex_lock(&adapter->mdev_lock);
>> +    adapter->mdev_count++;
>> +    mutex_unlock(&adapter->mdev_lock);
>> +
>> +    return 0;
>> +}
>> +
>> +static struct mdev_parent_ops ifcvf_mdev_fops = {
>> +    .owner            = THIS_MODULE,
>> +    .supported_type_groups    = mdev_type_groups,
>> +    .mdev_attr_groups    = mdev_dev_groups,
>> +    .create            = ifcvf_mdev_create,
>> +    .remove            = ifcvf_mdev_remove,
>> +};
>> +
>> +static int ifcvf_probe(struct pci_dev *pdev, const struct 
>> pci_device_id *id)
>> +{
>> +    struct device *dev = &pdev->dev;
>> +    struct ifcvf_adapter *adapter;
>> +    struct ifcvf_hw *vf;
>> +    int ret, i;
>> +
>> +    adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
>> +    if (adapter == NULL) {
>> +        ret = -ENOMEM;
>> +        goto fail;
>> +    }
>> +
>> +    mutex_init(&adapter->mdev_lock);
>> +    adapter->mdev_count = 1;
>
>
> So this is per VF based vDPA implementation, which seems not 
> convenient for management.  Anyhow we can control the creation in PF?
>
> Thanks
>
>
>> +    adapter->dev = dev;
>> +
>> +    pci_set_drvdata(pdev, adapter);
>> +
>> +    ret = pci_enable_device(pdev);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev, "Failed to enable device.\n");
>> +        goto free_adapter;
>> +    }
>> +
>> +    ret = pci_request_regions(pdev, IFCVF_DRIVER_NAME);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev, "Failed to request MMIO region.\n");
>> +        goto disable_device;
>> +    }
>> +
>> +    pci_set_master(pdev);
>> +
>> +    ret = ifcvf_init_msix(adapter);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev, "Failed to initialize MSIX.\n");
>> +        goto free_msix;
>> +    }
>> +
>> +    vf = &adapter->vf;
>> +    for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
>> +        vf->mem_resource[i].phys_addr = pci_resource_start(pdev, i);
>> +        vf->mem_resource[i].len = pci_resource_len(pdev, i);
>> +        if (!vf->mem_resource[i].len) {
>> +            vf->mem_resource[i].addr = NULL;
>> +            continue;
>> +        }
>> +
>> +        vf->mem_resource[i].addr = pci_iomap_range(pdev, i, 0,
>> +                vf->mem_resource[i].len);
>> +        if (!vf->mem_resource[i].addr) {
>> +            IFC_ERR(adapter->dev, "Failed to map IO resource %d\n",
>> +                i);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if (ifcvf_init_hw(vf, pdev) < 0)
>> +        return -1;
>> +
>> +    ret = mdev_register_device(dev, &ifcvf_mdev_fops);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev,  "Failed to register mdev device\n");
>> +        goto destroy_adapter;
>> +    }
>> +
>> +    return 0;
>> +
>> +destroy_adapter:
>> +    ifcvf_destroy_adapter(adapter);
>> +free_msix:
>> +    pci_free_irq_vectors(pdev);
>> +    pci_release_regions(pdev);
>> +disable_device:
>> +    pci_disable_device(pdev);
>> +free_adapter:
>> +    kfree(adapter);
>> +fail:
>> +    return ret;
>> +}
>> +
>> +static void ifcvf_remove(struct pci_dev *pdev)
>> +{
>> +    struct device *dev = &pdev->dev;
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +    struct ifcvf_hw *vf;
>> +    int i;
>> +
>> +    mdev_unregister_device(dev);
>> +
>> +    vf = &adapter->vf;
>> +    for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
>> +        if (vf->mem_resource[i].addr) {
>> +            pci_iounmap(pdev, vf->mem_resource[i].addr);
>> +            vf->mem_resource[i].addr = NULL;
>> +        }
>> +    }
>> +
>> +    ifcvf_destroy_adapter(adapter);
>> +    pci_free_irq_vectors(pdev);
>> +
>> +    pci_release_regions(pdev);
>> +    pci_disable_device(pdev);
>> +
>> +    kfree(adapter);
>> +}
>> +
>> +static struct pci_device_id ifcvf_pci_ids[] = {
>> +    { PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
>> +            IFCVF_DEVICE_ID,
>> +            IFCVF_SUBSYS_VENDOR_ID,
>> +            IFCVF_SUBSYS_DEVICE_ID) },
>> +    { 0 },
>> +};
>> +MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
>> +
>> +static struct pci_driver ifcvf_driver = {
>> +    .name     = IFCVF_DRIVER_NAME,
>> +    .id_table = ifcvf_pci_ids,
>> +    .probe    = ifcvf_probe,
>> +    .remove   = ifcvf_remove,
>> +};
>> +
>> +static int __init ifcvf_init_module(void)
>> +{
>> +    int ret;
>> +
>> +    ret = pci_register_driver(&ifcvf_driver);
>> +    return ret;
>> +}
>> +
>> +static void __exit ifcvf_exit_module(void)
>> +{
>> +    pci_unregister_driver(&ifcvf_driver);
>> +}
>> +
>> +module_init(ifcvf_init_module);
>> +module_exit(ifcvf_exit_module);
>> +
>> +MODULE_LICENSE("GPL v2");
>> +MODULE_VERSION(VERSION_STRING);
>> +MODULE_AUTHOR(DRIVER_AUTHOR);
Zhu Lingshan Oct. 21, 2019, 9:53 a.m. UTC | #3
On 10/16/2019 6:19 PM, Jason Wang wrote:
>
> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>> This commit introduced IFC VF operations for vdpa, which complys to
>> vhost_mdev interfaces, handles IFC VF initialization,
>> configuration and removal.
>>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>> ---
>>   drivers/vhost/ifcvf/ifcvf_main.c | 541 
>> +++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 541 insertions(+)
>>   create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c
>>
>> diff --git a/drivers/vhost/ifcvf/ifcvf_main.c 
>> b/drivers/vhost/ifcvf/ifcvf_main.c
>> new file mode 100644
>> index 000000000000..c48a29969a85
>> --- /dev/null
>> +++ b/drivers/vhost/ifcvf/ifcvf_main.c
>> @@ -0,0 +1,541 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (C) 2019 Intel Corporation.
>> + */
>> +
>> +#include <linux/interrupt.h>
>> +#include <linux/module.h>
>> +#include <linux/mdev.h>
>> +#include <linux/pci.h>
>> +#include <linux/sysfs.h>
>> +
>> +#include "ifcvf_base.h"
>> +
>> +#define VERSION_STRING    "0.1"
>> +#define DRIVER_AUTHOR    "Intel Corporation"
>> +#define IFCVF_DRIVER_NAME    "ifcvf"
>> +
>> +static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
>> +{
>> +    struct vring_info *vring = arg;
>> +
>> +    if (vring->cb.callback)
>> +        return vring->cb.callback(vring->cb.private);
>> +
>> +    return IRQ_HANDLED;
>> +}
>> +
>> +static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
>> +{
>> +    return IFC_SUPPORTED_FEATURES;
>
>
> I would expect this should be done by querying the hw. Or IFC VF can't 
> get any update through its firmware?

Hi Jason,

Thanks for your comments, for now driver just support these features.

>
>
>> +}
>> +
>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>> features)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->req_features = features;
>> +
>> +    return 0;
>> +}
>> +
>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 qid)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->vring[qid].last_avail_idx;
>
>
> Does this really work? I'd expect it should be fetched from hw since 
> it's an internal state.
for now, it's working, we intend to support LM in next version drivers.
>
>
>> +}
>> +
>> +static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 
>> qid, u64 num)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[qid].last_used_idx = num;
>
>
> I fail to understand why last_used_idx is needed. It looks to me the 
> used idx in the used ring is sufficient.
I will remove it.
>
>
>> +    vf->vring[qid].last_avail_idx = num;
>
>
> Do we need a synchronization with hw immediately here?
>
>
>> +
>> +    return 0;
>> +}
>> +
>> +static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, u16 idx,
>> +                     u64 desc_area, u64 driver_area,
>> +                     u64 device_area)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[idx].desc = desc_area;
>> +    vf->vring[idx].avail = driver_area;
>> +    vf->vring[idx].used = device_area;
>> +
>> +    return 0;
>> +}
>> +
>> +static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 qid, 
>> u32 num)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[qid].size = num;
>> +}
>> +
>> +static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
>> +                u16 qid, bool ready)
>> +{
>> +
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[qid].ready = ready;
>> +}
>> +
>> +static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 qid)
>> +{
>> +
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->vring[qid].ready;
>> +}
>> +
>> +static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
>> +                 struct virtio_mdev_callback *cb)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->vring[idx].cb = *cb;
>> +}
>> +
>> +static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    ifcvf_notify_queue(vf, idx);
>> +}
>> +
>> +static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->status;
>> +}
>> +
>> +static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    return vf->generation;
>> +}
>> +
>> +static int ifcvf_mdev_get_version(struct mdev_device *mdev)
>> +{
>> +    return VIRTIO_MDEV_VERSION;
>> +}
>> +
>> +static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_DEVICE_ID;
>> +}
>> +
>> +static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_VENDOR_ID;
>> +}
>> +
>> +static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_QUEUE_ALIGNMENT;
>> +}
>> +
>> +static int ifcvf_start_datapath(void *private)
>> +{
>> +    int i, ret;
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>> +
>> +    for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
>> +        if (!vf->vring[i].ready)
>> +            break;
>
>
> Looks like error should be returned here?
agreed!
>
>
>> +
>> +        if (!vf->vring[i].size)
>> +            break;
>> +
>> +        if (!vf->vring[i].desc || !vf->vring[i].avail ||
>> +            !vf->vring[i].used)
>> +            break;
>> +    }
>> +    vf->nr_vring = i;
>> +
>> +    ret = ifcvf_start_hw(vf);
>> +    return ret;
>> +}
>> +
>> +static int ifcvf_stop_datapath(void *private)
>> +{
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>> +    int i;
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUES; i++)
>> +        vf->vring[i].cb.callback = NULL;
>
>
> Any synchronization is needed for the vq irq handler?
I think even we set callback = NULL, the code is still there, on-going 
routines would not be effected.
>
>
>> +
>> +    ifcvf_stop_hw(vf);
>> +
>> +    return 0;
>> +}
>> +
>> +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
>> +{
>> +    int i;
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>> +        vf->vring[i].last_used_idx = 0;
>> +        vf->vring[i].last_avail_idx = 0;
>> +        vf->vring[i].desc = 0;
>> +        vf->vring[i].avail = 0;
>> +        vf->vring[i].used = 0;
>> +        vf->vring[i].ready = 0;
>> +        vf->vring->cb.callback = NULL;
>> +        vf->vring->cb.private = NULL;
>> +    }
>> +}
>> +
>> +static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 status)
>> +{
>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +
>> +    vf->status = status;
>> +
>> +    if (status == 0) {
>> +        ifcvf_stop_datapath(adapter);
>> +        ifcvf_reset_vring(adapter);
>> +        return;
>> +    }
>> +
>> +    if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
>> +        ifcvf_start_datapath(adapter);
>> +        return;
>> +    }
>> +}
>> +
>> +static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
>> +{
>> +    return IFCVF_MAX_QUEUES;
>
>
> The name is confusing, it was used to return the maximum queue size. 
> In new version of virtio-mdev, the callback was renamed as 
> get_vq_num_max().
will change that.
>
>
>> +}
>> +
>> +static struct virtio_mdev_device_ops ifc_mdev_ops = {
>> +    .get_features  = ifcvf_mdev_get_features,
>> +    .set_features  = ifcvf_mdev_set_features,
>> +    .get_status    = ifcvf_mdev_get_status,
>> +    .set_status    = ifcvf_mdev_set_status,
>> +    .get_queue_max = ifcvf_mdev_get_queue_max,
>> +    .get_vq_state   = ifcvf_mdev_get_vq_state,
>> +    .set_vq_state   = ifcvf_mdev_set_vq_state,
>> +    .set_vq_cb      = ifcvf_mdev_set_vq_cb,
>> +    .set_vq_ready   = ifcvf_mdev_set_vq_ready,
>> +    .get_vq_ready    = ifcvf_mdev_get_vq_ready,
>> +    .set_vq_num     = ifcvf_mdev_set_vq_num,
>> +    .set_vq_address = ifcvf_mdev_set_vq_address,
>> +    .kick_vq        = ifcvf_mdev_kick_vq,
>> +    .get_generation    = ifcvf_mdev_get_generation,
>> +    .get_version    = ifcvf_mdev_get_version,
>> +    .get_device_id    = ifcvf_mdev_get_device_id,
>> +    .get_vendor_id    = ifcvf_mdev_get_vendor_id,
>> +    .get_vq_align    = ifcvf_mdev_get_vq_align,
>> +};
>
>
> set_config/get_config is missing. It looks to me they are not hard, 
> just implementing the access to dev_cfg. It's key to make kernel 
> virtio driver to work.
>
> And in the new version of virito-mdev, features like _F_LOG_ALL should 
> be advertised through get_mdev_features.
IMHO, currently the driver can work without set/get_config, vhost_mdev 
doesn't call them for now.
>
>
>> +
>> +static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
>> +{
>> +    int vector, i, ret, irq;
>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>> +    struct ifcvf_hw *vf = &adapter->vf;
>> +
>> +    ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
>> +            IFCVF_MAX_INTR, PCI_IRQ_MSIX);
>> +    if (ret < 0) {
>> +        IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
>> +        return ret;
>> +    }
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>> +        irq = pci_irq_vector(pdev, vector);
>> +        ret = request_irq(irq, ifcvf_intr_handler, 0,
>> +                pci_name(pdev), &vf->vring[i]);
>> +        if (ret) {
>> +            IFC_ERR(adapter->dev,
>> +                "Failed to request irq for vq %d.\n", i);
>> +            return ret;
>> +        }
>> +    }
>
>
> Do we need to provide fallback when we can't do per vq MSIX?
I think it would be very rarely that can not get enough vectors.
>
>
>> +
>> +    return 0;
>> +}
>> +
>> +static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
>> +{
>> +    int i, vector, irq;
>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>> +
>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>> +        irq = pci_irq_vector(pdev, vector);
>> +        free_irq(irq, &vf->vring[i]);
>> +    }
>> +}
>> +
>> +static ssize_t name_show(struct kobject *kobj, struct device *dev, 
>> char *buf)
>> +{
>> +    const char *name = "vhost accelerator (virtio ring compatible)";
>> +
>> +    return sprintf(buf, "%s\n", name);
>> +}
>> +MDEV_TYPE_ATTR_RO(name);
>> +
>> +static ssize_t device_api_show(struct kobject *kobj, struct device 
>> *dev,
>> +                   char *buf)
>> +{
>> +    return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
>> +}
>> +MDEV_TYPE_ATTR_RO(device_api);
>> +
>> +static ssize_t available_instances_show(struct kobject *kobj,
>> +                    struct device *dev, char *buf)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +
>> +    return sprintf(buf, "%d\n", adapter->mdev_count);
>> +}
>> +
>> +MDEV_TYPE_ATTR_RO(available_instances);
>> +
>> +static ssize_t type_show(struct kobject *kobj,
>> +            struct device *dev, char *buf)
>> +{
>> +    return sprintf(buf, "%s\n", "net");
>> +}
>> +
>> +MDEV_TYPE_ATTR_RO(type);
>> +
>> +
>> +static struct attribute *mdev_types_attrs[] = {
>> +    &mdev_type_attr_name.attr,
>> +    &mdev_type_attr_device_api.attr,
>> +    &mdev_type_attr_available_instances.attr,
>> +    &mdev_type_attr_type.attr,
>> +    NULL,
>> +};
>> +
>> +static struct attribute_group mdev_type_group = {
>> +    .name  = "vdpa_virtio",
>
>
> To be consistent, it should be "vhost" or "virtio".
agreed!
>
>
>> +    .attrs = mdev_types_attrs,
>> +};
>> +
>> +static struct attribute_group *mdev_type_groups[] = {
>> +    &mdev_type_group,
>> +    NULL,
>> +};
>> +
>> +const struct attribute_group *mdev_dev_groups[] = {
>> +    NULL,
>> +};
>> +
>> +static int ifcvf_mdev_create(struct kobject *kobj, struct 
>> mdev_device *mdev)
>> +{
>> +    struct device *dev = mdev_parent_dev(mdev);
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +    int ret = 0;
>> +
>> +    mutex_lock(&adapter->mdev_lock);
>> +
>> +    if (adapter->mdev_count < 1) {
>> +        ret = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    mdev_set_class_id(mdev, MDEV_ID_VHOST);
>> +    mdev_set_dev_ops(mdev, &ifc_mdev_ops);
>> +
>> +    mdev_set_drvdata(mdev, adapter);
>> +    mdev_set_iommu_device(mdev_dev(mdev), dev);
>> +
>> +    INIT_LIST_HEAD(&adapter->dma_maps);
>> +    adapter->mdev_count--;
>> +
>> +out:
>> +    mutex_unlock(&adapter->mdev_lock);
>> +    return ret;
>> +}
>> +
>> +static int ifcvf_mdev_remove(struct mdev_device *mdev)
>> +{
>> +    struct device *dev = mdev_parent_dev(mdev);
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +
>> +    mutex_lock(&adapter->mdev_lock);
>> +    adapter->mdev_count++;
>> +    mutex_unlock(&adapter->mdev_lock);
>> +
>> +    return 0;
>> +}
>> +
>> +static struct mdev_parent_ops ifcvf_mdev_fops = {
>> +    .owner            = THIS_MODULE,
>> +    .supported_type_groups    = mdev_type_groups,
>> +    .mdev_attr_groups    = mdev_dev_groups,
>> +    .create            = ifcvf_mdev_create,
>> +    .remove            = ifcvf_mdev_remove,
>> +};
>> +
>> +static int ifcvf_probe(struct pci_dev *pdev, const struct 
>> pci_device_id *id)
>> +{
>> +    struct device *dev = &pdev->dev;
>> +    struct ifcvf_adapter *adapter;
>> +    struct ifcvf_hw *vf;
>> +    int ret, i;
>> +
>> +    adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
>> +    if (adapter == NULL) {
>> +        ret = -ENOMEM;
>> +        goto fail;
>> +    }
>> +
>> +    mutex_init(&adapter->mdev_lock);
>> +    adapter->mdev_count = 1;
>
>
> So this is per VF based vDPA implementation, which seems not 
> convenient for management.  Anyhow we can control the creation in PF?
>
> Thanks
the driver scope for now doesn't support that, we can add these feature 
in next releases.
>
>
>> +    adapter->dev = dev;
>> +
>> +    pci_set_drvdata(pdev, adapter);
>> +
>> +    ret = pci_enable_device(pdev);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev, "Failed to enable device.\n");
>> +        goto free_adapter;
>> +    }
>> +
>> +    ret = pci_request_regions(pdev, IFCVF_DRIVER_NAME);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev, "Failed to request MMIO region.\n");
>> +        goto disable_device;
>> +    }
>> +
>> +    pci_set_master(pdev);
>> +
>> +    ret = ifcvf_init_msix(adapter);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev, "Failed to initialize MSIX.\n");
>> +        goto free_msix;
>> +    }
>> +
>> +    vf = &adapter->vf;
>> +    for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
>> +        vf->mem_resource[i].phys_addr = pci_resource_start(pdev, i);
>> +        vf->mem_resource[i].len = pci_resource_len(pdev, i);
>> +        if (!vf->mem_resource[i].len) {
>> +            vf->mem_resource[i].addr = NULL;
>> +            continue;
>> +        }
>> +
>> +        vf->mem_resource[i].addr = pci_iomap_range(pdev, i, 0,
>> +                vf->mem_resource[i].len);
>> +        if (!vf->mem_resource[i].addr) {
>> +            IFC_ERR(adapter->dev, "Failed to map IO resource %d\n",
>> +                i);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if (ifcvf_init_hw(vf, pdev) < 0)
>> +        return -1;
>> +
>> +    ret = mdev_register_device(dev, &ifcvf_mdev_fops);
>> +    if (ret) {
>> +        IFC_ERR(adapter->dev,  "Failed to register mdev device\n");
>> +        goto destroy_adapter;
>> +    }
>> +
>> +    return 0;
>> +
>> +destroy_adapter:
>> +    ifcvf_destroy_adapter(adapter);
>> +free_msix:
>> +    pci_free_irq_vectors(pdev);
>> +    pci_release_regions(pdev);
>> +disable_device:
>> +    pci_disable_device(pdev);
>> +free_adapter:
>> +    kfree(adapter);
>> +fail:
>> +    return ret;
>> +}
>> +
>> +static void ifcvf_remove(struct pci_dev *pdev)
>> +{
>> +    struct device *dev = &pdev->dev;
>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>> +    struct ifcvf_hw *vf;
>> +    int i;
>> +
>> +    mdev_unregister_device(dev);
>> +
>> +    vf = &adapter->vf;
>> +    for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
>> +        if (vf->mem_resource[i].addr) {
>> +            pci_iounmap(pdev, vf->mem_resource[i].addr);
>> +            vf->mem_resource[i].addr = NULL;
>> +        }
>> +    }
>> +
>> +    ifcvf_destroy_adapter(adapter);
>> +    pci_free_irq_vectors(pdev);
>> +
>> +    pci_release_regions(pdev);
>> +    pci_disable_device(pdev);
>> +
>> +    kfree(adapter);
>> +}
>> +
>> +static struct pci_device_id ifcvf_pci_ids[] = {
>> +    { PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
>> +            IFCVF_DEVICE_ID,
>> +            IFCVF_SUBSYS_VENDOR_ID,
>> +            IFCVF_SUBSYS_DEVICE_ID) },
>> +    { 0 },
>> +};
>> +MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
>> +
>> +static struct pci_driver ifcvf_driver = {
>> +    .name     = IFCVF_DRIVER_NAME,
>> +    .id_table = ifcvf_pci_ids,
>> +    .probe    = ifcvf_probe,
>> +    .remove   = ifcvf_remove,
>> +};
>> +
>> +static int __init ifcvf_init_module(void)
>> +{
>> +    int ret;
>> +
>> +    ret = pci_register_driver(&ifcvf_driver);
>> +    return ret;
>> +}
>> +
>> +static void __exit ifcvf_exit_module(void)
>> +{
>> +    pci_unregister_driver(&ifcvf_driver);
>> +}
>> +
>> +module_init(ifcvf_init_module);
>> +module_exit(ifcvf_exit_module);
>> +
>> +MODULE_LICENSE("GPL v2");
>> +MODULE_VERSION(VERSION_STRING);
>> +MODULE_AUTHOR(DRIVER_AUTHOR);
Jason Wang Oct. 21, 2019, 10:19 a.m. UTC | #4
On 2019/10/21 下午5:53, Zhu, Lingshan wrote:
>
> On 10/16/2019 6:19 PM, Jason Wang wrote:
>>
>> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>>> This commit introduced IFC VF operations for vdpa, which complys to
>>> vhost_mdev interfaces, handles IFC VF initialization,
>>> configuration and removal.
>>>
>>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>>> ---
>>>   drivers/vhost/ifcvf/ifcvf_main.c | 541 
>>> +++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 541 insertions(+)
>>>   create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c
>>>
>>> diff --git a/drivers/vhost/ifcvf/ifcvf_main.c 
>>> b/drivers/vhost/ifcvf/ifcvf_main.c
>>> new file mode 100644
>>> index 000000000000..c48a29969a85
>>> --- /dev/null
>>> +++ b/drivers/vhost/ifcvf/ifcvf_main.c
>>> @@ -0,0 +1,541 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * Copyright (C) 2019 Intel Corporation.
>>> + */
>>> +
>>> +#include <linux/interrupt.h>
>>> +#include <linux/module.h>
>>> +#include <linux/mdev.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/sysfs.h>
>>> +
>>> +#include "ifcvf_base.h"
>>> +
>>> +#define VERSION_STRING    "0.1"
>>> +#define DRIVER_AUTHOR    "Intel Corporation"
>>> +#define IFCVF_DRIVER_NAME    "ifcvf"
>>> +
>>> +static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
>>> +{
>>> +    struct vring_info *vring = arg;
>>> +
>>> +    if (vring->cb.callback)
>>> +        return vring->cb.callback(vring->cb.private);
>>> +
>>> +    return IRQ_HANDLED;
>>> +}
>>> +
>>> +static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
>>> +{
>>> +    return IFC_SUPPORTED_FEATURES;
>>
>>
>> I would expect this should be done by querying the hw. Or IFC VF 
>> can't get any update through its firmware?
>
> Hi Jason,
>
> Thanks for your comments, for now driver just support these features.


Ok, it should work but less flexible, we can change it in the future.


>
>>
>>
>>> +}
>>> +
>>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>>> features)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->req_features = features;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 qid)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    return vf->vring[qid].last_avail_idx;
>>
>>
>> Does this really work? I'd expect it should be fetched from hw since 
>> it's an internal state.
> for now, it's working, we intend to support LM in next version drivers.


I'm not sure I understand here, I don't see any synchronization between 
the hardware and last_avail_idx, so last_avail_idx should not change.

Btw, what did "LM" mean :) ?


>>
>>
>>> +}
>>> +
>>> +static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 
>>> qid, u64 num)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->vring[qid].last_used_idx = num;
>>
>>
>> I fail to understand why last_used_idx is needed. It looks to me the 
>> used idx in the used ring is sufficient.
> I will remove it.
>>
>>
>>> +    vf->vring[qid].last_avail_idx = num;
>>
>>
>> Do we need a synchronization with hw immediately here?
>>
>>
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, u16 
>>> idx,
>>> +                     u64 desc_area, u64 driver_area,
>>> +                     u64 device_area)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->vring[idx].desc = desc_area;
>>> +    vf->vring[idx].avail = driver_area;
>>> +    vf->vring[idx].used = device_area;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 
>>> qid, u32 num)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->vring[qid].size = num;
>>> +}
>>> +
>>> +static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
>>> +                u16 qid, bool ready)
>>> +{
>>> +
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->vring[qid].ready = ready;
>>> +}
>>> +
>>> +static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 qid)
>>> +{
>>> +
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    return vf->vring[qid].ready;
>>> +}
>>> +
>>> +static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
>>> +                 struct virtio_mdev_callback *cb)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->vring[idx].cb = *cb;
>>> +}
>>> +
>>> +static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    ifcvf_notify_queue(vf, idx);
>>> +}
>>> +
>>> +static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    return vf->status;
>>> +}
>>> +
>>> +static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    return vf->generation;
>>> +}
>>> +
>>> +static int ifcvf_mdev_get_version(struct mdev_device *mdev)
>>> +{
>>> +    return VIRTIO_MDEV_VERSION;
>>> +}
>>> +
>>> +static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
>>> +{
>>> +    return IFCVF_DEVICE_ID;
>>> +}
>>> +
>>> +static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
>>> +{
>>> +    return IFCVF_VENDOR_ID;
>>> +}
>>> +
>>> +static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
>>> +{
>>> +    return IFCVF_QUEUE_ALIGNMENT;
>>> +}
>>> +
>>> +static int ifcvf_start_datapath(void *private)
>>> +{
>>> +    int i, ret;
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>>> +
>>> +    for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
>>> +        if (!vf->vring[i].ready)
>>> +            break;
>>
>>
>> Looks like error should be returned here?
> agreed!
>>
>>
>>> +
>>> +        if (!vf->vring[i].size)
>>> +            break;
>>> +
>>> +        if (!vf->vring[i].desc || !vf->vring[i].avail ||
>>> +            !vf->vring[i].used)
>>> +            break;
>>> +    }
>>> +    vf->nr_vring = i;
>>> +
>>> +    ret = ifcvf_start_hw(vf);
>>> +    return ret;
>>> +}
>>> +
>>> +static int ifcvf_stop_datapath(void *private)
>>> +{
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>>> +    int i;
>>> +
>>> +    for (i = 0; i < IFCVF_MAX_QUEUES; i++)
>>> +        vf->vring[i].cb.callback = NULL;
>>
>>
>> Any synchronization is needed for the vq irq handler?
> I think even we set callback = NULL, the code is still there, on-going 
> routines would not be effected.


Ok I think you mean when ifcvf_stop_hw() return, hardware will not 
respond to e.g kick and other events etc.


>>
>>
>>> +
>>> +    ifcvf_stop_hw(vf);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
>>> +{
>>> +    int i;
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>> +        vf->vring[i].last_used_idx = 0;
>>> +        vf->vring[i].last_avail_idx = 0;
>>> +        vf->vring[i].desc = 0;
>>> +        vf->vring[i].avail = 0;
>>> +        vf->vring[i].used = 0;
>>> +        vf->vring[i].ready = 0;
>>> +        vf->vring->cb.callback = NULL;
>>> +        vf->vring->cb.private = NULL;
>>> +    }
>>> +}
>>> +
>>> +static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 status)
>>> +{
>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +
>>> +    vf->status = status;
>>> +
>>> +    if (status == 0) {
>>> +        ifcvf_stop_datapath(adapter);
>>> +        ifcvf_reset_vring(adapter);
>>> +        return;
>>> +    }
>>> +
>>> +    if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
>>> +        ifcvf_start_datapath(adapter);
>>> +        return;
>>> +    }
>>> +}
>>> +
>>> +static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
>>> +{
>>> +    return IFCVF_MAX_QUEUES;
>>
>>
>> The name is confusing, it was used to return the maximum queue size. 
>> In new version of virtio-mdev, the callback was renamed as 
>> get_vq_num_max().
> will change that.
>>
>>
>>> +}
>>> +
>>> +static struct virtio_mdev_device_ops ifc_mdev_ops = {
>>> +    .get_features  = ifcvf_mdev_get_features,
>>> +    .set_features  = ifcvf_mdev_set_features,
>>> +    .get_status    = ifcvf_mdev_get_status,
>>> +    .set_status    = ifcvf_mdev_set_status,
>>> +    .get_queue_max = ifcvf_mdev_get_queue_max,
>>> +    .get_vq_state   = ifcvf_mdev_get_vq_state,
>>> +    .set_vq_state   = ifcvf_mdev_set_vq_state,
>>> +    .set_vq_cb      = ifcvf_mdev_set_vq_cb,
>>> +    .set_vq_ready   = ifcvf_mdev_set_vq_ready,
>>> +    .get_vq_ready    = ifcvf_mdev_get_vq_ready,
>>> +    .set_vq_num     = ifcvf_mdev_set_vq_num,
>>> +    .set_vq_address = ifcvf_mdev_set_vq_address,
>>> +    .kick_vq        = ifcvf_mdev_kick_vq,
>>> +    .get_generation    = ifcvf_mdev_get_generation,
>>> +    .get_version    = ifcvf_mdev_get_version,
>>> +    .get_device_id    = ifcvf_mdev_get_device_id,
>>> +    .get_vendor_id    = ifcvf_mdev_get_vendor_id,
>>> +    .get_vq_align    = ifcvf_mdev_get_vq_align,
>>> +};
>>
>>
>> set_config/get_config is missing. It looks to me they are not hard, 
>> just implementing the access to dev_cfg. It's key to make kernel 
>> virtio driver to work.
>>
>> And in the new version of virito-mdev, features like _F_LOG_ALL 
>> should be advertised through get_mdev_features.
> IMHO, currently the driver can work without set/get_config, vhost_mdev 
> doesn't call them for now.


Yes, but it was required by virtio_mdev for host driver to work, and it 
looks to me it's not hard to add them. If possible please add them and 
"virtio" type then we can use the ops for both the case of VM and 
containers.


>>
>>
>>> +
>>> +static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
>>> +{
>>> +    int vector, i, ret, irq;
>>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>>> +    struct ifcvf_hw *vf = &adapter->vf;
>>> +
>>> +    ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
>>> +            IFCVF_MAX_INTR, PCI_IRQ_MSIX);
>>> +    if (ret < 0) {
>>> +        IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
>>> +        return ret;
>>> +    }
>>> +
>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>>> +        irq = pci_irq_vector(pdev, vector);
>>> +        ret = request_irq(irq, ifcvf_intr_handler, 0,
>>> +                pci_name(pdev), &vf->vring[i]);
>>> +        if (ret) {
>>> +            IFC_ERR(adapter->dev,
>>> +                "Failed to request irq for vq %d.\n", i);
>>> +            return ret;
>>> +        }
>>> +    }
>>
>>
>> Do we need to provide fallback when we can't do per vq MSIX?
> I think it would be very rarely that can not get enough vectors.


Right.


>>
>>
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
>>> +{
>>> +    int i, vector, irq;
>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>>> +
>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>>> +        irq = pci_irq_vector(pdev, vector);
>>> +        free_irq(irq, &vf->vring[i]);
>>> +    }
>>> +}
>>> +
>>> +static ssize_t name_show(struct kobject *kobj, struct device *dev, 
>>> char *buf)
>>> +{
>>> +    const char *name = "vhost accelerator (virtio ring compatible)";
>>> +
>>> +    return sprintf(buf, "%s\n", name);
>>> +}
>>> +MDEV_TYPE_ATTR_RO(name);
>>> +
>>> +static ssize_t device_api_show(struct kobject *kobj, struct device 
>>> *dev,
>>> +                   char *buf)
>>> +{
>>> +    return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
>>> +}
>>> +MDEV_TYPE_ATTR_RO(device_api);
>>> +
>>> +static ssize_t available_instances_show(struct kobject *kobj,
>>> +                    struct device *dev, char *buf)
>>> +{
>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>> +
>>> +    return sprintf(buf, "%d\n", adapter->mdev_count);
>>> +}
>>> +
>>> +MDEV_TYPE_ATTR_RO(available_instances);
>>> +
>>> +static ssize_t type_show(struct kobject *kobj,
>>> +            struct device *dev, char *buf)
>>> +{
>>> +    return sprintf(buf, "%s\n", "net");
>>> +}
>>> +
>>> +MDEV_TYPE_ATTR_RO(type);
>>> +
>>> +
>>> +static struct attribute *mdev_types_attrs[] = {
>>> +    &mdev_type_attr_name.attr,
>>> +    &mdev_type_attr_device_api.attr,
>>> +    &mdev_type_attr_available_instances.attr,
>>> +    &mdev_type_attr_type.attr,
>>> +    NULL,
>>> +};
>>> +
>>> +static struct attribute_group mdev_type_group = {
>>> +    .name  = "vdpa_virtio",
>>
>>
>> To be consistent, it should be "vhost" or "virtio".
> agreed!
>>
>>
>>> +    .attrs = mdev_types_attrs,
>>> +};
>>> +
>>> +static struct attribute_group *mdev_type_groups[] = {
>>> +    &mdev_type_group,
>>> +    NULL,
>>> +};
>>> +
>>> +const struct attribute_group *mdev_dev_groups[] = {
>>> +    NULL,
>>> +};
>>> +
>>> +static int ifcvf_mdev_create(struct kobject *kobj, struct 
>>> mdev_device *mdev)
>>> +{
>>> +    struct device *dev = mdev_parent_dev(mdev);
>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>> +    int ret = 0;
>>> +
>>> +    mutex_lock(&adapter->mdev_lock);
>>> +
>>> +    if (adapter->mdev_count < 1) {
>>> +        ret = -EINVAL;
>>> +        goto out;
>>> +    }
>>> +
>>> +    mdev_set_class_id(mdev, MDEV_ID_VHOST);
>>> +    mdev_set_dev_ops(mdev, &ifc_mdev_ops);
>>> +
>>> +    mdev_set_drvdata(mdev, adapter);
>>> +    mdev_set_iommu_device(mdev_dev(mdev), dev);
>>> +
>>> +    INIT_LIST_HEAD(&adapter->dma_maps);
>>> +    adapter->mdev_count--;
>>> +
>>> +out:
>>> +    mutex_unlock(&adapter->mdev_lock);
>>> +    return ret;
>>> +}
>>> +
>>> +static int ifcvf_mdev_remove(struct mdev_device *mdev)
>>> +{
>>> +    struct device *dev = mdev_parent_dev(mdev);
>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>> +
>>> +    mutex_lock(&adapter->mdev_lock);
>>> +    adapter->mdev_count++;
>>> +    mutex_unlock(&adapter->mdev_lock);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static struct mdev_parent_ops ifcvf_mdev_fops = {
>>> +    .owner            = THIS_MODULE,
>>> +    .supported_type_groups    = mdev_type_groups,
>>> +    .mdev_attr_groups    = mdev_dev_groups,
>>> +    .create            = ifcvf_mdev_create,
>>> +    .remove            = ifcvf_mdev_remove,
>>> +};
>>> +
>>> +static int ifcvf_probe(struct pci_dev *pdev, const struct 
>>> pci_device_id *id)
>>> +{
>>> +    struct device *dev = &pdev->dev;
>>> +    struct ifcvf_adapter *adapter;
>>> +    struct ifcvf_hw *vf;
>>> +    int ret, i;
>>> +
>>> +    adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
>>> +    if (adapter == NULL) {
>>> +        ret = -ENOMEM;
>>> +        goto fail;
>>> +    }
>>> +
>>> +    mutex_init(&adapter->mdev_lock);
>>> +    adapter->mdev_count = 1;
>>
>>
>> So this is per VF based vDPA implementation, which seems not 
>> convenient for management.  Anyhow we can control the creation in PF?
>>
>> Thanks
> the driver scope for now doesn't support that, we can add these 
> feature in next releases.


Not a must for this series, but to have a better interaction with 
management like libvirt, it's better.

Btw, do you have the plan to post PF drivers?

Thanks
Zhu Lingshan Oct. 22, 2019, 6:53 a.m. UTC | #5
On 10/21/2019 6:19 PM, Jason Wang wrote:
>
> On 2019/10/21 下午5:53, Zhu, Lingshan wrote:
>>
>> On 10/16/2019 6:19 PM, Jason Wang wrote:
>>>
>>> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>>>> This commit introduced IFC VF operations for vdpa, which complys to
>>>> vhost_mdev interfaces, handles IFC VF initialization,
>>>> configuration and removal.
>>>>
>>>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>>>> ---
>>>>   drivers/vhost/ifcvf/ifcvf_main.c | 541 
>>>> +++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 541 insertions(+)
>>>>   create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c
>>>>
>>>> diff --git a/drivers/vhost/ifcvf/ifcvf_main.c 
>>>> b/drivers/vhost/ifcvf/ifcvf_main.c
>>>> new file mode 100644
>>>> index 000000000000..c48a29969a85
>>>> --- /dev/null
>>>> +++ b/drivers/vhost/ifcvf/ifcvf_main.c
>>>> @@ -0,0 +1,541 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Copyright (C) 2019 Intel Corporation.
>>>> + */
>>>> +
>>>> +#include <linux/interrupt.h>
>>>> +#include <linux/module.h>
>>>> +#include <linux/mdev.h>
>>>> +#include <linux/pci.h>
>>>> +#include <linux/sysfs.h>
>>>> +
>>>> +#include "ifcvf_base.h"
>>>> +
>>>> +#define VERSION_STRING    "0.1"
>>>> +#define DRIVER_AUTHOR    "Intel Corporation"
>>>> +#define IFCVF_DRIVER_NAME    "ifcvf"
>>>> +
>>>> +static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
>>>> +{
>>>> +    struct vring_info *vring = arg;
>>>> +
>>>> +    if (vring->cb.callback)
>>>> +        return vring->cb.callback(vring->cb.private);
>>>> +
>>>> +    return IRQ_HANDLED;
>>>> +}
>>>> +
>>>> +static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
>>>> +{
>>>> +    return IFC_SUPPORTED_FEATURES;
>>>
>>>
>>> I would expect this should be done by querying the hw. Or IFC VF 
>>> can't get any update through its firmware?
>>
>> Hi Jason,
>>
>> Thanks for your comments, for now driver just support these features.
>
>
> Ok, it should work but less flexible, we can change it in the future.
sure!
>
>
>>
>>>
>>>
>>>> +}
>>>> +
>>>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>>>> features)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->req_features = features;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 qid)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    return vf->vring[qid].last_avail_idx;
>>>
>>>
>>> Does this really work? I'd expect it should be fetched from hw since 
>>> it's an internal state.
>> for now, it's working, we intend to support LM in next version drivers.
>
>
> I'm not sure I understand here, I don't see any synchronization 
> between the hardware and last_avail_idx, so last_avail_idx should not 
> change.
>
> Btw, what did "LM" mean :) ?

I can add bar IO operations here, LM = live migration, sorry for the 
abbreviation.

>
>
>>>
>>>
>>>> +}
>>>> +
>>>> +static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 
>>>> qid, u64 num)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->vring[qid].last_used_idx = num;
>>>
>>>
>>> I fail to understand why last_used_idx is needed. It looks to me the 
>>> used idx in the used ring is sufficient.
>> I will remove it.
>>>
>>>
>>>> + vf->vring[qid].last_avail_idx = num;
>>>
>>>
>>> Do we need a synchronization with hw immediately here?
>>>
>>>
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, u16 
>>>> idx,
>>>> +                     u64 desc_area, u64 driver_area,
>>>> +                     u64 device_area)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->vring[idx].desc = desc_area;
>>>> +    vf->vring[idx].avail = driver_area;
>>>> +    vf->vring[idx].used = device_area;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 
>>>> qid, u32 num)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->vring[qid].size = num;
>>>> +}
>>>> +
>>>> +static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
>>>> +                u16 qid, bool ready)
>>>> +{
>>>> +
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->vring[qid].ready = ready;
>>>> +}
>>>> +
>>>> +static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 
>>>> qid)
>>>> +{
>>>> +
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    return vf->vring[qid].ready;
>>>> +}
>>>> +
>>>> +static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
>>>> +                 struct virtio_mdev_callback *cb)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->vring[idx].cb = *cb;
>>>> +}
>>>> +
>>>> +static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    ifcvf_notify_queue(vf, idx);
>>>> +}
>>>> +
>>>> +static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    return vf->status;
>>>> +}
>>>> +
>>>> +static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    return vf->generation;
>>>> +}
>>>> +
>>>> +static int ifcvf_mdev_get_version(struct mdev_device *mdev)
>>>> +{
>>>> +    return VIRTIO_MDEV_VERSION;
>>>> +}
>>>> +
>>>> +static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
>>>> +{
>>>> +    return IFCVF_DEVICE_ID;
>>>> +}
>>>> +
>>>> +static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
>>>> +{
>>>> +    return IFCVF_VENDOR_ID;
>>>> +}
>>>> +
>>>> +static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
>>>> +{
>>>> +    return IFCVF_QUEUE_ALIGNMENT;
>>>> +}
>>>> +
>>>> +static int ifcvf_start_datapath(void *private)
>>>> +{
>>>> +    int i, ret;
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>>>> +
>>>> +    for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
>>>> +        if (!vf->vring[i].ready)
>>>> +            break;
>>>
>>>
>>> Looks like error should be returned here?
>> agreed!
>>>
>>>
>>>> +
>>>> +        if (!vf->vring[i].size)
>>>> +            break;
>>>> +
>>>> +        if (!vf->vring[i].desc || !vf->vring[i].avail ||
>>>> +            !vf->vring[i].used)
>>>> +            break;
>>>> +    }
>>>> +    vf->nr_vring = i;
>>>> +
>>>> +    ret = ifcvf_start_hw(vf);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static int ifcvf_stop_datapath(void *private)
>>>> +{
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < IFCVF_MAX_QUEUES; i++)
>>>> +        vf->vring[i].cb.callback = NULL;
>>>
>>>
>>> Any synchronization is needed for the vq irq handler?
>> I think even we set callback = NULL, the code is still there, 
>> on-going routines would not be effected.
>
>
> Ok I think you mean when ifcvf_stop_hw() return, hardware will not 
> respond to e.g kick and other events etc.
Yes, ifcvf_stop_hw() would disable the hw.
>
>
>>>
>>>
>>>> +
>>>> +    ifcvf_stop_hw(vf);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
>>>> +{
>>>> +    int i;
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>>> +        vf->vring[i].last_used_idx = 0;
>>>> +        vf->vring[i].last_avail_idx = 0;
>>>> +        vf->vring[i].desc = 0;
>>>> +        vf->vring[i].avail = 0;
>>>> +        vf->vring[i].used = 0;
>>>> +        vf->vring[i].ready = 0;
>>>> +        vf->vring->cb.callback = NULL;
>>>> +        vf->vring->cb.private = NULL;
>>>> +    }
>>>> +}
>>>> +
>>>> +static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 
>>>> status)
>>>> +{
>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +
>>>> +    vf->status = status;
>>>> +
>>>> +    if (status == 0) {
>>>> +        ifcvf_stop_datapath(adapter);
>>>> +        ifcvf_reset_vring(adapter);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
>>>> +        ifcvf_start_datapath(adapter);
>>>> +        return;
>>>> +    }
>>>> +}
>>>> +
>>>> +static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
>>>> +{
>>>> +    return IFCVF_MAX_QUEUES;
>>>
>>>
>>> The name is confusing, it was used to return the maximum queue size. 
>>> In new version of virtio-mdev, the callback was renamed as 
>>> get_vq_num_max().
>> will change that.
>>>
>>>
>>>> +}
>>>> +
>>>> +static struct virtio_mdev_device_ops ifc_mdev_ops = {
>>>> +    .get_features  = ifcvf_mdev_get_features,
>>>> +    .set_features  = ifcvf_mdev_set_features,
>>>> +    .get_status    = ifcvf_mdev_get_status,
>>>> +    .set_status    = ifcvf_mdev_set_status,
>>>> +    .get_queue_max = ifcvf_mdev_get_queue_max,
>>>> +    .get_vq_state   = ifcvf_mdev_get_vq_state,
>>>> +    .set_vq_state   = ifcvf_mdev_set_vq_state,
>>>> +    .set_vq_cb      = ifcvf_mdev_set_vq_cb,
>>>> +    .set_vq_ready   = ifcvf_mdev_set_vq_ready,
>>>> +    .get_vq_ready    = ifcvf_mdev_get_vq_ready,
>>>> +    .set_vq_num     = ifcvf_mdev_set_vq_num,
>>>> +    .set_vq_address = ifcvf_mdev_set_vq_address,
>>>> +    .kick_vq        = ifcvf_mdev_kick_vq,
>>>> +    .get_generation    = ifcvf_mdev_get_generation,
>>>> +    .get_version    = ifcvf_mdev_get_version,
>>>> +    .get_device_id    = ifcvf_mdev_get_device_id,
>>>> +    .get_vendor_id    = ifcvf_mdev_get_vendor_id,
>>>> +    .get_vq_align    = ifcvf_mdev_get_vq_align,
>>>> +};
>>>
>>>
>>> set_config/get_config is missing. It looks to me they are not hard, 
>>> just implementing the access to dev_cfg. It's key to make kernel 
>>> virtio driver to work.
>>>
>>> And in the new version of virito-mdev, features like _F_LOG_ALL 
>>> should be advertised through get_mdev_features.
>> IMHO, currently the driver can work without set/get_config, 
>> vhost_mdev doesn't call them for now.
>
>
> Yes, but it was required by virtio_mdev for host driver to work, and 
> it looks to me it's not hard to add them. If possible please add them 
> and "virtio" type then we can use the ops for both the case of VM and 
> containers.
sure
>
>
>>>
>>>
>>>> +
>>>> +static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
>>>> +{
>>>> +    int vector, i, ret, irq;
>>>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>>>> +    struct ifcvf_hw *vf = &adapter->vf;
>>>> +
>>>> +    ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
>>>> +            IFCVF_MAX_INTR, PCI_IRQ_MSIX);
>>>> +    if (ret < 0) {
>>>> +        IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>>>> +        irq = pci_irq_vector(pdev, vector);
>>>> +        ret = request_irq(irq, ifcvf_intr_handler, 0,
>>>> +                pci_name(pdev), &vf->vring[i]);
>>>> +        if (ret) {
>>>> +            IFC_ERR(adapter->dev,
>>>> +                "Failed to request irq for vq %d.\n", i);
>>>> +            return ret;
>>>> +        }
>>>> +    }
>>>
>>>
>>> Do we need to provide fallback when we can't do per vq MSIX?
>> I think it would be very rarely that can not get enough vectors.
>
>
> Right.
>
>
>>>
>>>
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
>>>> +{
>>>> +    int i, vector, irq;
>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>>>> +
>>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>>>> +        irq = pci_irq_vector(pdev, vector);
>>>> +        free_irq(irq, &vf->vring[i]);
>>>> +    }
>>>> +}
>>>> +
>>>> +static ssize_t name_show(struct kobject *kobj, struct device *dev, 
>>>> char *buf)
>>>> +{
>>>> +    const char *name = "vhost accelerator (virtio ring compatible)";
>>>> +
>>>> +    return sprintf(buf, "%s\n", name);
>>>> +}
>>>> +MDEV_TYPE_ATTR_RO(name);
>>>> +
>>>> +static ssize_t device_api_show(struct kobject *kobj, struct device 
>>>> *dev,
>>>> +                   char *buf)
>>>> +{
>>>> +    return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
>>>> +}
>>>> +MDEV_TYPE_ATTR_RO(device_api);
>>>> +
>>>> +static ssize_t available_instances_show(struct kobject *kobj,
>>>> +                    struct device *dev, char *buf)
>>>> +{
>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>>> +
>>>> +    return sprintf(buf, "%d\n", adapter->mdev_count);
>>>> +}
>>>> +
>>>> +MDEV_TYPE_ATTR_RO(available_instances);
>>>> +
>>>> +static ssize_t type_show(struct kobject *kobj,
>>>> +            struct device *dev, char *buf)
>>>> +{
>>>> +    return sprintf(buf, "%s\n", "net");
>>>> +}
>>>> +
>>>> +MDEV_TYPE_ATTR_RO(type);
>>>> +
>>>> +
>>>> +static struct attribute *mdev_types_attrs[] = {
>>>> +    &mdev_type_attr_name.attr,
>>>> +    &mdev_type_attr_device_api.attr,
>>>> +    &mdev_type_attr_available_instances.attr,
>>>> +    &mdev_type_attr_type.attr,
>>>> +    NULL,
>>>> +};
>>>> +
>>>> +static struct attribute_group mdev_type_group = {
>>>> +    .name  = "vdpa_virtio",
>>>
>>>
>>> To be consistent, it should be "vhost" or "virtio".
>> agreed!
>>>
>>>
>>>> +    .attrs = mdev_types_attrs,
>>>> +};
>>>> +
>>>> +static struct attribute_group *mdev_type_groups[] = {
>>>> +    &mdev_type_group,
>>>> +    NULL,
>>>> +};
>>>> +
>>>> +const struct attribute_group *mdev_dev_groups[] = {
>>>> +    NULL,
>>>> +};
>>>> +
>>>> +static int ifcvf_mdev_create(struct kobject *kobj, struct 
>>>> mdev_device *mdev)
>>>> +{
>>>> +    struct device *dev = mdev_parent_dev(mdev);
>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>>> +    int ret = 0;
>>>> +
>>>> +    mutex_lock(&adapter->mdev_lock);
>>>> +
>>>> +    if (adapter->mdev_count < 1) {
>>>> +        ret = -EINVAL;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    mdev_set_class_id(mdev, MDEV_ID_VHOST);
>>>> +    mdev_set_dev_ops(mdev, &ifc_mdev_ops);
>>>> +
>>>> +    mdev_set_drvdata(mdev, adapter);
>>>> +    mdev_set_iommu_device(mdev_dev(mdev), dev);
>>>> +
>>>> +    INIT_LIST_HEAD(&adapter->dma_maps);
>>>> +    adapter->mdev_count--;
>>>> +
>>>> +out:
>>>> +    mutex_unlock(&adapter->mdev_lock);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static int ifcvf_mdev_remove(struct mdev_device *mdev)
>>>> +{
>>>> +    struct device *dev = mdev_parent_dev(mdev);
>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>>> +
>>>> +    mutex_lock(&adapter->mdev_lock);
>>>> +    adapter->mdev_count++;
>>>> +    mutex_unlock(&adapter->mdev_lock);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static struct mdev_parent_ops ifcvf_mdev_fops = {
>>>> +    .owner            = THIS_MODULE,
>>>> +    .supported_type_groups    = mdev_type_groups,
>>>> +    .mdev_attr_groups    = mdev_dev_groups,
>>>> +    .create            = ifcvf_mdev_create,
>>>> +    .remove            = ifcvf_mdev_remove,
>>>> +};
>>>> +
>>>> +static int ifcvf_probe(struct pci_dev *pdev, const struct 
>>>> pci_device_id *id)
>>>> +{
>>>> +    struct device *dev = &pdev->dev;
>>>> +    struct ifcvf_adapter *adapter;
>>>> +    struct ifcvf_hw *vf;
>>>> +    int ret, i;
>>>> +
>>>> +    adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
>>>> +    if (adapter == NULL) {
>>>> +        ret = -ENOMEM;
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    mutex_init(&adapter->mdev_lock);
>>>> +    adapter->mdev_count = 1;
>>>
>>>
>>> So this is per VF based vDPA implementation, which seems not 
>>> convenient for management.  Anyhow we can control the creation in PF?
>>>
>>> Thanks
>> the driver scope for now doesn't support that, we can add these 
>> feature in next releases.
>
>
> Not a must for this series, but to have a better interaction with 
> management like libvirt, it's better.
>
> Btw, do you have the plan to post PF drivers?
>
> Thanks
Maybe we can workout PF driver in next release :)
THanks,
BR
Zhu Lingshan
>
>
Jason Wang Oct. 22, 2019, 1:05 p.m. UTC | #6
On 2019/10/22 下午2:53, Zhu Lingshan wrote:
>
> On 10/21/2019 6:19 PM, Jason Wang wrote:
>>
>> On 2019/10/21 下午5:53, Zhu, Lingshan wrote:
>>>
>>> On 10/16/2019 6:19 PM, Jason Wang wrote:
>>>>
>>>> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>>>>> This commit introduced IFC VF operations for vdpa, which complys to
>>>>> vhost_mdev interfaces, handles IFC VF initialization,
>>>>> configuration and removal.
>>>>>
>>>>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>>>>> ---


[...]


>>
>>
>>>
>>>>
>>>>
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>>>>> features)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->req_features = features;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 
>>>>> qid)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    return vf->vring[qid].last_avail_idx;
>>>>
>>>>
>>>> Does this really work? I'd expect it should be fetched from hw 
>>>> since it's an internal state.
>>> for now, it's working, we intend to support LM in next version drivers.
>>
>>
>> I'm not sure I understand here, I don't see any synchronization 
>> between the hardware and last_avail_idx, so last_avail_idx should not 
>> change.
>>
>> Btw, what did "LM" mean :) ?
>
> I can add bar IO operations here, LM = live migration, sorry for the 
> abbreviation.


Just make sure I understand here, I believe you mean reading 
last_avail_idx through IO bar here?

Thanks
Zhu Lingshan Oct. 23, 2019, 6:19 a.m. UTC | #7
On 10/22/2019 9:05 PM, Jason Wang wrote:
>
> On 2019/10/22 下午2:53, Zhu Lingshan wrote:
>>
>> On 10/21/2019 6:19 PM, Jason Wang wrote:
>>>
>>> On 2019/10/21 下午5:53, Zhu, Lingshan wrote:
>>>>
>>>> On 10/16/2019 6:19 PM, Jason Wang wrote:
>>>>>
>>>>> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>>>>>> This commit introduced IFC VF operations for vdpa, which complys to
>>>>>> vhost_mdev interfaces, handles IFC VF initialization,
>>>>>> configuration and removal.
>>>>>>
>>>>>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>>>>>> ---
>
>
> [...]
>
>
>>>
>>>
>>>>
>>>>>
>>>>>
>>>>>> +}
>>>>>> +
>>>>>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>>>>>> features)
>>>>>> +{
>>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>>> +
>>>>>> +    vf->req_features = features;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 
>>>>>> qid)
>>>>>> +{
>>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>>> +
>>>>>> +    return vf->vring[qid].last_avail_idx;
>>>>>
>>>>>
>>>>> Does this really work? I'd expect it should be fetched from hw 
>>>>> since it's an internal state.
>>>> for now, it's working, we intend to support LM in next version 
>>>> drivers.
>>>
>>>
>>> I'm not sure I understand here, I don't see any synchronization 
>>> between the hardware and last_avail_idx, so last_avail_idx should 
>>> not change.
>>>
>>> Btw, what did "LM" mean :) ?
>>
>> I can add bar IO operations here, LM = live migration, sorry for the 
>> abbreviation.
>
>
> Just make sure I understand here, I believe you mean reading 
> last_avail_idx through IO bar here?
>
> Thanks

Hi Jason,

Yes, I mean last_avail_idx. is that correct?

THanks

>
>
Jason Wang Oct. 23, 2019, 6:39 a.m. UTC | #8
On 2019/10/23 下午2:19, Zhu, Lingshan wrote:
>
> On 10/22/2019 9:05 PM, Jason Wang wrote:
>>
>> On 2019/10/22 下午2:53, Zhu Lingshan wrote:
>>>
>>> On 10/21/2019 6:19 PM, Jason Wang wrote:
>>>>
>>>> On 2019/10/21 下午5:53, Zhu, Lingshan wrote:
>>>>>
>>>>> On 10/16/2019 6:19 PM, Jason Wang wrote:
>>>>>>
>>>>>> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>>>>>>> This commit introduced IFC VF operations for vdpa, which complys to
>>>>>>> vhost_mdev interfaces, handles IFC VF initialization,
>>>>>>> configuration and removal.
>>>>>>>
>>>>>>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>>>>>>> ---
>>
>>
>> [...]
>>
>>
>>>>
>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, 
>>>>>>> u64 features)
>>>>>>> +{
>>>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>>>> +
>>>>>>> +    vf->req_features = features;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, 
>>>>>>> u16 qid)
>>>>>>> +{
>>>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>>>> +
>>>>>>> +    return vf->vring[qid].last_avail_idx;
>>>>>>
>>>>>>
>>>>>> Does this really work? I'd expect it should be fetched from hw 
>>>>>> since it's an internal state.
>>>>> for now, it's working, we intend to support LM in next version 
>>>>> drivers.
>>>>
>>>>
>>>> I'm not sure I understand here, I don't see any synchronization 
>>>> between the hardware and last_avail_idx, so last_avail_idx should 
>>>> not change.
>>>>
>>>> Btw, what did "LM" mean :) ?
>>>
>>> I can add bar IO operations here, LM = live migration, sorry for the 
>>> abbreviation.
>>
>>
>> Just make sure I understand here, I believe you mean reading 
>> last_avail_idx through IO bar here?
>>
>> Thanks
>
> Hi Jason,
>
> Yes, I mean last_avail_idx. is that correct?
>
> THanks


Yes.

Thanks


>
>>
>>
Zhu Lingshan Oct. 23, 2019, 9:24 a.m. UTC | #9
On 10/22/2019 2:53 PM, Zhu Lingshan wrote:
>
> On 10/21/2019 6:19 PM, Jason Wang wrote:
>>
>> On 2019/10/21 下午5:53, Zhu, Lingshan wrote:
>>>
>>> On 10/16/2019 6:19 PM, Jason Wang wrote:
>>>>
>>>> On 2019/10/16 上午9:30, Zhu Lingshan wrote:
>>>>> This commit introduced IFC VF operations for vdpa, which complys to
>>>>> vhost_mdev interfaces, handles IFC VF initialization,
>>>>> configuration and removal.
>>>>>
>>>>> Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
>>>>> ---
>>>>>   drivers/vhost/ifcvf/ifcvf_main.c | 541 
>>>>> +++++++++++++++++++++++++++++++++++++++
>>>>>   1 file changed, 541 insertions(+)
>>>>>   create mode 100644 drivers/vhost/ifcvf/ifcvf_main.c
>>>>>
>>>>> diff --git a/drivers/vhost/ifcvf/ifcvf_main.c 
>>>>> b/drivers/vhost/ifcvf/ifcvf_main.c
>>>>> new file mode 100644
>>>>> index 000000000000..c48a29969a85
>>>>> --- /dev/null
>>>>> +++ b/drivers/vhost/ifcvf/ifcvf_main.c
>>>>> @@ -0,0 +1,541 @@
>>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>>> +/*
>>>>> + * Copyright (C) 2019 Intel Corporation.
>>>>> + */
>>>>> +
>>>>> +#include <linux/interrupt.h>
>>>>> +#include <linux/module.h>
>>>>> +#include <linux/mdev.h>
>>>>> +#include <linux/pci.h>
>>>>> +#include <linux/sysfs.h>
>>>>> +
>>>>> +#include "ifcvf_base.h"
>>>>> +
>>>>> +#define VERSION_STRING    "0.1"
>>>>> +#define DRIVER_AUTHOR    "Intel Corporation"
>>>>> +#define IFCVF_DRIVER_NAME    "ifcvf"
>>>>> +
>>>>> +static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
>>>>> +{
>>>>> +    struct vring_info *vring = arg;
>>>>> +
>>>>> +    if (vring->cb.callback)
>>>>> +        return vring->cb.callback(vring->cb.private);
>>>>> +
>>>>> +    return IRQ_HANDLED;
>>>>> +}
>>>>> +
>>>>> +static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
>>>>> +{
>>>>> +    return IFC_SUPPORTED_FEATURES;
>>>>
>>>>
>>>> I would expect this should be done by querying the hw. Or IFC VF 
>>>> can't get any update through its firmware?
>>>
>>> Hi Jason,
>>>
>>> Thanks for your comments, for now driver just support these features.
>>
>>
>> Ok, it should work but less flexible, we can change it in the future.
> sure!
>>
>>
>>>
>>>>
>>>>
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 
>>>>> features)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->req_features = features;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 
>>>>> qid)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    return vf->vring[qid].last_avail_idx;
>>>>
>>>>
>>>> Does this really work? I'd expect it should be fetched from hw 
>>>> since it's an internal state.
>>> for now, it's working, we intend to support LM in next version drivers.
>>
>>
>> I'm not sure I understand here, I don't see any synchronization 
>> between the hardware and last_avail_idx, so last_avail_idx should not 
>> change.
>>
>> Btw, what did "LM" mean :) ?
>
> I can add bar IO operations here, LM = live migration, sorry for the 
> abbreviation.
>
>>
>>
>>>>
>>>>
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 
>>>>> qid, u64 num)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->vring[qid].last_used_idx = num;
>>>>
>>>>
>>>> I fail to understand why last_used_idx is needed. It looks to me 
>>>> the used idx in the used ring is sufficient.
>>> I will remove it.
>>>>
>>>>
>>>>> + vf->vring[qid].last_avail_idx = num;
>>>>
>>>>
>>>> Do we need a synchronization with hw immediately here?
>>>>
>>>>
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, 
>>>>> u16 idx,
>>>>> +                     u64 desc_area, u64 driver_area,
>>>>> +                     u64 device_area)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->vring[idx].desc = desc_area;
>>>>> +    vf->vring[idx].avail = driver_area;
>>>>> +    vf->vring[idx].used = device_area;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 
>>>>> qid, u32 num)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->vring[qid].size = num;
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
>>>>> +                u16 qid, bool ready)
>>>>> +{
>>>>> +
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->vring[qid].ready = ready;
>>>>> +}
>>>>> +
>>>>> +static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 
>>>>> qid)
>>>>> +{
>>>>> +
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    return vf->vring[qid].ready;
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
>>>>> +                 struct virtio_mdev_callback *cb)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->vring[idx].cb = *cb;
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    ifcvf_notify_queue(vf, idx);
>>>>> +}
>>>>> +
>>>>> +static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    return vf->status;
>>>>> +}
>>>>> +
>>>>> +static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    return vf->generation;
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_mdev_get_version(struct mdev_device *mdev)
>>>>> +{
>>>>> +    return VIRTIO_MDEV_VERSION;
>>>>> +}
>>>>> +
>>>>> +static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
>>>>> +{
>>>>> +    return IFCVF_DEVICE_ID;
>>>>> +}
>>>>> +
>>>>> +static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
>>>>> +{
>>>>> +    return IFCVF_VENDOR_ID;
>>>>> +}
>>>>> +
>>>>> +static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
>>>>> +{
>>>>> +    return IFCVF_QUEUE_ALIGNMENT;
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_start_datapath(void *private)
>>>>> +{
>>>>> +    int i, ret;
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>>>>> +
>>>>> +    for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
>>>>> +        if (!vf->vring[i].ready)
>>>>> +            break;
>>>>
>>>>
>>>> Looks like error should be returned here?
>>> agreed!
>>>>
>>>>
>>>>> +
>>>>> +        if (!vf->vring[i].size)
>>>>> +            break;
>>>>> +
>>>>> +        if (!vf->vring[i].desc || !vf->vring[i].avail ||
>>>>> +            !vf->vring[i].used)
>>>>> +            break;
>>>>> +    }
>>>>> +    vf->nr_vring = i;
>>>>> +
>>>>> +    ret = ifcvf_start_hw(vf);
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_stop_datapath(void *private)
>>>>> +{
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
>>>>> +    int i;
>>>>> +
>>>>> +    for (i = 0; i < IFCVF_MAX_QUEUES; i++)
>>>>> +        vf->vring[i].cb.callback = NULL;
>>>>
>>>>
>>>> Any synchronization is needed for the vq irq handler?
>>> I think even we set callback = NULL, the code is still there, 
>>> on-going routines would not be effected.
>>
>>
>> Ok I think you mean when ifcvf_stop_hw() return, hardware will not 
>> respond to e.g kick and other events etc.
> Yes, ifcvf_stop_hw() would disable the hw.
>>
>>
>>>>
>>>>
>>>>> +
>>>>> +    ifcvf_stop_hw(vf);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
>>>>> +{
>>>>> +    int i;
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>>>> +        vf->vring[i].last_used_idx = 0;
>>>>> +        vf->vring[i].last_avail_idx = 0;
>>>>> +        vf->vring[i].desc = 0;
>>>>> +        vf->vring[i].avail = 0;
>>>>> +        vf->vring[i].used = 0;
>>>>> +        vf->vring[i].ready = 0;
>>>>> +        vf->vring->cb.callback = NULL;
>>>>> +        vf->vring->cb.private = NULL;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 
>>>>> status)
>>>>> +{
>>>>> +    struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +
>>>>> +    vf->status = status;
>>>>> +
>>>>> +    if (status == 0) {
>>>>> +        ifcvf_stop_datapath(adapter);
>>>>> +        ifcvf_reset_vring(adapter);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
>>>>> +        ifcvf_start_datapath(adapter);
>>>>> +        return;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
>>>>> +{
>>>>> +    return IFCVF_MAX_QUEUES;
>>>>
>>>>
>>>> The name is confusing, it was used to return the maximum queue 
>>>> size. In new version of virtio-mdev, the callback was renamed as 
>>>> get_vq_num_max().
>>> will change that.
>>>>
>>>>
>>>>> +}
>>>>> +
>>>>> +static struct virtio_mdev_device_ops ifc_mdev_ops = {
>>>>> +    .get_features  = ifcvf_mdev_get_features,
>>>>> +    .set_features  = ifcvf_mdev_set_features,
>>>>> +    .get_status    = ifcvf_mdev_get_status,
>>>>> +    .set_status    = ifcvf_mdev_set_status,
>>>>> +    .get_queue_max = ifcvf_mdev_get_queue_max,
>>>>> +    .get_vq_state   = ifcvf_mdev_get_vq_state,
>>>>> +    .set_vq_state   = ifcvf_mdev_set_vq_state,
>>>>> +    .set_vq_cb      = ifcvf_mdev_set_vq_cb,
>>>>> +    .set_vq_ready   = ifcvf_mdev_set_vq_ready,
>>>>> +    .get_vq_ready    = ifcvf_mdev_get_vq_ready,
>>>>> +    .set_vq_num     = ifcvf_mdev_set_vq_num,
>>>>> +    .set_vq_address = ifcvf_mdev_set_vq_address,
>>>>> +    .kick_vq        = ifcvf_mdev_kick_vq,
>>>>> +    .get_generation    = ifcvf_mdev_get_generation,
>>>>> +    .get_version    = ifcvf_mdev_get_version,
>>>>> +    .get_device_id    = ifcvf_mdev_get_device_id,
>>>>> +    .get_vendor_id    = ifcvf_mdev_get_vendor_id,
>>>>> +    .get_vq_align    = ifcvf_mdev_get_vq_align,
>>>>> +};
>>>>
>>>>
>>>> set_config/get_config is missing. It looks to me they are not hard, 
>>>> just implementing the access to dev_cfg. It's key to make kernel 
>>>> virtio driver to work.
>>>>
>>>> And in the new version of virito-mdev, features like _F_LOG_ALL 
>>>> should be advertised through get_mdev_features.
>>> IMHO, currently the driver can work without set/get_config, 
>>> vhost_mdev doesn't call them for now.
>>
>>
>> Yes, but it was required by virtio_mdev for host driver to work, and 
>> it looks to me it's not hard to add them. If possible please add them 
>> and "virtio" type then we can use the ops for both the case of VM and 
>> containers.
> sure

Hello Jason,

Just want to double confirm the implementation of set/get_config, for 
now, dev_cfg only contains mac[6], status and max_virtqueue_pairs, is 
that enough to support virtio_mdev?

THanks!

>>
>>
>>>>
>>>>
>>>>> +
>>>>> +static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
>>>>> +{
>>>>> +    int vector, i, ret, irq;
>>>>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>>>>> +    struct ifcvf_hw *vf = &adapter->vf;
>>>>> +
>>>>> +    ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
>>>>> +            IFCVF_MAX_INTR, PCI_IRQ_MSIX);
>>>>> +    if (ret < 0) {
>>>>> +        IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>>>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>>>>> +        irq = pci_irq_vector(pdev, vector);
>>>>> +        ret = request_irq(irq, ifcvf_intr_handler, 0,
>>>>> +                pci_name(pdev), &vf->vring[i]);
>>>>> +        if (ret) {
>>>>> +            IFC_ERR(adapter->dev,
>>>>> +                "Failed to request irq for vq %d.\n", i);
>>>>> +            return ret;
>>>>> +        }
>>>>> +    }
>>>>
>>>>
>>>> Do we need to provide fallback when we can't do per vq MSIX?
>>> I think it would be very rarely that can not get enough vectors.
>>
>>
>> Right.
>>
>>
>>>>
>>>>
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
>>>>> +{
>>>>> +    int i, vector, irq;
>>>>> +    struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
>>>>> +    struct pci_dev *pdev = to_pci_dev(adapter->dev);
>>>>> +
>>>>> +    for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
>>>>> +        vector = i + IFCVF_MSI_QUEUE_OFF;
>>>>> +        irq = pci_irq_vector(pdev, vector);
>>>>> +        free_irq(irq, &vf->vring[i]);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static ssize_t name_show(struct kobject *kobj, struct device 
>>>>> *dev, char *buf)
>>>>> +{
>>>>> +    const char *name = "vhost accelerator (virtio ring compatible)";
>>>>> +
>>>>> +    return sprintf(buf, "%s\n", name);
>>>>> +}
>>>>> +MDEV_TYPE_ATTR_RO(name);
>>>>> +
>>>>> +static ssize_t device_api_show(struct kobject *kobj, struct 
>>>>> device *dev,
>>>>> +                   char *buf)
>>>>> +{
>>>>> +    return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
>>>>> +}
>>>>> +MDEV_TYPE_ATTR_RO(device_api);
>>>>> +
>>>>> +static ssize_t available_instances_show(struct kobject *kobj,
>>>>> +                    struct device *dev, char *buf)
>>>>> +{
>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>>>> +
>>>>> +    return sprintf(buf, "%d\n", adapter->mdev_count);
>>>>> +}
>>>>> +
>>>>> +MDEV_TYPE_ATTR_RO(available_instances);
>>>>> +
>>>>> +static ssize_t type_show(struct kobject *kobj,
>>>>> +            struct device *dev, char *buf)
>>>>> +{
>>>>> +    return sprintf(buf, "%s\n", "net");
>>>>> +}
>>>>> +
>>>>> +MDEV_TYPE_ATTR_RO(type);
>>>>> +
>>>>> +
>>>>> +static struct attribute *mdev_types_attrs[] = {
>>>>> +    &mdev_type_attr_name.attr,
>>>>> +    &mdev_type_attr_device_api.attr,
>>>>> + &mdev_type_attr_available_instances.attr,
>>>>> +    &mdev_type_attr_type.attr,
>>>>> +    NULL,
>>>>> +};
>>>>> +
>>>>> +static struct attribute_group mdev_type_group = {
>>>>> +    .name  = "vdpa_virtio",
>>>>
>>>>
>>>> To be consistent, it should be "vhost" or "virtio".
>>> agreed!
>>>>
>>>>
>>>>> +    .attrs = mdev_types_attrs,
>>>>> +};
>>>>> +
>>>>> +static struct attribute_group *mdev_type_groups[] = {
>>>>> +    &mdev_type_group,
>>>>> +    NULL,
>>>>> +};
>>>>> +
>>>>> +const struct attribute_group *mdev_dev_groups[] = {
>>>>> +    NULL,
>>>>> +};
>>>>> +
>>>>> +static int ifcvf_mdev_create(struct kobject *kobj, struct 
>>>>> mdev_device *mdev)
>>>>> +{
>>>>> +    struct device *dev = mdev_parent_dev(mdev);
>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>>>> +    int ret = 0;
>>>>> +
>>>>> +    mutex_lock(&adapter->mdev_lock);
>>>>> +
>>>>> +    if (adapter->mdev_count < 1) {
>>>>> +        ret = -EINVAL;
>>>>> +        goto out;
>>>>> +    }
>>>>> +
>>>>> +    mdev_set_class_id(mdev, MDEV_ID_VHOST);
>>>>> +    mdev_set_dev_ops(mdev, &ifc_mdev_ops);
>>>>> +
>>>>> +    mdev_set_drvdata(mdev, adapter);
>>>>> +    mdev_set_iommu_device(mdev_dev(mdev), dev);
>>>>> +
>>>>> +    INIT_LIST_HEAD(&adapter->dma_maps);
>>>>> +    adapter->mdev_count--;
>>>>> +
>>>>> +out:
>>>>> +    mutex_unlock(&adapter->mdev_lock);
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static int ifcvf_mdev_remove(struct mdev_device *mdev)
>>>>> +{
>>>>> +    struct device *dev = mdev_parent_dev(mdev);
>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +    struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
>>>>> +
>>>>> +    mutex_lock(&adapter->mdev_lock);
>>>>> +    adapter->mdev_count++;
>>>>> +    mutex_unlock(&adapter->mdev_lock);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static struct mdev_parent_ops ifcvf_mdev_fops = {
>>>>> +    .owner            = THIS_MODULE,
>>>>> +    .supported_type_groups    = mdev_type_groups,
>>>>> +    .mdev_attr_groups    = mdev_dev_groups,
>>>>> +    .create            = ifcvf_mdev_create,
>>>>> +    .remove            = ifcvf_mdev_remove,
>>>>> +};
>>>>> +
>>>>> +static int ifcvf_probe(struct pci_dev *pdev, const struct 
>>>>> pci_device_id *id)
>>>>> +{
>>>>> +    struct device *dev = &pdev->dev;
>>>>> +    struct ifcvf_adapter *adapter;
>>>>> +    struct ifcvf_hw *vf;
>>>>> +    int ret, i;
>>>>> +
>>>>> +    adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
>>>>> +    if (adapter == NULL) {
>>>>> +        ret = -ENOMEM;
>>>>> +        goto fail;
>>>>> +    }
>>>>> +
>>>>> +    mutex_init(&adapter->mdev_lock);
>>>>> +    adapter->mdev_count = 1;
>>>>
>>>>
>>>> So this is per VF based vDPA implementation, which seems not 
>>>> convenient for management.  Anyhow we can control the creation in PF?
>>>>
>>>> Thanks
>>> the driver scope for now doesn't support that, we can add these 
>>> feature in next releases.
>>
>>
>> Not a must for this series, but to have a better interaction with 
>> management like libvirt, it's better.
>>
>> Btw, do you have the plan to post PF drivers?
>>
>> Thanks
> Maybe we can workout PF driver in next release :)
> THanks,
> BR
> Zhu Lingshan
>>
>>
Jason Wang Oct. 23, 2019, 9:58 a.m. UTC | #10
On 2019/10/23 下午5:24, Zhu, Lingshan wrote:
>>>>>
>>>>>
>>>>> set_config/get_config is missing. It looks to me they are not 
>>>>> hard, just implementing the access to dev_cfg. It's key to make 
>>>>> kernel virtio driver to work.
>>>>>
>>>>> And in the new version of virito-mdev, features like _F_LOG_ALL 
>>>>> should be advertised through get_mdev_features.
>>>> IMHO, currently the driver can work without set/get_config, 
>>>> vhost_mdev doesn't call them for now.
>>>
>>>
>>> Yes, but it was required by virtio_mdev for host driver to work, and 
>>> it looks to me it's not hard to add them. If possible please add 
>>> them and "virtio" type then we can use the ops for both the case of 
>>> VM and containers.
>> sure
>
> Hello Jason,
>
> Just want to double confirm the implementation of set/get_config, for 
> now, dev_cfg only contains mac[6], status and max_virtqueue_pairs, is 
> that enough to support virtio_mdev?
>
> THanks!


Yes, and it depends on the features that you want to advertise. If you 
don't want to advertise MQ, there's no need to expose max_virtqueue_pairs.

Thanks

Patch
diff mbox series

diff --git a/drivers/vhost/ifcvf/ifcvf_main.c b/drivers/vhost/ifcvf/ifcvf_main.c
new file mode 100644
index 000000000000..c48a29969a85
--- /dev/null
+++ b/drivers/vhost/ifcvf/ifcvf_main.c
@@ -0,0 +1,541 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 Intel Corporation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+
+#include "ifcvf_base.h"
+
+#define VERSION_STRING	"0.1"
+#define DRIVER_AUTHOR	"Intel Corporation"
+#define IFCVF_DRIVER_NAME	"ifcvf"
+
+static irqreturn_t ifcvf_intr_handler(int irq, void *arg)
+{
+	struct vring_info *vring = arg;
+
+	if (vring->cb.callback)
+		return vring->cb.callback(vring->cb.private);
+
+	return IRQ_HANDLED;
+}
+
+static u64 ifcvf_mdev_get_features(struct mdev_device *mdev)
+{
+	return IFC_SUPPORTED_FEATURES;
+}
+
+static int ifcvf_mdev_set_features(struct mdev_device *mdev, u64 features)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->req_features = features;
+
+	return 0;
+}
+
+static u64 ifcvf_mdev_get_vq_state(struct mdev_device *mdev, u16 qid)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	return vf->vring[qid].last_avail_idx;
+}
+
+static int ifcvf_mdev_set_vq_state(struct mdev_device *mdev, u16 qid, u64 num)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->vring[qid].last_used_idx = num;
+	vf->vring[qid].last_avail_idx = num;
+
+	return 0;
+}
+
+static int ifcvf_mdev_set_vq_address(struct mdev_device *mdev, u16 idx,
+				     u64 desc_area, u64 driver_area,
+				     u64 device_area)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->vring[idx].desc = desc_area;
+	vf->vring[idx].avail = driver_area;
+	vf->vring[idx].used = device_area;
+
+	return 0;
+}
+
+static void ifcvf_mdev_set_vq_num(struct mdev_device *mdev, u16 qid, u32 num)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->vring[qid].size = num;
+}
+
+static void ifcvf_mdev_set_vq_ready(struct mdev_device *mdev,
+				u16 qid, bool ready)
+{
+
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->vring[qid].ready = ready;
+}
+
+static bool ifcvf_mdev_get_vq_ready(struct mdev_device *mdev, u16 qid)
+{
+
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	return vf->vring[qid].ready;
+}
+
+static void ifcvf_mdev_set_vq_cb(struct mdev_device *mdev, u16 idx,
+				 struct virtio_mdev_callback *cb)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->vring[idx].cb = *cb;
+}
+
+static void ifcvf_mdev_kick_vq(struct mdev_device *mdev, u16 idx)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	ifcvf_notify_queue(vf, idx);
+}
+
+static u8 ifcvf_mdev_get_status(struct mdev_device *mdev)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	return vf->status;
+}
+
+static u32 ifcvf_mdev_get_generation(struct mdev_device *mdev)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	return vf->generation;
+}
+
+static int ifcvf_mdev_get_version(struct mdev_device *mdev)
+{
+	return VIRTIO_MDEV_VERSION;
+}
+
+static u32 ifcvf_mdev_get_device_id(struct mdev_device *mdev)
+{
+	return IFCVF_DEVICE_ID;
+}
+
+static u32 ifcvf_mdev_get_vendor_id(struct mdev_device *mdev)
+{
+	return IFCVF_VENDOR_ID;
+}
+
+static u16 ifcvf_mdev_get_vq_align(struct mdev_device *mdev)
+{
+	return IFCVF_QUEUE_ALIGNMENT;
+}
+
+static int ifcvf_start_datapath(void *private)
+{
+	int i, ret;
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
+
+	for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {
+		if (!vf->vring[i].ready)
+			break;
+
+		if (!vf->vring[i].size)
+			break;
+
+		if (!vf->vring[i].desc || !vf->vring[i].avail ||
+			!vf->vring[i].used)
+			break;
+	}
+	vf->nr_vring = i;
+
+	ret = ifcvf_start_hw(vf);
+	return ret;
+}
+
+static int ifcvf_stop_datapath(void *private)
+{
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(private);
+	int i;
+
+	for (i = 0; i < IFCVF_MAX_QUEUES; i++)
+		vf->vring[i].cb.callback = NULL;
+
+	ifcvf_stop_hw(vf);
+
+	return 0;
+}
+
+static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
+{
+	int i;
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+		vf->vring[i].last_used_idx = 0;
+		vf->vring[i].last_avail_idx = 0;
+		vf->vring[i].desc = 0;
+		vf->vring[i].avail = 0;
+		vf->vring[i].used = 0;
+		vf->vring[i].ready = 0;
+		vf->vring->cb.callback = NULL;
+		vf->vring->cb.private = NULL;
+	}
+}
+
+static void ifcvf_mdev_set_status(struct mdev_device *mdev, u8 status)
+{
+	struct ifcvf_adapter *adapter = mdev_get_drvdata(mdev);
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+
+	vf->status = status;
+
+	if (status == 0) {
+		ifcvf_stop_datapath(adapter);
+		ifcvf_reset_vring(adapter);
+		return;
+	}
+
+	if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+		ifcvf_start_datapath(adapter);
+		return;
+	}
+}
+
+static u16 ifcvf_mdev_get_queue_max(struct mdev_device *mdev)
+{
+	return IFCVF_MAX_QUEUES;
+}
+
+static struct virtio_mdev_device_ops ifc_mdev_ops = {
+	.get_features  = ifcvf_mdev_get_features,
+	.set_features  = ifcvf_mdev_set_features,
+	.get_status    = ifcvf_mdev_get_status,
+	.set_status    = ifcvf_mdev_set_status,
+	.get_queue_max = ifcvf_mdev_get_queue_max,
+	.get_vq_state   = ifcvf_mdev_get_vq_state,
+	.set_vq_state   = ifcvf_mdev_set_vq_state,
+	.set_vq_cb      = ifcvf_mdev_set_vq_cb,
+	.set_vq_ready   = ifcvf_mdev_set_vq_ready,
+	.get_vq_ready	= ifcvf_mdev_get_vq_ready,
+	.set_vq_num     = ifcvf_mdev_set_vq_num,
+	.set_vq_address = ifcvf_mdev_set_vq_address,
+	.kick_vq        = ifcvf_mdev_kick_vq,
+	.get_generation	= ifcvf_mdev_get_generation,
+	.get_version	= ifcvf_mdev_get_version,
+	.get_device_id	= ifcvf_mdev_get_device_id,
+	.get_vendor_id	= ifcvf_mdev_get_vendor_id,
+	.get_vq_align	= ifcvf_mdev_get_vq_align,
+};
+
+static int ifcvf_init_msix(struct ifcvf_adapter *adapter)
+{
+	int vector, i, ret, irq;
+	struct pci_dev *pdev = to_pci_dev(adapter->dev);
+	struct ifcvf_hw *vf = &adapter->vf;
+
+	ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
+			IFCVF_MAX_INTR, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		IFC_ERR(adapter->dev, "Failed to alloc irq vectors.\n");
+		return ret;
+	}
+
+	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+		vector = i + IFCVF_MSI_QUEUE_OFF;
+		irq = pci_irq_vector(pdev, vector);
+		ret = request_irq(irq, ifcvf_intr_handler, 0,
+				pci_name(pdev), &vf->vring[i]);
+		if (ret) {
+			IFC_ERR(adapter->dev,
+				"Failed to request irq for vq %d.\n", i);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void ifcvf_destroy_adapter(struct ifcvf_adapter *adapter)
+{
+	int i, vector, irq;
+	struct ifcvf_hw *vf = IFC_PRIVATE_TO_VF(adapter);
+	struct pci_dev *pdev = to_pci_dev(adapter->dev);
+
+	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+		vector = i + IFCVF_MSI_QUEUE_OFF;
+		irq = pci_irq_vector(pdev, vector);
+		free_irq(irq, &vf->vring[i]);
+	}
+}
+
+static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const char *name = "vhost accelerator (virtio ring compatible)";
+
+	return sprintf(buf, "%s\n", name);
+}
+MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+			       char *buf)
+{
+	return sprintf(buf, "%s\n", VIRTIO_MDEV_DEVICE_API_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+static ssize_t available_instances_show(struct kobject *kobj,
+					struct device *dev, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+
+	return sprintf(buf, "%d\n", adapter->mdev_count);
+}
+
+MDEV_TYPE_ATTR_RO(available_instances);
+
+static ssize_t type_show(struct kobject *kobj,
+			struct device *dev, char *buf)
+{
+	return sprintf(buf, "%s\n", "net");
+}
+
+MDEV_TYPE_ATTR_RO(type);
+
+
+static struct attribute *mdev_types_attrs[] = {
+	&mdev_type_attr_name.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_available_instances.attr,
+	&mdev_type_attr_type.attr,
+	NULL,
+};
+
+static struct attribute_group mdev_type_group = {
+	.name  = "vdpa_virtio",
+	.attrs = mdev_types_attrs,
+};
+
+static struct attribute_group *mdev_type_groups[] = {
+	&mdev_type_group,
+	NULL,
+};
+
+const struct attribute_group *mdev_dev_groups[] = {
+	NULL,
+};
+
+static int ifcvf_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	struct device *dev = mdev_parent_dev(mdev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+	int ret = 0;
+
+	mutex_lock(&adapter->mdev_lock);
+
+	if (adapter->mdev_count < 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mdev_set_class_id(mdev, MDEV_ID_VHOST);
+	mdev_set_dev_ops(mdev, &ifc_mdev_ops);
+
+	mdev_set_drvdata(mdev, adapter);
+	mdev_set_iommu_device(mdev_dev(mdev), dev);
+
+	INIT_LIST_HEAD(&adapter->dma_maps);
+	adapter->mdev_count--;
+
+out:
+	mutex_unlock(&adapter->mdev_lock);
+	return ret;
+}
+
+static int ifcvf_mdev_remove(struct mdev_device *mdev)
+{
+	struct device *dev = mdev_parent_dev(mdev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+
+	mutex_lock(&adapter->mdev_lock);
+	adapter->mdev_count++;
+	mutex_unlock(&adapter->mdev_lock);
+
+	return 0;
+}
+
+static struct mdev_parent_ops ifcvf_mdev_fops = {
+	.owner			= THIS_MODULE,
+	.supported_type_groups	= mdev_type_groups,
+	.mdev_attr_groups	= mdev_dev_groups,
+	.create			= ifcvf_mdev_create,
+	.remove			= ifcvf_mdev_remove,
+};
+
+static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct ifcvf_adapter *adapter;
+	struct ifcvf_hw *vf;
+	int ret, i;
+
+	adapter = kzalloc(sizeof(struct ifcvf_adapter), GFP_KERNEL);
+	if (adapter == NULL) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	mutex_init(&adapter->mdev_lock);
+	adapter->mdev_count = 1;
+	adapter->dev = dev;
+
+	pci_set_drvdata(pdev, adapter);
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		IFC_ERR(adapter->dev, "Failed to enable device.\n");
+		goto free_adapter;
+	}
+
+	ret = pci_request_regions(pdev, IFCVF_DRIVER_NAME);
+	if (ret) {
+		IFC_ERR(adapter->dev, "Failed to request MMIO region.\n");
+		goto disable_device;
+	}
+
+	pci_set_master(pdev);
+
+	ret = ifcvf_init_msix(adapter);
+	if (ret) {
+		IFC_ERR(adapter->dev, "Failed to initialize MSIX.\n");
+		goto free_msix;
+	}
+
+	vf = &adapter->vf;
+	for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
+		vf->mem_resource[i].phys_addr = pci_resource_start(pdev, i);
+		vf->mem_resource[i].len = pci_resource_len(pdev, i);
+		if (!vf->mem_resource[i].len) {
+			vf->mem_resource[i].addr = NULL;
+			continue;
+		}
+
+		vf->mem_resource[i].addr = pci_iomap_range(pdev, i, 0,
+				vf->mem_resource[i].len);
+		if (!vf->mem_resource[i].addr) {
+			IFC_ERR(adapter->dev, "Failed to map IO resource %d\n",
+				i);
+			return -1;
+		}
+	}
+
+	if (ifcvf_init_hw(vf, pdev) < 0)
+		return -1;
+
+	ret = mdev_register_device(dev, &ifcvf_mdev_fops);
+	if (ret) {
+		IFC_ERR(adapter->dev,  "Failed to register mdev device\n");
+		goto destroy_adapter;
+	}
+
+	return 0;
+
+destroy_adapter:
+	ifcvf_destroy_adapter(adapter);
+free_msix:
+	pci_free_irq_vectors(pdev);
+	pci_release_regions(pdev);
+disable_device:
+	pci_disable_device(pdev);
+free_adapter:
+	kfree(adapter);
+fail:
+	return ret;
+}
+
+static void ifcvf_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+	struct ifcvf_hw *vf;
+	int i;
+
+	mdev_unregister_device(dev);
+
+	vf = &adapter->vf;
+	for (i = 0; i < IFCVF_PCI_MAX_RESOURCE; i++) {
+		if (vf->mem_resource[i].addr) {
+			pci_iounmap(pdev, vf->mem_resource[i].addr);
+			vf->mem_resource[i].addr = NULL;
+		}
+	}
+
+	ifcvf_destroy_adapter(adapter);
+	pci_free_irq_vectors(pdev);
+
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+
+	kfree(adapter);
+}
+
+static struct pci_device_id ifcvf_pci_ids[] = {
+	{ PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
+			IFCVF_DEVICE_ID,
+			IFCVF_SUBSYS_VENDOR_ID,
+			IFCVF_SUBSYS_DEVICE_ID) },
+	{ 0 },
+};
+MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
+
+static struct pci_driver ifcvf_driver = {
+	.name     = IFCVF_DRIVER_NAME,
+	.id_table = ifcvf_pci_ids,
+	.probe    = ifcvf_probe,
+	.remove   = ifcvf_remove,
+};
+
+static int __init ifcvf_init_module(void)
+{
+	int ret;
+
+	ret = pci_register_driver(&ifcvf_driver);
+	return ret;
+}
+
+static void __exit ifcvf_exit_module(void)
+{
+	pci_unregister_driver(&ifcvf_driver);
+}
+
+module_init(ifcvf_init_module);
+module_exit(ifcvf_exit_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(VERSION_STRING);
+MODULE_AUTHOR(DRIVER_AUTHOR);