diff mbox series

[V2,vfio,2/9] virtio-pci: Introduce admin virtqueue

Message ID 20231029155952.67686-3-yishaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Introduce a vfio driver over virtio devices | expand

Commit Message

Yishai Hadas Oct. 29, 2023, 3:59 p.m. UTC
From: Feng Liu <feliu@nvidia.com>

Introduce support for the admin virtqueue. By negotiating
VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates one
administration virtqueue. Administration virtqueue implementation in
virtio pci generic layer, enables multiple types of upper layer
drivers such as vfio, net, blk to utilize it.

Signed-off-by: Feng Liu <feliu@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/virtio/virtio.c                | 37 ++++++++++++++--
 drivers/virtio/virtio_pci_common.c     |  3 ++
 drivers/virtio/virtio_pci_common.h     | 15 ++++++-
 drivers/virtio/virtio_pci_modern.c     | 61 +++++++++++++++++++++++++-
 drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
 include/linux/virtio_config.h          |  4 ++
 include/linux/virtio_pci_modern.h      |  5 +++
 7 files changed, 137 insertions(+), 6 deletions(-)

Comments

Michael S. Tsirkin Oct. 29, 2023, 8:22 p.m. UTC | #1
On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> From: Feng Liu <feliu@nvidia.com>
> 
> Introduce support for the admin virtqueue. By negotiating
> VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates one
> administration virtqueue. Administration virtqueue implementation in
> virtio pci generic layer, enables multiple types of upper layer
> drivers such as vfio, net, blk to utilize it.
> 
> Signed-off-by: Feng Liu <feliu@nvidia.com>
> Reviewed-by: Parav Pandit <parav@nvidia.com>
> Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> ---
>  drivers/virtio/virtio.c                | 37 ++++++++++++++--
>  drivers/virtio/virtio_pci_common.c     |  3 ++
>  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
>  drivers/virtio/virtio_pci_modern.c     | 61 +++++++++++++++++++++++++-
>  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
>  include/linux/virtio_config.h          |  4 ++
>  include/linux/virtio_pci_modern.h      |  5 +++
>  7 files changed, 137 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 3893dc29eb26..f4080692b351 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
>  	if (err)
>  		goto err;
>  
> +	if (dev->config->create_avq) {
> +		err = dev->config->create_avq(dev);
> +		if (err)
> +			goto err;
> +	}
> +
>  	err = drv->probe(dev);
>  	if (err)
> -		goto err;
> +		goto err_probe;
>  
>  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
>  	if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))

Hmm I am not all that happy that we are just creating avq
unconditionally.
Can't we do it on demand to avoid wasting resources if
no one uses it?





> @@ -316,6 +322,10 @@ static int virtio_dev_probe(struct device *_d)
>  	virtio_config_enable(dev);
>  
>  	return 0;
> +
> +err_probe:
> +	if (dev->config->destroy_avq)
> +		dev->config->destroy_avq(dev);
>  err:
>  	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
>  	return err;
> @@ -331,6 +341,9 @@ static void virtio_dev_remove(struct device *_d)
>  
>  	drv->remove(dev);
>  
> +	if (dev->config->destroy_avq)
> +		dev->config->destroy_avq(dev);
> +
>  	/* Driver should have reset device. */
>  	WARN_ON_ONCE(dev->config->get_status(dev));
>  
> @@ -489,13 +502,20 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device);
>  int virtio_device_freeze(struct virtio_device *dev)
>  {
>  	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
> +	int ret;
>  
>  	virtio_config_disable(dev);
>  
>  	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
>  
> -	if (drv && drv->freeze)
> -		return drv->freeze(dev);
> +	if (drv && drv->freeze) {
> +		ret = drv->freeze(dev);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	if (dev->config->destroy_avq)
> +		dev->config->destroy_avq(dev);
>  
>  	return 0;
>  }
> @@ -532,10 +552,16 @@ int virtio_device_restore(struct virtio_device *dev)
>  	if (ret)
>  		goto err;
>  
> +	if (dev->config->create_avq) {
> +		ret = dev->config->create_avq(dev);
> +		if (ret)
> +			goto err;
> +	}
> +
>  	if (drv->restore) {
>  		ret = drv->restore(dev);
>  		if (ret)
> -			goto err;
> +			goto err_restore;
>  	}
>  
>  	/* If restore didn't do it, mark device DRIVER_OK ourselves. */
> @@ -546,6 +572,9 @@ int virtio_device_restore(struct virtio_device *dev)
>  
>  	return 0;
>  
> +err_restore:
> +	if (dev->config->destroy_avq)
> +		dev->config->destroy_avq(dev);
>  err:
>  	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
>  	return ret;
> diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
> index c2524a7207cf..6b4766d5abe6 100644
> --- a/drivers/virtio/virtio_pci_common.c
> +++ b/drivers/virtio/virtio_pci_common.c
> @@ -236,6 +236,9 @@ void vp_del_vqs(struct virtio_device *vdev)
>  	int i;
>  
>  	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
> +		if (vp_dev->is_avq(vdev, vq->index))
> +			continue;
> +
>  		if (vp_dev->per_vq_vectors) {
>  			int v = vp_dev->vqs[vq->index]->msix_vector;
>  
> diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
> index 4b773bd7c58c..e03af0966a4b 100644
> --- a/drivers/virtio/virtio_pci_common.h
> +++ b/drivers/virtio/virtio_pci_common.h
> @@ -41,6 +41,14 @@ struct virtio_pci_vq_info {
>  	unsigned int msix_vector;
>  };
>  
> +struct virtio_pci_admin_vq {
> +	/* Virtqueue info associated with this admin queue. */
> +	struct virtio_pci_vq_info info;
> +	/* Name of the admin queue: avq.$index. */
> +	char name[10];
> +	u16 vq_index;
> +};
> +
>  /* Our device structure */
>  struct virtio_pci_device {
>  	struct virtio_device vdev;
> @@ -58,9 +66,13 @@ struct virtio_pci_device {
>  	spinlock_t lock;
>  	struct list_head virtqueues;
>  
> -	/* array of all queues for house-keeping */
> +	/* Array of all virtqueues reported in the
> +	 * PCI common config num_queues field
> +	 */
>  	struct virtio_pci_vq_info **vqs;
>  
> +	struct virtio_pci_admin_vq admin_vq;
> +
>  	/* MSI-X support */
>  	int msix_enabled;
>  	int intx_enabled;
> @@ -86,6 +98,7 @@ struct virtio_pci_device {
>  	void (*del_vq)(struct virtio_pci_vq_info *info);
>  
>  	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
> +	bool (*is_avq)(struct virtio_device *vdev, unsigned int index);
>  };
>  
>  /* Constants for MSI-X */
> diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
> index d6bb68ba84e5..01c5ba346471 100644
> --- a/drivers/virtio/virtio_pci_modern.c
> +++ b/drivers/virtio/virtio_pci_modern.c
> @@ -26,6 +26,16 @@ static u64 vp_get_features(struct virtio_device *vdev)
>  	return vp_modern_get_features(&vp_dev->mdev);
>  }
>  
> +static bool vp_is_avq(struct virtio_device *vdev, unsigned int index)
> +{
> +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +
> +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> +		return false;
> +
> +	return index == vp_dev->admin_vq.vq_index;
> +}
> +
>  static void vp_transport_features(struct virtio_device *vdev, u64 features)
>  {
>  	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> @@ -37,6 +47,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
>  
>  	if (features & BIT_ULL(VIRTIO_F_RING_RESET))
>  		__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
> +
> +	if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ))
> +		__virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ);
>  }
>  
>  /* virtio config->finalize_features() implementation */
> @@ -317,7 +330,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
>  	else
>  		notify = vp_notify;
>  
> -	if (index >= vp_modern_get_num_queues(mdev))
> +	if (index >= vp_modern_get_num_queues(mdev) &&
> +	    !vp_is_avq(&vp_dev->vdev, index))
>  		return ERR_PTR(-EINVAL);
>  
>  	/* Check if queue is either not available or already active. */
> @@ -491,6 +505,46 @@ static bool vp_get_shm_region(struct virtio_device *vdev,
>  	return true;
>  }
>  
> +static int vp_modern_create_avq(struct virtio_device *vdev)
> +{
> +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +	struct virtio_pci_admin_vq *avq;
> +	struct virtqueue *vq;
> +	u16 admin_q_num;
> +
> +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> +		return 0;
> +
> +	admin_q_num = vp_modern_avq_num(&vp_dev->mdev);
> +	if (!admin_q_num)
> +		return -EINVAL;


We really just need 1 entry ATM. Limit to that?

> +
> +	avq = &vp_dev->admin_vq;
> +	avq->vq_index = vp_modern_avq_index(&vp_dev->mdev);
> +	sprintf(avq->name, "avq.%u", avq->vq_index);
> +	vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL,
> +			      avq->name, NULL, VIRTIO_MSI_NO_VECTOR);
> +	if (IS_ERR(vq)) {
> +		dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld",
> +			PTR_ERR(vq));
> +		return PTR_ERR(vq);
> +	}
> +
> +	vp_dev->admin_vq.info.vq = vq;
> +	vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true);
> +	return 0;
> +}
> +
> +static void vp_modern_destroy_avq(struct virtio_device *vdev)
> +{
> +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +
> +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> +		return;
> +
> +	vp_dev->del_vq(&vp_dev->admin_vq.info);
> +}
> +
>  static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
>  	.get		= NULL,
>  	.set		= NULL,
> @@ -509,6 +563,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
>  	.get_shm_region  = vp_get_shm_region,
>  	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
>  	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> +	.create_avq = vp_modern_create_avq,
> +	.destroy_avq = vp_modern_destroy_avq,
>  };
>  
>  static const struct virtio_config_ops virtio_pci_config_ops = {
> @@ -529,6 +585,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
>  	.get_shm_region  = vp_get_shm_region,
>  	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
>  	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> +	.create_avq = vp_modern_create_avq,
> +	.destroy_avq = vp_modern_destroy_avq,
>  };
>  
>  /* the PCI probing function */
> @@ -552,6 +610,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
>  	vp_dev->config_vector = vp_config_vector;
>  	vp_dev->setup_vq = setup_vq;
>  	vp_dev->del_vq = del_vq;
> +	vp_dev->is_avq = vp_is_avq;
>  	vp_dev->isr = mdev->isr;
>  	vp_dev->vdev.id = mdev->id;
>  
> diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
> index 9cb601e16688..4aab1727d121 100644
> --- a/drivers/virtio/virtio_pci_modern_dev.c
> +++ b/drivers/virtio/virtio_pci_modern_dev.c
> @@ -714,6 +714,24 @@ void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
>  }
>  EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
>  
> +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev)
> +{
> +	struct virtio_pci_modern_common_cfg __iomem *cfg;
> +
> +	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
> +	return vp_ioread16(&cfg->admin_queue_num);
> +}
> +EXPORT_SYMBOL_GPL(vp_modern_avq_num);
> +
> +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev)
> +{
> +	struct virtio_pci_modern_common_cfg __iomem *cfg;
> +
> +	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
> +	return vp_ioread16(&cfg->admin_queue_index);
> +}
> +EXPORT_SYMBOL_GPL(vp_modern_avq_index);
> +
>  MODULE_VERSION("0.1");
>  MODULE_DESCRIPTION("Modern Virtio PCI Device");
>  MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
> diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> index 2b3438de2c4d..da9b271b54db 100644
> --- a/include/linux/virtio_config.h
> +++ b/include/linux/virtio_config.h
> @@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *);
>   *	Returns 0 on success or error status
>   *	If disable_vq_and_reset is set, then enable_vq_after_reset must also be
>   *	set.
> + * @create_avq: create admin virtqueue resource.
> + * @destroy_avq: destroy admin virtqueue resource.
>   */
>  struct virtio_config_ops {
>  	void (*get)(struct virtio_device *vdev, unsigned offset,
> @@ -120,6 +122,8 @@ struct virtio_config_ops {
>  			       struct virtio_shm_region *region, u8 id);
>  	int (*disable_vq_and_reset)(struct virtqueue *vq);
>  	int (*enable_vq_after_reset)(struct virtqueue *vq);
> +	int (*create_avq)(struct virtio_device *vdev);
> +	void (*destroy_avq)(struct virtio_device *vdev);
>  };
>  
>  /* If driver didn't advertise the feature, it will never appear. */
> diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
> index 067ac1d789bc..0f8737c9ae7d 100644
> --- a/include/linux/virtio_pci_modern.h
> +++ b/include/linux/virtio_pci_modern.h
> @@ -10,6 +10,9 @@ struct virtio_pci_modern_common_cfg {
>  
>  	__le16 queue_notify_data;	/* read-write */
>  	__le16 queue_reset;		/* read-write */
> +
> +	__le16 admin_queue_index;	/* read-only */
> +	__le16 admin_queue_num;		/* read-only */
>  };
>  
>  struct virtio_pci_modern_device {
> @@ -121,4 +124,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev);
>  void vp_modern_remove(struct virtio_pci_modern_device *mdev);
>  int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
>  void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
> +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev);
> +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev);
>  #endif
> -- 
> 2.27.0
Parav Pandit Oct. 30, 2023, 3:51 p.m. UTC | #2
> From: Michael S. Tsirkin <mst@redhat.com>
> Sent: Monday, October 30, 2023 1:53 AM
> 
> On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > From: Feng Liu <feliu@nvidia.com>
> >
> > Introduce support for the admin virtqueue. By negotiating
> > VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates one
> > administration virtqueue. Administration virtqueue implementation in
> > virtio pci generic layer, enables multiple types of upper layer
> > drivers such as vfio, net, blk to utilize it.
> >
> > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > ---
> >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> >  drivers/virtio/virtio_pci_common.c     |  3 ++
> >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> >  drivers/virtio/virtio_pci_modern.c     | 61 +++++++++++++++++++++++++-
> >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> >  include/linux/virtio_config.h          |  4 ++
> >  include/linux/virtio_pci_modern.h      |  5 +++
> >  7 files changed, 137 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index
> > 3893dc29eb26..f4080692b351 100644
> > --- a/drivers/virtio/virtio.c
> > +++ b/drivers/virtio/virtio.c
> > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
> >  	if (err)
> >  		goto err;
> >
> > +	if (dev->config->create_avq) {
> > +		err = dev->config->create_avq(dev);
> > +		if (err)
> > +			goto err;
> > +	}
> > +
> >  	err = drv->probe(dev);
> >  	if (err)
> > -		goto err;
> > +		goto err_probe;
> >
> >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> >  	if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
> 
> Hmm I am not all that happy that we are just creating avq unconditionally.
> Can't we do it on demand to avoid wasting resources if no one uses it?
>
Virtio queues must be enabled before driver_ok as we discussed in F_DYNAMIC bit exercise.
So creating AQ when first legacy command is invoked, would be too late.

> 
> > @@ -316,6 +322,10 @@ static int virtio_dev_probe(struct device *_d)
> >  	virtio_config_enable(dev);
> >
> >  	return 0;
> > +
> > +err_probe:
> > +	if (dev->config->destroy_avq)
> > +		dev->config->destroy_avq(dev);
> >  err:
> >  	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
> >  	return err;
> > @@ -331,6 +341,9 @@ static void virtio_dev_remove(struct device *_d)
> >
> >  	drv->remove(dev);
> >
> > +	if (dev->config->destroy_avq)
> > +		dev->config->destroy_avq(dev);
> > +
> >  	/* Driver should have reset device. */
> >  	WARN_ON_ONCE(dev->config->get_status(dev));
> >
> > @@ -489,13 +502,20 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device);
> >  int virtio_device_freeze(struct virtio_device *dev)  {
> >  	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
> > +	int ret;
> >
> >  	virtio_config_disable(dev);
> >
> >  	dev->failed = dev->config->get_status(dev) &
> VIRTIO_CONFIG_S_FAILED;
> >
> > -	if (drv && drv->freeze)
> > -		return drv->freeze(dev);
> > +	if (drv && drv->freeze) {
> > +		ret = drv->freeze(dev);
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	if (dev->config->destroy_avq)
> > +		dev->config->destroy_avq(dev);
> >
> >  	return 0;
> >  }
> > @@ -532,10 +552,16 @@ int virtio_device_restore(struct virtio_device *dev)
> >  	if (ret)
> >  		goto err;
> >
> > +	if (dev->config->create_avq) {
> > +		ret = dev->config->create_avq(dev);
> > +		if (ret)
> > +			goto err;
> > +	}
> > +
> >  	if (drv->restore) {
> >  		ret = drv->restore(dev);
> >  		if (ret)
> > -			goto err;
> > +			goto err_restore;
> >  	}
> >
> >  	/* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@
> > -546,6 +572,9 @@ int virtio_device_restore(struct virtio_device *dev)
> >
> >  	return 0;
> >
> > +err_restore:
> > +	if (dev->config->destroy_avq)
> > +		dev->config->destroy_avq(dev);
> >  err:
> >  	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
> >  	return ret;
> > diff --git a/drivers/virtio/virtio_pci_common.c
> > b/drivers/virtio/virtio_pci_common.c
> > index c2524a7207cf..6b4766d5abe6 100644
> > --- a/drivers/virtio/virtio_pci_common.c
> > +++ b/drivers/virtio/virtio_pci_common.c
> > @@ -236,6 +236,9 @@ void vp_del_vqs(struct virtio_device *vdev)
> >  	int i;
> >
> >  	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
> > +		if (vp_dev->is_avq(vdev, vq->index))
> > +			continue;
> > +
> >  		if (vp_dev->per_vq_vectors) {
> >  			int v = vp_dev->vqs[vq->index]->msix_vector;
> >
> > diff --git a/drivers/virtio/virtio_pci_common.h
> > b/drivers/virtio/virtio_pci_common.h
> > index 4b773bd7c58c..e03af0966a4b 100644
> > --- a/drivers/virtio/virtio_pci_common.h
> > +++ b/drivers/virtio/virtio_pci_common.h
> > @@ -41,6 +41,14 @@ struct virtio_pci_vq_info {
> >  	unsigned int msix_vector;
> >  };
> >
> > +struct virtio_pci_admin_vq {
> > +	/* Virtqueue info associated with this admin queue. */
> > +	struct virtio_pci_vq_info info;
> > +	/* Name of the admin queue: avq.$index. */
> > +	char name[10];
> > +	u16 vq_index;
> > +};
> > +
> >  /* Our device structure */
> >  struct virtio_pci_device {
> >  	struct virtio_device vdev;
> > @@ -58,9 +66,13 @@ struct virtio_pci_device {
> >  	spinlock_t lock;
> >  	struct list_head virtqueues;
> >
> > -	/* array of all queues for house-keeping */
> > +	/* Array of all virtqueues reported in the
> > +	 * PCI common config num_queues field
> > +	 */
> >  	struct virtio_pci_vq_info **vqs;
> >
> > +	struct virtio_pci_admin_vq admin_vq;
> > +
> >  	/* MSI-X support */
> >  	int msix_enabled;
> >  	int intx_enabled;
> > @@ -86,6 +98,7 @@ struct virtio_pci_device {
> >  	void (*del_vq)(struct virtio_pci_vq_info *info);
> >
> >  	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
> > +	bool (*is_avq)(struct virtio_device *vdev, unsigned int index);
> >  };
> >
> >  /* Constants for MSI-X */
> > diff --git a/drivers/virtio/virtio_pci_modern.c
> > b/drivers/virtio/virtio_pci_modern.c
> > index d6bb68ba84e5..01c5ba346471 100644
> > --- a/drivers/virtio/virtio_pci_modern.c
> > +++ b/drivers/virtio/virtio_pci_modern.c
> > @@ -26,6 +26,16 @@ static u64 vp_get_features(struct virtio_device *vdev)
> >  	return vp_modern_get_features(&vp_dev->mdev);
> >  }
> >
> > +static bool vp_is_avq(struct virtio_device *vdev, unsigned int index)
> > +{
> > +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > +
> > +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> > +		return false;
> > +
> > +	return index == vp_dev->admin_vq.vq_index; }
> > +
> >  static void vp_transport_features(struct virtio_device *vdev, u64
> > features)  {
> >  	struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -37,6
> > +47,9 @@ static void vp_transport_features(struct virtio_device *vdev,
> > u64 features)
> >
> >  	if (features & BIT_ULL(VIRTIO_F_RING_RESET))
> >  		__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
> > +
> > +	if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ))
> > +		__virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ);
> >  }
> >
> >  /* virtio config->finalize_features() implementation */ @@ -317,7
> > +330,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device
> *vp_dev,
> >  	else
> >  		notify = vp_notify;
> >
> > -	if (index >= vp_modern_get_num_queues(mdev))
> > +	if (index >= vp_modern_get_num_queues(mdev) &&
> > +	    !vp_is_avq(&vp_dev->vdev, index))
> >  		return ERR_PTR(-EINVAL);
> >
> >  	/* Check if queue is either not available or already active. */ @@
> > -491,6 +505,46 @@ static bool vp_get_shm_region(struct virtio_device
> *vdev,
> >  	return true;
> >  }
> >
> > +static int vp_modern_create_avq(struct virtio_device *vdev) {
> > +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > +	struct virtio_pci_admin_vq *avq;
> > +	struct virtqueue *vq;
> > +	u16 admin_q_num;
> > +
> > +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> > +		return 0;
> > +
> > +	admin_q_num = vp_modern_avq_num(&vp_dev->mdev);
> > +	if (!admin_q_num)
> > +		return -EINVAL;
> 
> 
> We really just need 1 entry ATM. Limit to that?
>
Above check is only reading and validating that number of admin queues being nonzero from the device.
It is ok if its > 1. 
Driver currently using only 1 aq of default depth (entries) reported by the device.

Did I misunderstand your question about 1 entry?
 
> > +
> > +	avq = &vp_dev->admin_vq;
> > +	avq->vq_index = vp_modern_avq_index(&vp_dev->mdev);
> > +	sprintf(avq->name, "avq.%u", avq->vq_index);
> > +	vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq-
> >vq_index, NULL,
> > +			      avq->name, NULL, VIRTIO_MSI_NO_VECTOR);
> > +	if (IS_ERR(vq)) {
> > +		dev_err(&vdev->dev, "failed to setup admin virtqueue,
> err=%ld",
> > +			PTR_ERR(vq));
> > +		return PTR_ERR(vq);
> > +	}
> > +
> > +	vp_dev->admin_vq.info.vq = vq;
> > +	vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index,
> true);
> > +	return 0;
> > +}
> > +
> > +static void vp_modern_destroy_avq(struct virtio_device *vdev) {
> > +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > +
> > +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> > +		return;
> > +
> > +	vp_dev->del_vq(&vp_dev->admin_vq.info);
> > +}
> > +
> >  static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
> >  	.get		= NULL,
> >  	.set		= NULL,
> > @@ -509,6 +563,8 @@ static const struct virtio_config_ops
> virtio_pci_config_nodev_ops = {
> >  	.get_shm_region  = vp_get_shm_region,
> >  	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
> >  	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> > +	.create_avq = vp_modern_create_avq,
> > +	.destroy_avq = vp_modern_destroy_avq,
> >  };
> >
> >  static const struct virtio_config_ops virtio_pci_config_ops = { @@
> > -529,6 +585,8 @@ static const struct virtio_config_ops virtio_pci_config_ops
> = {
> >  	.get_shm_region  = vp_get_shm_region,
> >  	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
> >  	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> > +	.create_avq = vp_modern_create_avq,
> > +	.destroy_avq = vp_modern_destroy_avq,
> >  };
> >
> >  /* the PCI probing function */
> > @@ -552,6 +610,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device
> *vp_dev)
> >  	vp_dev->config_vector = vp_config_vector;
> >  	vp_dev->setup_vq = setup_vq;
> >  	vp_dev->del_vq = del_vq;
> > +	vp_dev->is_avq = vp_is_avq;
> >  	vp_dev->isr = mdev->isr;
> >  	vp_dev->vdev.id = mdev->id;
> >
> > diff --git a/drivers/virtio/virtio_pci_modern_dev.c
> > b/drivers/virtio/virtio_pci_modern_dev.c
> > index 9cb601e16688..4aab1727d121 100644
> > --- a/drivers/virtio/virtio_pci_modern_dev.c
> > +++ b/drivers/virtio/virtio_pci_modern_dev.c
> > @@ -714,6 +714,24 @@ void __iomem *vp_modern_map_vq_notify(struct
> > virtio_pci_modern_device *mdev,  }
> > EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
> >
> > +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev) {
> > +	struct virtio_pci_modern_common_cfg __iomem *cfg;
> > +
> > +	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev-
> >common;
> > +	return vp_ioread16(&cfg->admin_queue_num);
> > +}
> > +EXPORT_SYMBOL_GPL(vp_modern_avq_num);
> > +
> > +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev) {
> > +	struct virtio_pci_modern_common_cfg __iomem *cfg;
> > +
> > +	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev-
> >common;
> > +	return vp_ioread16(&cfg->admin_queue_index);
> > +}
> > +EXPORT_SYMBOL_GPL(vp_modern_avq_index);
> > +
> >  MODULE_VERSION("0.1");
> >  MODULE_DESCRIPTION("Modern Virtio PCI Device");
> MODULE_AUTHOR("Jason
> > Wang <jasowang@redhat.com>"); diff --git
> > a/include/linux/virtio_config.h b/include/linux/virtio_config.h index
> > 2b3438de2c4d..da9b271b54db 100644
> > --- a/include/linux/virtio_config.h
> > +++ b/include/linux/virtio_config.h
> > @@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *);
> >   *	Returns 0 on success or error status
> >   *	If disable_vq_and_reset is set, then enable_vq_after_reset must also be
> >   *	set.
> > + * @create_avq: create admin virtqueue resource.
> > + * @destroy_avq: destroy admin virtqueue resource.
> >   */
> >  struct virtio_config_ops {
> >  	void (*get)(struct virtio_device *vdev, unsigned offset, @@ -120,6
> > +122,8 @@ struct virtio_config_ops {
> >  			       struct virtio_shm_region *region, u8 id);
> >  	int (*disable_vq_and_reset)(struct virtqueue *vq);
> >  	int (*enable_vq_after_reset)(struct virtqueue *vq);
> > +	int (*create_avq)(struct virtio_device *vdev);
> > +	void (*destroy_avq)(struct virtio_device *vdev);
> >  };
> >
> >  /* If driver didn't advertise the feature, it will never appear. */
> > diff --git a/include/linux/virtio_pci_modern.h
> > b/include/linux/virtio_pci_modern.h
> > index 067ac1d789bc..0f8737c9ae7d 100644
> > --- a/include/linux/virtio_pci_modern.h
> > +++ b/include/linux/virtio_pci_modern.h
> > @@ -10,6 +10,9 @@ struct virtio_pci_modern_common_cfg {
> >
> >  	__le16 queue_notify_data;	/* read-write */
> >  	__le16 queue_reset;		/* read-write */
> > +
> > +	__le16 admin_queue_index;	/* read-only */
> > +	__le16 admin_queue_num;		/* read-only */
> >  };
> >
> >  struct virtio_pci_modern_device {
> > @@ -121,4 +124,6 @@ int vp_modern_probe(struct
> > virtio_pci_modern_device *mdev);  void vp_modern_remove(struct
> > virtio_pci_modern_device *mdev);  int vp_modern_get_queue_reset(struct
> > virtio_pci_modern_device *mdev, u16 index);  void
> > vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16
> > index);
> > +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev);
> > +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev);
> >  #endif
> > --
> > 2.27.0
Michael S. Tsirkin Oct. 30, 2023, 3:59 p.m. UTC | #3
On Mon, Oct 30, 2023 at 03:51:40PM +0000, Parav Pandit wrote:
> 
> 
> > From: Michael S. Tsirkin <mst@redhat.com>
> > Sent: Monday, October 30, 2023 1:53 AM
> > 
> > On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > > From: Feng Liu <feliu@nvidia.com>
> > >
> > > Introduce support for the admin virtqueue. By negotiating
> > > VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates one
> > > administration virtqueue. Administration virtqueue implementation in
> > > virtio pci generic layer, enables multiple types of upper layer
> > > drivers such as vfio, net, blk to utilize it.
> > >
> > > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > > ---
> > >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> > >  drivers/virtio/virtio_pci_common.c     |  3 ++
> > >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> > >  drivers/virtio/virtio_pci_modern.c     | 61 +++++++++++++++++++++++++-
> > >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> > >  include/linux/virtio_config.h          |  4 ++
> > >  include/linux/virtio_pci_modern.h      |  5 +++
> > >  7 files changed, 137 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index
> > > 3893dc29eb26..f4080692b351 100644
> > > --- a/drivers/virtio/virtio.c
> > > +++ b/drivers/virtio/virtio.c
> > > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
> > >  	if (err)
> > >  		goto err;
> > >
> > > +	if (dev->config->create_avq) {
> > > +		err = dev->config->create_avq(dev);
> > > +		if (err)
> > > +			goto err;
> > > +	}
> > > +
> > >  	err = drv->probe(dev);
> > >  	if (err)
> > > -		goto err;
> > > +		goto err_probe;
> > >
> > >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> > >  	if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
> > 
> > Hmm I am not all that happy that we are just creating avq unconditionally.
> > Can't we do it on demand to avoid wasting resources if no one uses it?
> >
> Virtio queues must be enabled before driver_ok as we discussed in F_DYNAMIC bit exercise.
> So creating AQ when first legacy command is invoked, would be too late.

Well we didn't release the spec with AQ so I am pretty sure there are no
devices using the feature. Do we want to already make an
exception for AQ and allow creating AQs after DRIVER_OK
even without F_DYNAMIC?

> > 
> > > @@ -316,6 +322,10 @@ static int virtio_dev_probe(struct device *_d)
> > >  	virtio_config_enable(dev);
> > >
> > >  	return 0;
> > > +
> > > +err_probe:
> > > +	if (dev->config->destroy_avq)
> > > +		dev->config->destroy_avq(dev);
> > >  err:
> > >  	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
> > >  	return err;
> > > @@ -331,6 +341,9 @@ static void virtio_dev_remove(struct device *_d)
> > >
> > >  	drv->remove(dev);
> > >
> > > +	if (dev->config->destroy_avq)
> > > +		dev->config->destroy_avq(dev);
> > > +
> > >  	/* Driver should have reset device. */
> > >  	WARN_ON_ONCE(dev->config->get_status(dev));
> > >
> > > @@ -489,13 +502,20 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device);
> > >  int virtio_device_freeze(struct virtio_device *dev)  {
> > >  	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
> > > +	int ret;
> > >
> > >  	virtio_config_disable(dev);
> > >
> > >  	dev->failed = dev->config->get_status(dev) &
> > VIRTIO_CONFIG_S_FAILED;
> > >
> > > -	if (drv && drv->freeze)
> > > -		return drv->freeze(dev);
> > > +	if (drv && drv->freeze) {
> > > +		ret = drv->freeze(dev);
> > > +		if (ret)
> > > +			return ret;
> > > +	}
> > > +
> > > +	if (dev->config->destroy_avq)
> > > +		dev->config->destroy_avq(dev);
> > >
> > >  	return 0;
> > >  }
> > > @@ -532,10 +552,16 @@ int virtio_device_restore(struct virtio_device *dev)
> > >  	if (ret)
> > >  		goto err;
> > >
> > > +	if (dev->config->create_avq) {
> > > +		ret = dev->config->create_avq(dev);
> > > +		if (ret)
> > > +			goto err;
> > > +	}
> > > +
> > >  	if (drv->restore) {
> > >  		ret = drv->restore(dev);
> > >  		if (ret)
> > > -			goto err;
> > > +			goto err_restore;
> > >  	}
> > >
> > >  	/* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@
> > > -546,6 +572,9 @@ int virtio_device_restore(struct virtio_device *dev)
> > >
> > >  	return 0;
> > >
> > > +err_restore:
> > > +	if (dev->config->destroy_avq)
> > > +		dev->config->destroy_avq(dev);
> > >  err:
> > >  	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
> > >  	return ret;
> > > diff --git a/drivers/virtio/virtio_pci_common.c
> > > b/drivers/virtio/virtio_pci_common.c
> > > index c2524a7207cf..6b4766d5abe6 100644
> > > --- a/drivers/virtio/virtio_pci_common.c
> > > +++ b/drivers/virtio/virtio_pci_common.c
> > > @@ -236,6 +236,9 @@ void vp_del_vqs(struct virtio_device *vdev)
> > >  	int i;
> > >
> > >  	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
> > > +		if (vp_dev->is_avq(vdev, vq->index))
> > > +			continue;
> > > +
> > >  		if (vp_dev->per_vq_vectors) {
> > >  			int v = vp_dev->vqs[vq->index]->msix_vector;
> > >
> > > diff --git a/drivers/virtio/virtio_pci_common.h
> > > b/drivers/virtio/virtio_pci_common.h
> > > index 4b773bd7c58c..e03af0966a4b 100644
> > > --- a/drivers/virtio/virtio_pci_common.h
> > > +++ b/drivers/virtio/virtio_pci_common.h
> > > @@ -41,6 +41,14 @@ struct virtio_pci_vq_info {
> > >  	unsigned int msix_vector;
> > >  };
> > >
> > > +struct virtio_pci_admin_vq {
> > > +	/* Virtqueue info associated with this admin queue. */
> > > +	struct virtio_pci_vq_info info;
> > > +	/* Name of the admin queue: avq.$index. */
> > > +	char name[10];
> > > +	u16 vq_index;
> > > +};
> > > +
> > >  /* Our device structure */
> > >  struct virtio_pci_device {
> > >  	struct virtio_device vdev;
> > > @@ -58,9 +66,13 @@ struct virtio_pci_device {
> > >  	spinlock_t lock;
> > >  	struct list_head virtqueues;
> > >
> > > -	/* array of all queues for house-keeping */
> > > +	/* Array of all virtqueues reported in the
> > > +	 * PCI common config num_queues field
> > > +	 */
> > >  	struct virtio_pci_vq_info **vqs;
> > >
> > > +	struct virtio_pci_admin_vq admin_vq;
> > > +
> > >  	/* MSI-X support */
> > >  	int msix_enabled;
> > >  	int intx_enabled;
> > > @@ -86,6 +98,7 @@ struct virtio_pci_device {
> > >  	void (*del_vq)(struct virtio_pci_vq_info *info);
> > >
> > >  	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
> > > +	bool (*is_avq)(struct virtio_device *vdev, unsigned int index);
> > >  };
> > >
> > >  /* Constants for MSI-X */
> > > diff --git a/drivers/virtio/virtio_pci_modern.c
> > > b/drivers/virtio/virtio_pci_modern.c
> > > index d6bb68ba84e5..01c5ba346471 100644
> > > --- a/drivers/virtio/virtio_pci_modern.c
> > > +++ b/drivers/virtio/virtio_pci_modern.c
> > > @@ -26,6 +26,16 @@ static u64 vp_get_features(struct virtio_device *vdev)
> > >  	return vp_modern_get_features(&vp_dev->mdev);
> > >  }
> > >
> > > +static bool vp_is_avq(struct virtio_device *vdev, unsigned int index)
> > > +{
> > > +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > > +
> > > +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> > > +		return false;
> > > +
> > > +	return index == vp_dev->admin_vq.vq_index; }
> > > +
> > >  static void vp_transport_features(struct virtio_device *vdev, u64
> > > features)  {
> > >  	struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -37,6
> > > +47,9 @@ static void vp_transport_features(struct virtio_device *vdev,
> > > u64 features)
> > >
> > >  	if (features & BIT_ULL(VIRTIO_F_RING_RESET))
> > >  		__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
> > > +
> > > +	if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ))
> > > +		__virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ);
> > >  }
> > >
> > >  /* virtio config->finalize_features() implementation */ @@ -317,7
> > > +330,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device
> > *vp_dev,
> > >  	else
> > >  		notify = vp_notify;
> > >
> > > -	if (index >= vp_modern_get_num_queues(mdev))
> > > +	if (index >= vp_modern_get_num_queues(mdev) &&
> > > +	    !vp_is_avq(&vp_dev->vdev, index))
> > >  		return ERR_PTR(-EINVAL);
> > >
> > >  	/* Check if queue is either not available or already active. */ @@
> > > -491,6 +505,46 @@ static bool vp_get_shm_region(struct virtio_device
> > *vdev,
> > >  	return true;
> > >  }
> > >
> > > +static int vp_modern_create_avq(struct virtio_device *vdev) {
> > > +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > > +	struct virtio_pci_admin_vq *avq;
> > > +	struct virtqueue *vq;
> > > +	u16 admin_q_num;
> > > +
> > > +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> > > +		return 0;
> > > +
> > > +	admin_q_num = vp_modern_avq_num(&vp_dev->mdev);
> > > +	if (!admin_q_num)
> > > +		return -EINVAL;
> > 
> > 
> > We really just need 1 entry ATM. Limit to that?
> >
> Above check is only reading and validating that number of admin queues being nonzero from the device.
> It is ok if its > 1. 
> Driver currently using only 1 aq of default depth (entries) reported by the device.
> 
> Did I misunderstand your question about 1 entry?
>  
> > > +
> > > +	avq = &vp_dev->admin_vq;
> > > +	avq->vq_index = vp_modern_avq_index(&vp_dev->mdev);
> > > +	sprintf(avq->name, "avq.%u", avq->vq_index);
> > > +	vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq-
> > >vq_index, NULL,
> > > +			      avq->name, NULL, VIRTIO_MSI_NO_VECTOR);
> > > +	if (IS_ERR(vq)) {
> > > +		dev_err(&vdev->dev, "failed to setup admin virtqueue,
> > err=%ld",
> > > +			PTR_ERR(vq));
> > > +		return PTR_ERR(vq);
> > > +	}
> > > +
> > > +	vp_dev->admin_vq.info.vq = vq;
> > > +	vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index,
> > true);
> > > +	return 0;
> > > +}
> > > +
> > > +static void vp_modern_destroy_avq(struct virtio_device *vdev) {
> > > +	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > > +
> > > +	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
> > > +		return;
> > > +
> > > +	vp_dev->del_vq(&vp_dev->admin_vq.info);
> > > +}
> > > +
> > >  static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
> > >  	.get		= NULL,
> > >  	.set		= NULL,
> > > @@ -509,6 +563,8 @@ static const struct virtio_config_ops
> > virtio_pci_config_nodev_ops = {
> > >  	.get_shm_region  = vp_get_shm_region,
> > >  	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
> > >  	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> > > +	.create_avq = vp_modern_create_avq,
> > > +	.destroy_avq = vp_modern_destroy_avq,
> > >  };
> > >
> > >  static const struct virtio_config_ops virtio_pci_config_ops = { @@
> > > -529,6 +585,8 @@ static const struct virtio_config_ops virtio_pci_config_ops
> > = {
> > >  	.get_shm_region  = vp_get_shm_region,
> > >  	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
> > >  	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> > > +	.create_avq = vp_modern_create_avq,
> > > +	.destroy_avq = vp_modern_destroy_avq,
> > >  };
> > >
> > >  /* the PCI probing function */
> > > @@ -552,6 +610,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device
> > *vp_dev)
> > >  	vp_dev->config_vector = vp_config_vector;
> > >  	vp_dev->setup_vq = setup_vq;
> > >  	vp_dev->del_vq = del_vq;
> > > +	vp_dev->is_avq = vp_is_avq;
> > >  	vp_dev->isr = mdev->isr;
> > >  	vp_dev->vdev.id = mdev->id;
> > >
> > > diff --git a/drivers/virtio/virtio_pci_modern_dev.c
> > > b/drivers/virtio/virtio_pci_modern_dev.c
> > > index 9cb601e16688..4aab1727d121 100644
> > > --- a/drivers/virtio/virtio_pci_modern_dev.c
> > > +++ b/drivers/virtio/virtio_pci_modern_dev.c
> > > @@ -714,6 +714,24 @@ void __iomem *vp_modern_map_vq_notify(struct
> > > virtio_pci_modern_device *mdev,  }
> > > EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
> > >
> > > +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev) {
> > > +	struct virtio_pci_modern_common_cfg __iomem *cfg;
> > > +
> > > +	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev-
> > >common;
> > > +	return vp_ioread16(&cfg->admin_queue_num);
> > > +}
> > > +EXPORT_SYMBOL_GPL(vp_modern_avq_num);
> > > +
> > > +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev) {
> > > +	struct virtio_pci_modern_common_cfg __iomem *cfg;
> > > +
> > > +	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev-
> > >common;
> > > +	return vp_ioread16(&cfg->admin_queue_index);
> > > +}
> > > +EXPORT_SYMBOL_GPL(vp_modern_avq_index);
> > > +
> > >  MODULE_VERSION("0.1");
> > >  MODULE_DESCRIPTION("Modern Virtio PCI Device");
> > MODULE_AUTHOR("Jason
> > > Wang <jasowang@redhat.com>"); diff --git
> > > a/include/linux/virtio_config.h b/include/linux/virtio_config.h index
> > > 2b3438de2c4d..da9b271b54db 100644
> > > --- a/include/linux/virtio_config.h
> > > +++ b/include/linux/virtio_config.h
> > > @@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *);
> > >   *	Returns 0 on success or error status
> > >   *	If disable_vq_and_reset is set, then enable_vq_after_reset must also be
> > >   *	set.
> > > + * @create_avq: create admin virtqueue resource.
> > > + * @destroy_avq: destroy admin virtqueue resource.
> > >   */
> > >  struct virtio_config_ops {
> > >  	void (*get)(struct virtio_device *vdev, unsigned offset, @@ -120,6
> > > +122,8 @@ struct virtio_config_ops {
> > >  			       struct virtio_shm_region *region, u8 id);
> > >  	int (*disable_vq_and_reset)(struct virtqueue *vq);
> > >  	int (*enable_vq_after_reset)(struct virtqueue *vq);
> > > +	int (*create_avq)(struct virtio_device *vdev);
> > > +	void (*destroy_avq)(struct virtio_device *vdev);
> > >  };
> > >
> > >  /* If driver didn't advertise the feature, it will never appear. */
> > > diff --git a/include/linux/virtio_pci_modern.h
> > > b/include/linux/virtio_pci_modern.h
> > > index 067ac1d789bc..0f8737c9ae7d 100644
> > > --- a/include/linux/virtio_pci_modern.h
> > > +++ b/include/linux/virtio_pci_modern.h
> > > @@ -10,6 +10,9 @@ struct virtio_pci_modern_common_cfg {
> > >
> > >  	__le16 queue_notify_data;	/* read-write */
> > >  	__le16 queue_reset;		/* read-write */
> > > +
> > > +	__le16 admin_queue_index;	/* read-only */
> > > +	__le16 admin_queue_num;		/* read-only */
> > >  };
> > >
> > >  struct virtio_pci_modern_device {
> > > @@ -121,4 +124,6 @@ int vp_modern_probe(struct
> > > virtio_pci_modern_device *mdev);  void vp_modern_remove(struct
> > > virtio_pci_modern_device *mdev);  int vp_modern_get_queue_reset(struct
> > > virtio_pci_modern_device *mdev, u16 index);  void
> > > vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16
> > > index);
> > > +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev);
> > > +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev);
> > >  #endif
> > > --
> > > 2.27.0
Parav Pandit Oct. 30, 2023, 6:10 p.m. UTC | #4
> From: Michael S. Tsirkin <mst@redhat.com>
> Sent: Monday, October 30, 2023 9:29 PM
> On Mon, Oct 30, 2023 at 03:51:40PM +0000, Parav Pandit wrote:
> >
> >
> > > From: Michael S. Tsirkin <mst@redhat.com>
> > > Sent: Monday, October 30, 2023 1:53 AM
> > >
> > > On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > > > From: Feng Liu <feliu@nvidia.com>
> > > >
> > > > Introduce support for the admin virtqueue. By negotiating
> > > > VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates
> > > > one administration virtqueue. Administration virtqueue
> > > > implementation in virtio pci generic layer, enables multiple types
> > > > of upper layer drivers such as vfio, net, blk to utilize it.
> > > >
> > > > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > > > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > > > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > > > ---
> > > >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> > > >  drivers/virtio/virtio_pci_common.c     |  3 ++
> > > >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> > > >  drivers/virtio/virtio_pci_modern.c     | 61 +++++++++++++++++++++++++-
> > > >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> > > >  include/linux/virtio_config.h          |  4 ++
> > > >  include/linux/virtio_pci_modern.h      |  5 +++
> > > >  7 files changed, 137 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > index
> > > > 3893dc29eb26..f4080692b351 100644
> > > > --- a/drivers/virtio/virtio.c
> > > > +++ b/drivers/virtio/virtio.c
> > > > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
> > > >  	if (err)
> > > >  		goto err;
> > > >
> > > > +	if (dev->config->create_avq) {
> > > > +		err = dev->config->create_avq(dev);
> > > > +		if (err)
> > > > +			goto err;
> > > > +	}
> > > > +
> > > >  	err = drv->probe(dev);
> > > >  	if (err)
> > > > -		goto err;
> > > > +		goto err_probe;
> > > >
> > > >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> > > >  	if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
> > >
> > > Hmm I am not all that happy that we are just creating avq unconditionally.
> > > Can't we do it on demand to avoid wasting resources if no one uses it?
> > >
> > Virtio queues must be enabled before driver_ok as we discussed in
> F_DYNAMIC bit exercise.
> > So creating AQ when first legacy command is invoked, would be too late.
> 
> Well we didn't release the spec with AQ so I am pretty sure there are no devices
> using the feature. Do we want to already make an exception for AQ and allow
> creating AQs after DRIVER_OK even without F_DYNAMIC?
> 
No. it would abuse the init time config registers for the dynamic things like this.
For flow filters and others there is need for dynamic q creation with multiple physical address anyway.
So creating virtqueues dynamically using a generic scheme is desired with new feature bit.
Michael S. Tsirkin Oct. 30, 2023, 11:31 p.m. UTC | #5
On Mon, Oct 30, 2023 at 06:10:06PM +0000, Parav Pandit wrote:
> 
> 
> > From: Michael S. Tsirkin <mst@redhat.com>
> > Sent: Monday, October 30, 2023 9:29 PM
> > On Mon, Oct 30, 2023 at 03:51:40PM +0000, Parav Pandit wrote:
> > >
> > >
> > > > From: Michael S. Tsirkin <mst@redhat.com>
> > > > Sent: Monday, October 30, 2023 1:53 AM
> > > >
> > > > On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > > > > From: Feng Liu <feliu@nvidia.com>
> > > > >
> > > > > Introduce support for the admin virtqueue. By negotiating
> > > > > VIRTIO_F_ADMIN_VQ feature, driver detects capability and creates
> > > > > one administration virtqueue. Administration virtqueue
> > > > > implementation in virtio pci generic layer, enables multiple types
> > > > > of upper layer drivers such as vfio, net, blk to utilize it.
> > > > >
> > > > > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > > > > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > > > > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > > > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > > > > ---
> > > > >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> > > > >  drivers/virtio/virtio_pci_common.c     |  3 ++
> > > > >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> > > > >  drivers/virtio/virtio_pci_modern.c     | 61 +++++++++++++++++++++++++-
> > > > >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> > > > >  include/linux/virtio_config.h          |  4 ++
> > > > >  include/linux/virtio_pci_modern.h      |  5 +++
> > > > >  7 files changed, 137 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > index
> > > > > 3893dc29eb26..f4080692b351 100644
> > > > > --- a/drivers/virtio/virtio.c
> > > > > +++ b/drivers/virtio/virtio.c
> > > > > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
> > > > >  	if (err)
> > > > >  		goto err;
> > > > >
> > > > > +	if (dev->config->create_avq) {
> > > > > +		err = dev->config->create_avq(dev);
> > > > > +		if (err)
> > > > > +			goto err;
> > > > > +	}
> > > > > +
> > > > >  	err = drv->probe(dev);
> > > > >  	if (err)
> > > > > -		goto err;
> > > > > +		goto err_probe;
> > > > >
> > > > >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> > > > >  	if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
> > > >
> > > > Hmm I am not all that happy that we are just creating avq unconditionally.
> > > > Can't we do it on demand to avoid wasting resources if no one uses it?
> > > >
> > > Virtio queues must be enabled before driver_ok as we discussed in
> > F_DYNAMIC bit exercise.
> > > So creating AQ when first legacy command is invoked, would be too late.
> > 
> > Well we didn't release the spec with AQ so I am pretty sure there are no devices
> > using the feature. Do we want to already make an exception for AQ and allow
> > creating AQs after DRIVER_OK even without F_DYNAMIC?
> > 
> No. it would abuse the init time config registers for the dynamic things like this.
> For flow filters and others there is need for dynamic q creation with multiple physical address anyway.

That seems like a completely unrelated issue.

> So creating virtqueues dynamically using a generic scheme is desired with new feature bit.
Parav Pandit Oct. 31, 2023, 3:11 a.m. UTC | #6
> From: Michael S. Tsirkin <mst@redhat.com>
> Sent: Tuesday, October 31, 2023 5:02 AM
> 
> On Mon, Oct 30, 2023 at 06:10:06PM +0000, Parav Pandit wrote:
> >
> >
> > > From: Michael S. Tsirkin <mst@redhat.com>
> > > Sent: Monday, October 30, 2023 9:29 PM On Mon, Oct 30, 2023 at
> > > 03:51:40PM +0000, Parav Pandit wrote:
> > > >
> > > >
> > > > > From: Michael S. Tsirkin <mst@redhat.com>
> > > > > Sent: Monday, October 30, 2023 1:53 AM
> > > > >
> > > > > On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > > > > > From: Feng Liu <feliu@nvidia.com>
> > > > > >
> > > > > > Introduce support for the admin virtqueue. By negotiating
> > > > > > VIRTIO_F_ADMIN_VQ feature, driver detects capability and
> > > > > > creates one administration virtqueue. Administration virtqueue
> > > > > > implementation in virtio pci generic layer, enables multiple
> > > > > > types of upper layer drivers such as vfio, net, blk to utilize it.
> > > > > >
> > > > > > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > > > > > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > > > > > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > > > > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > > > > > ---
> > > > > >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> > > > > >  drivers/virtio/virtio_pci_common.c     |  3 ++
> > > > > >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> > > > > >  drivers/virtio/virtio_pci_modern.c     | 61
> +++++++++++++++++++++++++-
> > > > > >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> > > > > >  include/linux/virtio_config.h          |  4 ++
> > > > > >  include/linux/virtio_pci_modern.h      |  5 +++
> > > > > >  7 files changed, 137 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > index
> > > > > > 3893dc29eb26..f4080692b351 100644
> > > > > > --- a/drivers/virtio/virtio.c
> > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
> > > > > >  	if (err)
> > > > > >  		goto err;
> > > > > >
> > > > > > +	if (dev->config->create_avq) {
> > > > > > +		err = dev->config->create_avq(dev);
> > > > > > +		if (err)
> > > > > > +			goto err;
> > > > > > +	}
> > > > > > +
> > > > > >  	err = drv->probe(dev);
> > > > > >  	if (err)
> > > > > > -		goto err;
> > > > > > +		goto err_probe;
> > > > > >
> > > > > >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> > > > > >  	if (!(dev->config->get_status(dev) &
> > > > > > VIRTIO_CONFIG_S_DRIVER_OK))
> > > > >
> > > > > Hmm I am not all that happy that we are just creating avq
> unconditionally.
> > > > > Can't we do it on demand to avoid wasting resources if no one uses it?
> > > > >
> > > > Virtio queues must be enabled before driver_ok as we discussed in
> > > F_DYNAMIC bit exercise.
> > > > So creating AQ when first legacy command is invoked, would be too late.
> > >
> > > Well we didn't release the spec with AQ so I am pretty sure there
> > > are no devices using the feature. Do we want to already make an
> > > exception for AQ and allow creating AQs after DRIVER_OK even without
> F_DYNAMIC?
> > >
> > No. it would abuse the init time config registers for the dynamic things like
> this.
> > For flow filters and others there is need for dynamic q creation with multiple
> physical address anyway.
> 
> That seems like a completely unrelated issue.
> 
It isn't.
Driver requirements are:
1. Driver needs to dynamically create vqs
2. Sometimes this VQ needs to have multiple physical addresses
3. Driver needs to create them after driver is fully running, past the bootstrap stage using tiny config registers

Device requirements are:
1. Not to keep growing 64K VQs *(8+8+8) bytes of address registers + enable bit
2. Ability to return appropriate error code when fail to create queue
3. Above #2

Users of this new infrastructure are eth tx,rx queues, flow filter queues, aq, blk rq per cpu.
AQs are just one of those.
When a generic infrastructure for this will be built in the spec as we started that, all above use cases will be handled.

> > So creating virtqueues dynamically using a generic scheme is desired with
> new feature bit.
Michael S. Tsirkin Oct. 31, 2023, 7:59 a.m. UTC | #7
On Tue, Oct 31, 2023 at 03:11:57AM +0000, Parav Pandit wrote:
> 
> 
> > From: Michael S. Tsirkin <mst@redhat.com>
> > Sent: Tuesday, October 31, 2023 5:02 AM
> > 
> > On Mon, Oct 30, 2023 at 06:10:06PM +0000, Parav Pandit wrote:
> > >
> > >
> > > > From: Michael S. Tsirkin <mst@redhat.com>
> > > > Sent: Monday, October 30, 2023 9:29 PM On Mon, Oct 30, 2023 at
> > > > 03:51:40PM +0000, Parav Pandit wrote:
> > > > >
> > > > >
> > > > > > From: Michael S. Tsirkin <mst@redhat.com>
> > > > > > Sent: Monday, October 30, 2023 1:53 AM
> > > > > >
> > > > > > On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > > > > > > From: Feng Liu <feliu@nvidia.com>
> > > > > > >
> > > > > > > Introduce support for the admin virtqueue. By negotiating
> > > > > > > VIRTIO_F_ADMIN_VQ feature, driver detects capability and
> > > > > > > creates one administration virtqueue. Administration virtqueue
> > > > > > > implementation in virtio pci generic layer, enables multiple
> > > > > > > types of upper layer drivers such as vfio, net, blk to utilize it.
> > > > > > >
> > > > > > > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > > > > > > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > > > > > > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > > > > > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > > > > > > ---
> > > > > > >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> > > > > > >  drivers/virtio/virtio_pci_common.c     |  3 ++
> > > > > > >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> > > > > > >  drivers/virtio/virtio_pci_modern.c     | 61
> > +++++++++++++++++++++++++-
> > > > > > >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> > > > > > >  include/linux/virtio_config.h          |  4 ++
> > > > > > >  include/linux/virtio_pci_modern.h      |  5 +++
> > > > > > >  7 files changed, 137 insertions(+), 6 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > index
> > > > > > > 3893dc29eb26..f4080692b351 100644
> > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device *_d)
> > > > > > >  	if (err)
> > > > > > >  		goto err;
> > > > > > >
> > > > > > > +	if (dev->config->create_avq) {
> > > > > > > +		err = dev->config->create_avq(dev);
> > > > > > > +		if (err)
> > > > > > > +			goto err;
> > > > > > > +	}
> > > > > > > +
> > > > > > >  	err = drv->probe(dev);
> > > > > > >  	if (err)
> > > > > > > -		goto err;
> > > > > > > +		goto err_probe;
> > > > > > >
> > > > > > >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> > > > > > >  	if (!(dev->config->get_status(dev) &
> > > > > > > VIRTIO_CONFIG_S_DRIVER_OK))
> > > > > >
> > > > > > Hmm I am not all that happy that we are just creating avq
> > unconditionally.
> > > > > > Can't we do it on demand to avoid wasting resources if no one uses it?
> > > > > >
> > > > > Virtio queues must be enabled before driver_ok as we discussed in
> > > > F_DYNAMIC bit exercise.
> > > > > So creating AQ when first legacy command is invoked, would be too late.
> > > >
> > > > Well we didn't release the spec with AQ so I am pretty sure there
> > > > are no devices using the feature. Do we want to already make an
> > > > exception for AQ and allow creating AQs after DRIVER_OK even without
> > F_DYNAMIC?
> > > >
> > > No. it would abuse the init time config registers for the dynamic things like
> > this.
> > > For flow filters and others there is need for dynamic q creation with multiple
> > physical address anyway.
> > 
> > That seems like a completely unrelated issue.
> > 
> It isn't.
> Driver requirements are:
> 1. Driver needs to dynamically create vqs
> 2. Sometimes this VQ needs to have multiple physical addresses
> 3. Driver needs to create them after driver is fully running, past the bootstrap stage using tiny config registers
> 
> Device requirements are:
> 1. Not to keep growing 64K VQs *(8+8+8) bytes of address registers + enable bit
> 2. Ability to return appropriate error code when fail to create queue
> 3. Above #2
> 
> Users of this new infrastructure are eth tx,rx queues, flow filter queues, aq, blk rq per cpu.
> AQs are just one of those.
> When a generic infrastructure for this will be built in the spec as we started that, all above use cases will be handled.
> 
> > > So creating virtqueues dynamically using a generic scheme is desired with
> > new feature bit.

Reducing config registers and returning errors should be handled by
a new transport.
VQ with multiple addresses - I can see how you would maybe only
support that with that new transport?


I think I can guess why you are tying multiple addresses to dynamic VQs -
you suspect that allocating huge half-megabyte VQs dynamically will fail if
triggered on a busy system. Is that the point?


In that case I feel it's a good argument to special-case
admin VQs because there's no real need to make them
huge at the moment - for example this driver just adds one at a time.
No?
Parav Pandit Oct. 31, 2023, 12:11 p.m. UTC | #8
> From: Michael S. Tsirkin <mst@redhat.com>
> Sent: Tuesday, October 31, 2023 1:29 PM
> 
> On Tue, Oct 31, 2023 at 03:11:57AM +0000, Parav Pandit wrote:
> >
> >
> > > From: Michael S. Tsirkin <mst@redhat.com>
> > > Sent: Tuesday, October 31, 2023 5:02 AM
> > >
> > > On Mon, Oct 30, 2023 at 06:10:06PM +0000, Parav Pandit wrote:
> > > >
> > > >
> > > > > From: Michael S. Tsirkin <mst@redhat.com>
> > > > > Sent: Monday, October 30, 2023 9:29 PM On Mon, Oct 30, 2023 at
> > > > > 03:51:40PM +0000, Parav Pandit wrote:
> > > > > >
> > > > > >
> > > > > > > From: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > Sent: Monday, October 30, 2023 1:53 AM
> > > > > > >
> > > > > > > On Sun, Oct 29, 2023 at 05:59:45PM +0200, Yishai Hadas wrote:
> > > > > > > > From: Feng Liu <feliu@nvidia.com>
> > > > > > > >
> > > > > > > > Introduce support for the admin virtqueue. By negotiating
> > > > > > > > VIRTIO_F_ADMIN_VQ feature, driver detects capability and
> > > > > > > > creates one administration virtqueue. Administration
> > > > > > > > virtqueue implementation in virtio pci generic layer,
> > > > > > > > enables multiple types of upper layer drivers such as vfio, net, blk
> to utilize it.
> > > > > > > >
> > > > > > > > Signed-off-by: Feng Liu <feliu@nvidia.com>
> > > > > > > > Reviewed-by: Parav Pandit <parav@nvidia.com>
> > > > > > > > Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> > > > > > > > Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
> > > > > > > > ---
> > > > > > > >  drivers/virtio/virtio.c                | 37 ++++++++++++++--
> > > > > > > >  drivers/virtio/virtio_pci_common.c     |  3 ++
> > > > > > > >  drivers/virtio/virtio_pci_common.h     | 15 ++++++-
> > > > > > > >  drivers/virtio/virtio_pci_modern.c     | 61
> > > +++++++++++++++++++++++++-
> > > > > > > >  drivers/virtio/virtio_pci_modern_dev.c | 18 ++++++++
> > > > > > > >  include/linux/virtio_config.h          |  4 ++
> > > > > > > >  include/linux/virtio_pci_modern.h      |  5 +++
> > > > > > > >  7 files changed, 137 insertions(+), 6 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio.c
> > > > > > > > b/drivers/virtio/virtio.c index
> > > > > > > > 3893dc29eb26..f4080692b351 100644
> > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > @@ -302,9 +302,15 @@ static int virtio_dev_probe(struct device
> *_d)
> > > > > > > >  	if (err)
> > > > > > > >  		goto err;
> > > > > > > >
> > > > > > > > +	if (dev->config->create_avq) {
> > > > > > > > +		err = dev->config->create_avq(dev);
> > > > > > > > +		if (err)
> > > > > > > > +			goto err;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > >  	err = drv->probe(dev);
> > > > > > > >  	if (err)
> > > > > > > > -		goto err;
> > > > > > > > +		goto err_probe;
> > > > > > > >
> > > > > > > >  	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
> > > > > > > >  	if (!(dev->config->get_status(dev) &
> > > > > > > > VIRTIO_CONFIG_S_DRIVER_OK))
> > > > > > >
> > > > > > > Hmm I am not all that happy that we are just creating avq
> > > unconditionally.
> > > > > > > Can't we do it on demand to avoid wasting resources if no one uses
> it?
> > > > > > >
> > > > > > Virtio queues must be enabled before driver_ok as we discussed
> > > > > > in
> > > > > F_DYNAMIC bit exercise.
> > > > > > So creating AQ when first legacy command is invoked, would be too
> late.
> > > > >
> > > > > Well we didn't release the spec with AQ so I am pretty sure
> > > > > there are no devices using the feature. Do we want to already
> > > > > make an exception for AQ and allow creating AQs after DRIVER_OK
> > > > > even without
> > > F_DYNAMIC?
> > > > >
> > > > No. it would abuse the init time config registers for the dynamic
> > > > things like
> > > this.
> > > > For flow filters and others there is need for dynamic q creation
> > > > with multiple
> > > physical address anyway.
> > >
> > > That seems like a completely unrelated issue.
> > >
> > It isn't.
> > Driver requirements are:
> > 1. Driver needs to dynamically create vqs 2. Sometimes this VQ needs
> > to have multiple physical addresses 3. Driver needs to create them
> > after driver is fully running, past the bootstrap stage using tiny
> > config registers
> >
> > Device requirements are:
> > 1. Not to keep growing 64K VQs *(8+8+8) bytes of address registers +
> > enable bit 2. Ability to return appropriate error code when fail to
> > create queue 3. Above #2
> >
> > Users of this new infrastructure are eth tx,rx queues, flow filter queues, aq, blk
> rq per cpu.
> > AQs are just one of those.
> > When a generic infrastructure for this will be built in the spec as we started
> that, all above use cases will be handled.
> >
> > > > So creating virtqueues dynamically using a generic scheme is
> > > > desired with
> > > new feature bit.
> 
> Reducing config registers and returning errors should be handled by a new
> transport.
> VQ with multiple addresses - I can see how you would maybe only support that
> with that new transport?
> 
PCI is the transport that offers unified way to create and destroy dynamic VQs. And modify/query VQ attributes.
Unified across PFs, VFs.

So no need for some grand transport. Virtio spec already underlying infrastructure that can be extended for PCI.
For example,

VQ attributes are already modified and queried by CVQ for net already.

Such create/destroy commands can easily be supported on cvq.
(cvq already exists on 6 out of 18 virtio devices).
Rest 12 devices are so small which will unlikely need dynamic vqs.

All will be neatly tied to single interface between driver and device for VQ create/destroy/modify/query.

Anyway, this is the work OASIS currently doing for 1.4-timeline.
This patch is based on 1.3 standard update.

> 
> I think I can guess why you are tying multiple addresses to dynamic VQs - you
> suspect that allocating huge half-megabyte VQs dynamically will fail if triggered
> on a busy system. Is that the point?
Yes, it is likely. I don't have the link right now, but Eric S and/or Saeed had some links to it.

> 
> 
> In that case I feel it's a good argument to special-case admin VQs because
> there's no real need to make them huge at the moment - for example this
> driver just adds one at a time.
> No?
In current form creating a VQ with single outstanding command with 4 descriptors = 64 bytes is not that huge resource wastage either.

So Linux driver can adapt to dynamic aq and with multiple PAs can be done in future when OASIS adapts to it.
diff mbox series

Patch

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3893dc29eb26..f4080692b351 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -302,9 +302,15 @@  static int virtio_dev_probe(struct device *_d)
 	if (err)
 		goto err;
 
+	if (dev->config->create_avq) {
+		err = dev->config->create_avq(dev);
+		if (err)
+			goto err;
+	}
+
 	err = drv->probe(dev);
 	if (err)
-		goto err;
+		goto err_probe;
 
 	/* If probe didn't do it, mark device DRIVER_OK ourselves. */
 	if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
@@ -316,6 +322,10 @@  static int virtio_dev_probe(struct device *_d)
 	virtio_config_enable(dev);
 
 	return 0;
+
+err_probe:
+	if (dev->config->destroy_avq)
+		dev->config->destroy_avq(dev);
 err:
 	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return err;
@@ -331,6 +341,9 @@  static void virtio_dev_remove(struct device *_d)
 
 	drv->remove(dev);
 
+	if (dev->config->destroy_avq)
+		dev->config->destroy_avq(dev);
+
 	/* Driver should have reset device. */
 	WARN_ON_ONCE(dev->config->get_status(dev));
 
@@ -489,13 +502,20 @@  EXPORT_SYMBOL_GPL(unregister_virtio_device);
 int virtio_device_freeze(struct virtio_device *dev)
 {
 	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+	int ret;
 
 	virtio_config_disable(dev);
 
 	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
 
-	if (drv && drv->freeze)
-		return drv->freeze(dev);
+	if (drv && drv->freeze) {
+		ret = drv->freeze(dev);
+		if (ret)
+			return ret;
+	}
+
+	if (dev->config->destroy_avq)
+		dev->config->destroy_avq(dev);
 
 	return 0;
 }
@@ -532,10 +552,16 @@  int virtio_device_restore(struct virtio_device *dev)
 	if (ret)
 		goto err;
 
+	if (dev->config->create_avq) {
+		ret = dev->config->create_avq(dev);
+		if (ret)
+			goto err;
+	}
+
 	if (drv->restore) {
 		ret = drv->restore(dev);
 		if (ret)
-			goto err;
+			goto err_restore;
 	}
 
 	/* If restore didn't do it, mark device DRIVER_OK ourselves. */
@@ -546,6 +572,9 @@  int virtio_device_restore(struct virtio_device *dev)
 
 	return 0;
 
+err_restore:
+	if (dev->config->destroy_avq)
+		dev->config->destroy_avq(dev);
 err:
 	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return ret;
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index c2524a7207cf..6b4766d5abe6 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -236,6 +236,9 @@  void vp_del_vqs(struct virtio_device *vdev)
 	int i;
 
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
+		if (vp_dev->is_avq(vdev, vq->index))
+			continue;
+
 		if (vp_dev->per_vq_vectors) {
 			int v = vp_dev->vqs[vq->index]->msix_vector;
 
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 4b773bd7c58c..e03af0966a4b 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -41,6 +41,14 @@  struct virtio_pci_vq_info {
 	unsigned int msix_vector;
 };
 
+struct virtio_pci_admin_vq {
+	/* Virtqueue info associated with this admin queue. */
+	struct virtio_pci_vq_info info;
+	/* Name of the admin queue: avq.$index. */
+	char name[10];
+	u16 vq_index;
+};
+
 /* Our device structure */
 struct virtio_pci_device {
 	struct virtio_device vdev;
@@ -58,9 +66,13 @@  struct virtio_pci_device {
 	spinlock_t lock;
 	struct list_head virtqueues;
 
-	/* array of all queues for house-keeping */
+	/* Array of all virtqueues reported in the
+	 * PCI common config num_queues field
+	 */
 	struct virtio_pci_vq_info **vqs;
 
+	struct virtio_pci_admin_vq admin_vq;
+
 	/* MSI-X support */
 	int msix_enabled;
 	int intx_enabled;
@@ -86,6 +98,7 @@  struct virtio_pci_device {
 	void (*del_vq)(struct virtio_pci_vq_info *info);
 
 	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
+	bool (*is_avq)(struct virtio_device *vdev, unsigned int index);
 };
 
 /* Constants for MSI-X */
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index d6bb68ba84e5..01c5ba346471 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -26,6 +26,16 @@  static u64 vp_get_features(struct virtio_device *vdev)
 	return vp_modern_get_features(&vp_dev->mdev);
 }
 
+static bool vp_is_avq(struct virtio_device *vdev, unsigned int index)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
+		return false;
+
+	return index == vp_dev->admin_vq.vq_index;
+}
+
 static void vp_transport_features(struct virtio_device *vdev, u64 features)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -37,6 +47,9 @@  static void vp_transport_features(struct virtio_device *vdev, u64 features)
 
 	if (features & BIT_ULL(VIRTIO_F_RING_RESET))
 		__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
+
+	if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ))
+		__virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ);
 }
 
 /* virtio config->finalize_features() implementation */
@@ -317,7 +330,8 @@  static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	else
 		notify = vp_notify;
 
-	if (index >= vp_modern_get_num_queues(mdev))
+	if (index >= vp_modern_get_num_queues(mdev) &&
+	    !vp_is_avq(&vp_dev->vdev, index))
 		return ERR_PTR(-EINVAL);
 
 	/* Check if queue is either not available or already active. */
@@ -491,6 +505,46 @@  static bool vp_get_shm_region(struct virtio_device *vdev,
 	return true;
 }
 
+static int vp_modern_create_avq(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_admin_vq *avq;
+	struct virtqueue *vq;
+	u16 admin_q_num;
+
+	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
+		return 0;
+
+	admin_q_num = vp_modern_avq_num(&vp_dev->mdev);
+	if (!admin_q_num)
+		return -EINVAL;
+
+	avq = &vp_dev->admin_vq;
+	avq->vq_index = vp_modern_avq_index(&vp_dev->mdev);
+	sprintf(avq->name, "avq.%u", avq->vq_index);
+	vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL,
+			      avq->name, NULL, VIRTIO_MSI_NO_VECTOR);
+	if (IS_ERR(vq)) {
+		dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld",
+			PTR_ERR(vq));
+		return PTR_ERR(vq);
+	}
+
+	vp_dev->admin_vq.info.vq = vq;
+	vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true);
+	return 0;
+}
+
+static void vp_modern_destroy_avq(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ))
+		return;
+
+	vp_dev->del_vq(&vp_dev->admin_vq.info);
+}
+
 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
 	.get		= NULL,
 	.set		= NULL,
@@ -509,6 +563,8 @@  static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
 	.get_shm_region  = vp_get_shm_region,
 	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
 	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
+	.create_avq = vp_modern_create_avq,
+	.destroy_avq = vp_modern_destroy_avq,
 };
 
 static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -529,6 +585,8 @@  static const struct virtio_config_ops virtio_pci_config_ops = {
 	.get_shm_region  = vp_get_shm_region,
 	.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
 	.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
+	.create_avq = vp_modern_create_avq,
+	.destroy_avq = vp_modern_destroy_avq,
 };
 
 /* the PCI probing function */
@@ -552,6 +610,7 @@  int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
 	vp_dev->config_vector = vp_config_vector;
 	vp_dev->setup_vq = setup_vq;
 	vp_dev->del_vq = del_vq;
+	vp_dev->is_avq = vp_is_avq;
 	vp_dev->isr = mdev->isr;
 	vp_dev->vdev.id = mdev->id;
 
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
index 9cb601e16688..4aab1727d121 100644
--- a/drivers/virtio/virtio_pci_modern_dev.c
+++ b/drivers/virtio/virtio_pci_modern_dev.c
@@ -714,6 +714,24 @@  void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
 }
 EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
 
+u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev)
+{
+	struct virtio_pci_modern_common_cfg __iomem *cfg;
+
+	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
+	return vp_ioread16(&cfg->admin_queue_num);
+}
+EXPORT_SYMBOL_GPL(vp_modern_avq_num);
+
+u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev)
+{
+	struct virtio_pci_modern_common_cfg __iomem *cfg;
+
+	cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
+	return vp_ioread16(&cfg->admin_queue_index);
+}
+EXPORT_SYMBOL_GPL(vp_modern_avq_index);
+
 MODULE_VERSION("0.1");
 MODULE_DESCRIPTION("Modern Virtio PCI Device");
 MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 2b3438de2c4d..da9b271b54db 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -93,6 +93,8 @@  typedef void vq_callback_t(struct virtqueue *);
  *	Returns 0 on success or error status
  *	If disable_vq_and_reset is set, then enable_vq_after_reset must also be
  *	set.
+ * @create_avq: create admin virtqueue resource.
+ * @destroy_avq: destroy admin virtqueue resource.
  */
 struct virtio_config_ops {
 	void (*get)(struct virtio_device *vdev, unsigned offset,
@@ -120,6 +122,8 @@  struct virtio_config_ops {
 			       struct virtio_shm_region *region, u8 id);
 	int (*disable_vq_and_reset)(struct virtqueue *vq);
 	int (*enable_vq_after_reset)(struct virtqueue *vq);
+	int (*create_avq)(struct virtio_device *vdev);
+	void (*destroy_avq)(struct virtio_device *vdev);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index 067ac1d789bc..0f8737c9ae7d 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -10,6 +10,9 @@  struct virtio_pci_modern_common_cfg {
 
 	__le16 queue_notify_data;	/* read-write */
 	__le16 queue_reset;		/* read-write */
+
+	__le16 admin_queue_index;	/* read-only */
+	__le16 admin_queue_num;		/* read-only */
 };
 
 struct virtio_pci_modern_device {
@@ -121,4 +124,6 @@  int vp_modern_probe(struct virtio_pci_modern_device *mdev);
 void vp_modern_remove(struct virtio_pci_modern_device *mdev);
 int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
 void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
+u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev);
+u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev);
 #endif