diff mbox series

[V13,4/6] mdev: introduce mediated virtio bus

Message ID 20191118105923.7991-5-jasowang@redhat.com (mailing list archive)
State New, archived
Headers show
Series mdev based hardware virtio offloading support | expand

Commit Message

Jason Wang Nov. 18, 2019, 10:59 a.m. UTC
This patch implements a mediated virtio bus over mdev framework. This
will be used by the future virtio-mdev and vhost-mdev on top to allow
driver from either userspace or kernel to control the device which is
capable of offloading virtio datapath.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 MAINTAINERS                       |   2 +
 drivers/mdev/Kconfig              |  10 ++
 drivers/mdev/Makefile             |   2 +
 drivers/mdev/virtio.c             | 126 +++++++++++++++++++++++
 include/linux/mdev_virtio.h       | 163 ++++++++++++++++++++++++++++++
 include/linux/mod_devicetable.h   |   8 ++
 scripts/mod/devicetable-offsets.c |   3 +
 scripts/mod/file2alias.c          |  12 +++
 8 files changed, 326 insertions(+)
 create mode 100644 drivers/mdev/virtio.c
 create mode 100644 include/linux/mdev_virtio.h

Comments

Jason Gunthorpe Nov. 18, 2019, 1:41 p.m. UTC | #1
On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
> +struct bus_type mdev_virtio_bus_type;
> +
> +struct mdev_virtio_device {
> +	struct mdev_device mdev;
> +	const struct mdev_virtio_ops *ops;
> +	u16 class_id;
> +};

This seems to share nothing with mdev (ie mdev-vfio), why is it on the
same bus?

We went over this recently with Greg and he seemed pretty clear on
this..

> +struct mdev_virtio_ops {
> +	/* Virtqueue ops */
> +	int (*set_vq_address)(struct mdev_device *mdev,
> +			      u16 idx, u64 desc_area, u64 driver_area,
> +			      u64 device_area);
> +	void (*set_vq_num)(struct mdev_device *mdev, u16 idx, u32 num);
> +	void (*kick_vq)(struct mdev_device *mdev, u16 idx);
> +	void (*set_vq_cb)(struct mdev_device *mdev, u16 idx,
> +			  struct virtio_mdev_callback *cb);
> +	void (*set_vq_ready)(struct mdev_device *mdev, u16 idx, bool ready);
> +	bool (*get_vq_ready)(struct mdev_device *mdev, u16 idx);
> +	int (*set_vq_state)(struct mdev_device *mdev, u16 idx, u64 state);
> +	u64 (*get_vq_state)(struct mdev_device *mdev, u16 idx);
> +
> +	/* Device ops */
> +	u16 (*get_vq_align)(struct mdev_device *mdev);
> +	u64 (*get_features)(struct mdev_device *mdev);
> +	int (*set_features)(struct mdev_device *mdev, u64 features);
> +	void (*set_config_cb)(struct mdev_device *mdev,
> +			      struct virtio_mdev_callback *cb);
> +	u16 (*get_vq_num_max)(struct mdev_device *mdev);
> +	u32 (*get_device_id)(struct mdev_device *mdev);
> +	u32 (*get_vendor_id)(struct mdev_device *mdev);
> +	u8 (*get_status)(struct mdev_device *mdev);
> +	void (*set_status)(struct mdev_device *mdev, u8 status);
> +	void (*get_config)(struct mdev_device *mdev, unsigned int offset,
> +			   void *buf, unsigned int len);
> +	void (*set_config)(struct mdev_device *mdev, unsigned int offset,
> +			   const void *buf, unsigned int len);
> +	u32 (*get_generation)(struct mdev_device *mdev);
> +};

Why aren't all of these 'struct mdev_device_virtio *' ?

Jason
Michael S. Tsirkin Nov. 18, 2019, 8:27 p.m. UTC | #2
On Mon, Nov 18, 2019 at 01:41:00PM +0000, Jason Gunthorpe wrote:
> On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
> > +struct bus_type mdev_virtio_bus_type;
> > +
> > +struct mdev_virtio_device {
> > +	struct mdev_device mdev;
> > +	const struct mdev_virtio_ops *ops;
> > +	u16 class_id;
> > +};
> 
> This seems to share nothing with mdev (ie mdev-vfio), why is it on the
> same bus?

I must be missing something - which bus do they share?
Jason Gunthorpe Nov. 18, 2019, 8:28 p.m. UTC | #3
On Mon, Nov 18, 2019 at 03:27:13PM -0500, Michael S. Tsirkin wrote:
> On Mon, Nov 18, 2019 at 01:41:00PM +0000, Jason Gunthorpe wrote:
> > On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
> > > +struct bus_type mdev_virtio_bus_type;
> > > +
> > > +struct mdev_virtio_device {
> > > +	struct mdev_device mdev;
> > > +	const struct mdev_virtio_ops *ops;
> > > +	u16 class_id;
> > > +};
> > 
> > This seems to share nothing with mdev (ie mdev-vfio), why is it on the
> > same bus?
> 
> I must be missing something - which bus do they share?

mdev_bus_type ?

Jason
Jason Wang Nov. 19, 2019, 2:40 a.m. UTC | #4
On 2019/11/18 下午9:41, Jason Gunthorpe wrote:
> On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
>> +struct bus_type mdev_virtio_bus_type;
>> +
>> +struct mdev_virtio_device {
>> +	struct mdev_device mdev;
>> +	const struct mdev_virtio_ops *ops;
>> +	u16 class_id;
>> +};
> This seems to share nothing with mdev (ie mdev-vfio), why is it on the
> same bus?
>
> We went over this recently with Greg and he seemed pretty clear on
> this..


Mdev-vfio is not on this bus. The class_id here is used for distinguish 
userspace virtio driver (vhost-mdev) and kernel virtio driver (virtio-mdev).

Parent can choose to create a type of "vhost" device then vhost-mdev 
driver is matched, or "virtio" device then virtio-mdev driver is matched.


>
>> +struct mdev_virtio_ops {
>> +	/* Virtqueue ops */
>> +	int (*set_vq_address)(struct mdev_device *mdev,
>> +			      u16 idx, u64 desc_area, u64 driver_area,
>> +			      u64 device_area);
>> +	void (*set_vq_num)(struct mdev_device *mdev, u16 idx, u32 num);
>> +	void (*kick_vq)(struct mdev_device *mdev, u16 idx);
>> +	void (*set_vq_cb)(struct mdev_device *mdev, u16 idx,
>> +			  struct virtio_mdev_callback *cb);
>> +	void (*set_vq_ready)(struct mdev_device *mdev, u16 idx, bool ready);
>> +	bool (*get_vq_ready)(struct mdev_device *mdev, u16 idx);
>> +	int (*set_vq_state)(struct mdev_device *mdev, u16 idx, u64 state);
>> +	u64 (*get_vq_state)(struct mdev_device *mdev, u16 idx);
>> +
>> +	/* Device ops */
>> +	u16 (*get_vq_align)(struct mdev_device *mdev);
>> +	u64 (*get_features)(struct mdev_device *mdev);
>> +	int (*set_features)(struct mdev_device *mdev, u64 features);
>> +	void (*set_config_cb)(struct mdev_device *mdev,
>> +			      struct virtio_mdev_callback *cb);
>> +	u16 (*get_vq_num_max)(struct mdev_device *mdev);
>> +	u32 (*get_device_id)(struct mdev_device *mdev);
>> +	u32 (*get_vendor_id)(struct mdev_device *mdev);
>> +	u8 (*get_status)(struct mdev_device *mdev);
>> +	void (*set_status)(struct mdev_device *mdev, u8 status);
>> +	void (*get_config)(struct mdev_device *mdev, unsigned int offset,
>> +			   void *buf, unsigned int len);
>> +	void (*set_config)(struct mdev_device *mdev, unsigned int offset,
>> +			   const void *buf, unsigned int len);
>> +	u32 (*get_generation)(struct mdev_device *mdev);
>> +};
> Why aren't all of these 'struct mdev_device_virtio *' ?
>
> Jason


It can simplify the assignment of those ops in mdev device implementation.

Thanks
Jason Wang Nov. 19, 2019, 2:41 a.m. UTC | #5
On 2019/11/19 上午4:28, Jason Gunthorpe wrote:
> On Mon, Nov 18, 2019 at 03:27:13PM -0500, Michael S. Tsirkin wrote:
>> On Mon, Nov 18, 2019 at 01:41:00PM +0000, Jason Gunthorpe wrote:
>>> On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
>>>> +struct bus_type mdev_virtio_bus_type;
>>>> +
>>>> +struct mdev_virtio_device {
>>>> +	struct mdev_device mdev;
>>>> +	const struct mdev_virtio_ops *ops;
>>>> +	u16 class_id;
>>>> +};
>>> This seems to share nothing with mdev (ie mdev-vfio), why is it on the
>>> same bus?
>> I must be missing something - which bus do they share?
> mdev_bus_type ?
>
> Jason


Note: virtio has its own bus: mdev_virtio_bus_type. So they are not the 
same bus.

Thanks
Randy Dunlap Nov. 19, 2019, 3:13 a.m. UTC | #6
Hi,

On 11/18/19 2:59 AM, Jason Wang wrote:
> diff --git a/drivers/mdev/Kconfig b/drivers/mdev/Kconfig
> index 4561f2d4178f..cd84d4670552 100644
> --- a/drivers/mdev/Kconfig
> +++ b/drivers/mdev/Kconfig
> @@ -17,3 +17,13 @@ config VFIO_MDEV
>  	  more details.
>  
>  	  If you don't know what do here, say N.
> +
> +config MDEV_VIRTIO
> +       tristate "Mediated VIRTIO bus"
> +       depends on VIRTIO && MDEV
> +       default n
> +       help
> +	  Proivdes a mediated BUS for virtio. It could be used by

	  Provides

> +          either kenrel driver or userspace driver.

	            kernel

> +
> +	  If you don't know what do here, say N.

All of these lines should be indented with one tab, not spaces.
Jason Wang Nov. 19, 2019, 2:02 p.m. UTC | #7
On 2019/11/19 下午8:38, Jason Gunthorpe wrote:
> On Tue, Nov 19, 2019 at 10:41:31AM +0800, Jason Wang wrote:
>> On 2019/11/19 上午4:28, Jason Gunthorpe wrote:
>>> On Mon, Nov 18, 2019 at 03:27:13PM -0500, Michael S. Tsirkin wrote:
>>>> On Mon, Nov 18, 2019 at 01:41:00PM +0000, Jason Gunthorpe wrote:
>>>>> On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
>>>>>> +struct bus_type mdev_virtio_bus_type;
>>>>>> +
>>>>>> +struct mdev_virtio_device {
>>>>>> +	struct mdev_device mdev;
>>>>>> +	const struct mdev_virtio_ops *ops;
>>>>>> +	u16 class_id;
>>>>>> +};
>>>>> This seems to share nothing with mdev (ie mdev-vfio), why is it on the
>>>>> same bus?
>>>> I must be missing something - which bus do they share?
>>> mdev_bus_type ?
>>>
>>> Jason
>>
>> Note: virtio has its own bus: mdev_virtio_bus_type. So they are not the same
>> bus.
> That is even worse, why involve struct mdev_device at all then?
>
> Jason


I don't quite get the question here.

My understanding for mdev is that it was a mediator between the driver 
and physical device when it's hard to let them talk directly due to the 
complexity of refactoring and maintenance. This is exact the case of 
hardware that can offload virtio datapath but not control path. We want 
to present a unified interface (standard virtio) instead of a vendor 
specific interface, so a mediator level in the middle is a must. For 
virtio driver, mediator present a full virtio compatible device. For 
hardware, mediator will mediate the difference between the behavior 
defined by virtio spec and real hardware.

And the reason why not inventing something new instead of existed mdev 
is because mdev fits into the requirement of virtio-mdev very well:
1) mature framework which has been used by vGPU and other type for years
2) life cycle interface, have a unified interface for management instead 
of a vendor specific one so less pain for management
3) device type management. In the case of virtio, user can choose to 
create a vhost type of device or virtio type of device, or technically 
it can choose which version or features of virtio device it want to create.
4) IOMMU support, mdev allows DMA isolation done at either parent level 
or platform/bus level
5) vendor specific attributes

So in Parav's thread [1], if I understand correctly.  The major concern 
is the  API multiplexer at a single mdev bus level. So comes to this 
series which decouple VFIO and make mdev more generic to be suitable for 
implementing a set of independent buses with similar functions.

Thanks

[1] https://www.spinics.net/lists/linux-rdma/msg85856.html
Jason Wang Nov. 20, 2019, 2:14 a.m. UTC | #8
On 2019/11/19 下午10:14, Jason Gunthorpe wrote:
> On Tue, Nov 19, 2019 at 10:02:08PM +0800, Jason Wang wrote:
>> On 2019/11/19 下午8:38, Jason Gunthorpe wrote:
>>> On Tue, Nov 19, 2019 at 10:41:31AM +0800, Jason Wang wrote:
>>>> On 2019/11/19 上午4:28, Jason Gunthorpe wrote:
>>>>> On Mon, Nov 18, 2019 at 03:27:13PM -0500, Michael S. Tsirkin wrote:
>>>>>> On Mon, Nov 18, 2019 at 01:41:00PM +0000, Jason Gunthorpe wrote:
>>>>>>> On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
>>>>>>>> +struct bus_type mdev_virtio_bus_type;
>>>>>>>> +
>>>>>>>> +struct mdev_virtio_device {
>>>>>>>> +	struct mdev_device mdev;
>>>>>>>> +	const struct mdev_virtio_ops *ops;
>>>>>>>> +	u16 class_id;
>>>>>>>> +};
>>>>>>> This seems to share nothing with mdev (ie mdev-vfio), why is it on the
>>>>>>> same bus?
>>>>>> I must be missing something - which bus do they share?
>>>>> mdev_bus_type ?
>>>>>
>>>>> Jason
>>>> Note: virtio has its own bus: mdev_virtio_bus_type. So they are not the same
>>>> bus.
>>> That is even worse, why involve struct mdev_device at all then?
>>>
>>> Jason
>>
>> I don't quite get the question here.
> In the driver model the bus_type and foo_device are closely
> linked.


I don't get the definition of "closely linked" here. Do you think the 
bus and device implement virtual bus series are closely linked? If yes, 
how did they achieve that?


>   Creating 'mdev_device' instances and overriding the bus_type
> is a very abusive thing to do.


Ok, mdev_device (without this series) had:

struct mdev_device {
     struct device dev;
     struct mdev_parent *parent;
     guid_t uuid;
     void *driver_data;
     struct list_head next;
     struct kobject *type_kobj;
     struct device *iommu_device;
     bool active;
};

So it's nothing bus or VFIO specific. And what virtual bus had is:

struct virtbus_device {
     const char            *name;
     int                id;
     const struct virtbus_dev_id    *dev_id;
     struct device            dev;
         void                *data;
};

Are there any fundamental issues that you think mdev_device is abused? I 
won't expect the answers are generic objects like kobj, iommu device 
pointer etc.


>
>> My understanding for mdev is that it was a mediator between the driver and
>> physical device when it's hard to let them talk directly due to the
>> complexity of refactoring and maintenance.
> Really, mdev is to support vfio with a backend other than PCI, nothing
> more.


That partially explain why it was called mdev. So for virito, we want 
standard virtio driver to talk with a backend other than virtio.

For the issue of PCI, actually the API is generic enough to support 
device other than PCI, e.g AP bus.


>
> Abusing it for other things is not appropriate. ie creating an
> instance and not filling in most of the vfio focused ops is an abusive
> thing to do.


Well, it's only half of the mdev_parent_ops in mdev_parent, various 
methods could be done do be more generic to avoid duplication of codes. No?


>
>> hardware that can offload virtio datapath but not control path. We want to
>> present a unified interface (standard virtio) instead of a vendor specific
>> interface, so a mediator level in the middle is a must. For virtio driver,
>> mediator present a full virtio compatible device. For hardware, mediator
>> will mediate the difference between the behavior defined by virtio spec and
>> real hardware.
> If you need to bind to the VFIO driver then mdev is the right thing to
> use, otherwise it is not.
>
> It certainly should not be used to bind to random kernel drivers. This
> problem is what this virtual bus idea Intel is working on might solve.


What do you mean by random here? With this series, we have dedicated bus 
and dedicated driver with matching method to make sure the binding is 
correct.


>
> It seems the only thing people care about with mdev is the GUID
> lifecycle stuff, but at the same time folks like Parav are saying they
> don't want to use that lifecycle stuff and prefer devlink
> instead.


I'm sure you will need to handle other issues besides GUID which had 
been settled by mdev e.g IOMMU and types when starting to write a real 
hardware driver.


>
> Most likely, at least for virtio-net, everyone else will be able to
> use devlink as well, making it much less clear if that GUID lifecycle
> stuff is a good idea or not.


This assumption is wrong, we hard already had at least two concrete 
examples of vDPA device that doesn't use devlink:

- Intel IFC where virtio is done at VF level
- Ali Cloud ECS instance where virtio is done at PF level

Again, the device slicing is only part of our goal. The major goal is to 
have a mediator level that can take over the virtio control path between 
a standard virtio driver and a hardware who datapath is virtio 
compatible but not control path.

Thanks


>
> Jason
Jason Gunthorpe Nov. 20, 2019, 1:49 p.m. UTC | #9
On Wed, Nov 20, 2019 at 10:14:26AM +0800, Jason Wang wrote:

> > > I don't quite get the question here.
> > In the driver model the bus_type and foo_device are closely
> > linked.
> 
> I don't get the definition of "closely linked" here. Do you think the bus
> and device implement virtual bus series are closely linked? If yes, how did
> they achieve that?

I mean if you have a 'foo_device' then it should be on a 'foo_bus' and
not on some 'bar_bus', as that is how the driver core generally works.
 
> >   Creating 'mdev_device' instances and overriding the bus_type
> > is a very abusive thing to do.
> 
> Ok, mdev_device (without this series) had:
> 
> struct mdev_device {
>     struct device dev;
>     struct mdev_parent *parent;
>     guid_t uuid;
>     void *driver_data;
>     struct list_head next;
>     struct kobject *type_kobj;
>     struct device *iommu_device;
>     bool active;
> };
> 
> So it's nothing bus or VFIO specific. And what virtual bus had is:

What do mean? 'struct mdev_parent *parent' is the VFIO specific
stuff. I haven't figured out what the confusing mdev_parent is
supposed to be, or whhy the VFIO ops are linked to the parent or not
the device.. Honestly the whole mdev thing has a very strange take on
how to use the driver core.

> > Abusing it for other things is not appropriate. ie creating an
> > instance and not filling in most of the vfio focused ops is an abusive
> > thing to do.
> 
> Well, it's only half of the mdev_parent_ops in mdev_parent, various methods
> could be done do be more generic to avoid duplication of codes. No?

There are many ways to avoid duplicating code.

Taking something well defined, and bolting on something unrelated just
to share a bit of code is a very poor way to avoid code duplication.

> I'm sure you will need to handle other issues besides GUID which had been
> settled by mdev e.g IOMMU and types when starting to write a real hardware
> driver.

The iommu framework already handles that, the mdev stuff contributes
very little from what I can see.

> > Most likely, at least for virtio-net, everyone else will be able to
> > use devlink as well, making it much less clear if that GUID lifecycle
> > stuff is a good idea or not.
> 
> This assumption is wrong, we hard already had at least two concrete examples
> of vDPA device that doesn't use devlink:
> 
> - Intel IFC where virtio is done at VF level
> - Ali Cloud ECS instance where virtio is done at PF level

Again, you don't explain why they couldn't use devlink.

Or, why do we need GUID lifecycle stuff when these PCI devices can
only create a single virtio and can just go ahead and do that as soon
as they are probed.

The GUID stuff was invented for slicing, which you say is not
happening in these cases.

Jason
Jason Wang Nov. 21, 2019, 3:05 a.m. UTC | #10
On 2019/11/20 下午9:49, Jason Gunthorpe wrote:
> On Wed, Nov 20, 2019 at 10:14:26AM +0800, Jason Wang wrote:
>
>>>> I don't quite get the question here.
>>> In the driver model the bus_type and foo_device are closely
>>> linked.
>> I don't get the definition of "closely linked" here. Do you think the bus
>> and device implement virtual bus series are closely linked? If yes, how did
>> they achieve that?
> I mean if you have a 'foo_device' then it should be on a 'foo_bus' and
> not on some 'bar_bus', as that is how the driver core generally works.


I fully agree with you here. But isn't that just what this patch did? We 
had "mdev_virtio" device on "mdev_virtio" bus not "mdev_vfio" bus.


>   
>>>    Creating 'mdev_device' instances and overriding the bus_type
>>> is a very abusive thing to do.
>> Ok, mdev_device (without this series) had:
>>
>> struct mdev_device {
>>      struct device dev;
>>      struct mdev_parent *parent;
>>      guid_t uuid;
>>      void *driver_data;
>>      struct list_head next;
>>      struct kobject *type_kobj;
>>      struct device *iommu_device;
>>      bool active;
>> };
>>
>> So it's nothing bus or VFIO specific. And what virtual bus had is:
> What do mean? 'struct mdev_parent *parent' is the VFIO specific
> stuff. I haven't figured out what the confusing mdev_parent is
> supposed to be,


struct mdev_parent_ops {
     struct module   *owner;
     const struct attribute_group **dev_attr_groups;
     const struct attribute_group **mdev_attr_groups;
     struct attribute_group **supported_type_groups;

     int     (*create)(struct kobject *kobj, struct mdev_device *mdev);
     int     (*remove)(struct mdev_device *mdev);
     int     (*open)(struct mdev_device *mdev);
     void    (*release)(struct mdev_device *mdev);
     ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
             size_t count, loff_t *ppos);
     ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
              size_t count, loff_t *ppos);
     long    (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
              unsigned long arg);
     int    (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
};

You can see that nothing is really VFIO specific here..


> or whhy the VFIO ops are linked to the parent or not
> the device..


I guess the answer the mdev_devices belongs to the same parent are 
expected to have same ops.


> Honestly the whole mdev thing has a very strange take on
> how to use the driver core.


Suggestions are welcomed.


>
>>> Abusing it for other things is not appropriate. ie creating an
>>> instance and not filling in most of the vfio focused ops is an abusive
>>> thing to do.
>> Well, it's only half of the mdev_parent_ops in mdev_parent, various methods
>> could be done do be more generic to avoid duplication of codes. No?
> There are many ways to avoid duplicating code.
>
> Taking something well defined, and bolting on something unrelated just
> to share a bit of code is a very poor way to avoid code duplication.


We can have make the code better...


>> I'm sure you will need to handle other issues besides GUID which had been
>> settled by mdev e.g IOMMU and types when starting to write a real hardware
>> driver.
> The iommu framework already handles that, the mdev stuff contributes
> very little from what I can see.


Yes, but if we start from beginning to invent a new infrastructure and 
we still need GUID, IOMMU, types. So it will be very similar to mdev 
looks right now. So why not improve mdev?


>
>>> Most likely, at least for virtio-net, everyone else will be able to
>>> use devlink as well, making it much less clear if that GUID lifecycle
>>> stuff is a good idea or not.
>> This assumption is wrong, we hard already had at least two concrete examples
>> of vDPA device that doesn't use devlink:
>>
>> - Intel IFC where virtio is done at VF level
>> - Ali Cloud ECS instance where virtio is done at PF level
> Again, you don't explain why they couldn't use devlink.


Yes, they could, but of course for many reasons they won't use devlink. 
Not only devlink, even netlink is not used or implemented in all type of 
network devices.


>
> Or, why do we need GUID lifecycle stuff when these PCI devices can
> only create a single virtio and can just go ahead and do that as soon
> as they are probed.
>
> The GUID stuff was invented for slicing, which you say is not
> happening in these cases.


I think that's all about a consistent management interface, "slicing by 
one" is still compatible.

Thanks


>
> Jason
Rob Miller Nov. 26, 2019, 12:07 p.m. UTC | #11
On Tue, Nov 19, 2019 at 9:16 PM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2019/11/19 下午10:14, Jason Gunthorpe wrote:
> > On Tue, Nov 19, 2019 at 10:02:08PM +0800, Jason Wang wrote:
> >> On 2019/11/19 下午8:38, Jason Gunthorpe wrote:
> >>> On Tue, Nov 19, 2019 at 10:41:31AM +0800, Jason Wang wrote:
> >>>> On 2019/11/19 上午4:28, Jason Gunthorpe wrote:
> >>>>> On Mon, Nov 18, 2019 at 03:27:13PM -0500, Michael S. Tsirkin wrote:
> >>>>>> On Mon, Nov 18, 2019 at 01:41:00PM +0000, Jason Gunthorpe wrote:
> >>>>>>> On Mon, Nov 18, 2019 at 06:59:21PM +0800, Jason Wang wrote:
> >>>>>>>> +struct bus_type mdev_virtio_bus_type;
> >>>>>>>> +
> >>>>>>>> +struct mdev_virtio_device {
> >>>>>>>> +      struct mdev_device mdev;
> >>>>>>>> +      const struct mdev_virtio_ops *ops;
> >>>>>>>> +      u16 class_id;
> >>>>>>>> +};
> >>>>>>> This seems to share nothing with mdev (ie mdev-vfio), why is it on
> the
> >>>>>>> same bus?
> >>>>>> I must be missing something - which bus do they share?
> >>>>> mdev_bus_type ?
> >>>>>
> >>>>> Jason
> >>>> Note: virtio has its own bus: mdev_virtio_bus_type. So they are not
> the same
> >>>> bus.
> >>> That is even worse, why involve struct mdev_device at all then?
> >>>
> >>> Jason
> >>
> >> I don't quite get the question here.
> > In the driver model the bus_type and foo_device are closely
> > linked.
>
>
> I don't get the definition of "closely linked" here. Do you think the
> bus and device implement virtual bus series are closely linked? If yes,
> how did they achieve that?
>
>
> >   Creating 'mdev_device' instances and overriding the bus_type
> > is a very abusive thing to do.
>
RJM>] abusive is a subjective term. Looking at the whole context of the
vDPA framework, I still believe that extending the mdev API is preferable.
Without the framework, every vendor would have to "mediate" their own
devices, which would force us to effectively "duplicate" the mdev core code
and implement our own functionality on top. The core idea of VIRTIO is to
have a common interface and having a framework that also supports a lot of
commonality is fantastic, since we (hw vendors) too, really want to get out
of the business of crafting/verify/maintaining device drivers for every
version of Linux/Windows/... Heck, i',m hoping that a generic sample vDPA
parent driver (ie: sort of like Intel's IFCVF driver but even more so)
would be good enough for our product such that we (Brcm) don't have to
supply any driver.

>
>
> Ok, mdev_device (without this series) had:
>
> struct mdev_device {
>      struct device dev;
>      struct mdev_parent *parent;
>      guid_t uuid;
>      void *driver_data;
>      struct list_head next;
>      struct kobject *type_kobj;
>      struct device *iommu_device;
>      bool active;
> };
>
> So it's nothing bus or VFIO specific. And what virtual bus had is:
>
> struct virtbus_device {
>      const char            *name;
>      int                id;
>      const struct virtbus_dev_id    *dev_id;
>      struct device            dev;
>          void                *data;
> };
>
> Are there any fundamental issues that you think mdev_device is abused? I
> won't expect the answers are generic objects like kobj, iommu device
> pointer etc.
>
>
> >
> >> My understanding for mdev is that it was a mediator between the driver
> and
> >> physical device when it's hard to let them talk directly due to the
> >> complexity of refactoring and maintenance.
> > Really, mdev is to support vfio with a backend other than PCI, nothing
> > more.
>
>
> That partially explain why it was called mdev. So for virito, we want
> standard virtio driver to talk with a backend other than virtio.
>
> For the issue of PCI, actually the API is generic enough to support
> device other than PCI, e.g AP bus.
>
>
> >
> > Abusing it for other things is not appropriate. ie creating an
> > instance and not filling in most of the vfio focused ops is an abusive
> > thing to do.
>
>
> Well, it's only half of the mdev_parent_ops in mdev_parent, various
> methods could be done do be more generic to avoid duplication of codes. No?
>
>
> >
> >> hardware that can offload virtio datapath but not control path. We want
> to
> >> present a unified interface (standard virtio) instead of a vendor
> specific
> >> interface, so a mediator level in the middle is a must. For virtio
> driver,
> >> mediator present a full virtio compatible device. For hardware, mediator
> >> will mediate the difference between the behavior defined by virtio spec
> and
> >> real hardware.
> > If you need to bind to the VFIO driver then mdev is the right thing to
> > use, otherwise it is not.
> >
> > It certainly should not be used to bind to random kernel drivers. This
> > problem is what this virtual bus idea Intel is working on might solve.
>
>
> What do you mean by random here? With this series, we have dedicated bus
> and dedicated driver with matching method to make sure the binding is
> correct.
>
RJM>] I think it's pretty clear that it's not random. The class id takes
care of the match and allows flexibility to choose vhost-mdev vs
vitrio-mdev, depending if the deployment is bare-metal vs virtualized.

>
>
> >
> > It seems the only thing people care about with mdev is the GUID
> > lifecycle stuff, but at the same time folks like Parav are saying they
> > don't want to use that lifecycle stuff and prefer devlink
> > instead.
>
>
> I'm sure you will need to handle other issues besides GUID which had
> been settled by mdev e.g IOMMU and types when starting to write a real
> hardware driver.
>
>
> >
> > Most likely, at least for virtio-net, everyone else will be able to
> > use devlink as well, making it much less clear if that GUID lifecycle
> > stuff is a good idea or not.
>
>
> This assumption is wrong, we hard already had at least two concrete
> examples of vDPA device that doesn't use devlink:
>
> - Intel IFC where virtio is done at VF level
> - Ali Cloud ECS instance where virtio is done at PF level
>
> Again, the device slicing is only part of our goal. The major goal is to
> have a mediator level that can take over the virtio control path between
> a standard virtio driver and a hardware who datapath is virtio
> compatible but not control path.
>
> Thanks
>
>
> >
> > Jason
>
>
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d7e8badf58c..e1b57c84f249 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17269,6 +17269,8 @@  F:	include/linux/virtio*.h
 F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
 F:	mm/balloon_compaction.c
+F:	include/linux/mdev_virtio.h
+F:	drivers/mdev/virtio.c
 
 VIRTIO BLOCK AND SCSI DRIVERS
 M:	"Michael S. Tsirkin" <mst@redhat.com>
diff --git a/drivers/mdev/Kconfig b/drivers/mdev/Kconfig
index 4561f2d4178f..cd84d4670552 100644
--- a/drivers/mdev/Kconfig
+++ b/drivers/mdev/Kconfig
@@ -17,3 +17,13 @@  config VFIO_MDEV
 	  more details.
 
 	  If you don't know what do here, say N.
+
+config MDEV_VIRTIO
+       tristate "Mediated VIRTIO bus"
+       depends on VIRTIO && MDEV
+       default n
+       help
+	  Proivdes a mediated BUS for virtio. It could be used by
+          either kenrel driver or userspace driver.
+
+	  If you don't know what do here, say N.
diff --git a/drivers/mdev/Makefile b/drivers/mdev/Makefile
index 0b749e7f8ff4..eb14031c9944 100644
--- a/drivers/mdev/Makefile
+++ b/drivers/mdev/Makefile
@@ -1,5 +1,7 @@ 
 
 mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
 mdev_vfio-y := vfio.o
+mdev_virtio-y := virtio.o
 obj-$(CONFIG_MDEV) += mdev.o
 obj-$(CONFIG_VFIO_MDEV) += mdev_vfio.o
+obj-$(CONFIG_MDEV_VIRTIO) += mdev_virtio.o
diff --git a/drivers/mdev/virtio.c b/drivers/mdev/virtio.c
new file mode 100644
index 000000000000..25de329615c4
--- /dev/null
+++ b/drivers/mdev/virtio.c
@@ -0,0 +1,126 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Mediated VIRTIO bus
+ *
+ * Copyright (c) 2019, Red Hat. All rights reserved.
+ *     Author: Jason Wang <jasowang@redhat.com>
+ */
+
+#include <linux/module.h>
+#include <linux/uuid.h>
+#include <linux/device.h>
+#include <linux/mdev.h>
+#include <linux/mdev_virtio.h>
+#include <linux/mod_devicetable.h>
+
+#include "mdev_private.h"
+
+#define DRIVER_VERSION		"0.1"
+#define DRIVER_AUTHOR		"Jason Wang"
+#define DRIVER_DESC		"Mediated VIRTIO bus"
+
+struct bus_type mdev_virtio_bus_type;
+
+struct mdev_virtio_device {
+	struct mdev_device mdev;
+	const struct mdev_virtio_ops *ops;
+	u16 class_id;
+};
+
+#define to_mdev_virtio(mdev) container_of(mdev, \
+					  struct mdev_virtio_device, mdev)
+#define to_mdev_virtio_drv(mdrv) container_of(mdrv, \
+					      struct mdev_virtio_driver, drv)
+
+static int mdev_virtio_match(struct device *dev, struct device_driver *drv)
+{
+	unsigned int i;
+	struct mdev_device *mdev = mdev_from_dev(dev, &mdev_virtio_bus_type);
+	struct mdev_virtio_device *mdev_virtio = to_mdev_virtio(mdev);
+	struct mdev_driver *mdrv = to_mdev_driver(drv);
+	struct mdev_virtio_driver *mdrv_virtio = to_mdev_virtio_drv(mdrv);
+	const struct mdev_virtio_class_id *ids = mdrv_virtio->id_table;
+
+	if (!ids)
+		return 0;
+
+	for (i = 0; ids[i].id; i++)
+		if (ids[i].id == mdev_virtio->class_id)
+			return 1;
+	return 0;
+}
+
+static int mdev_virtio_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev, &mdev_virtio_bus_type);
+	struct mdev_virtio_device *mdev_virtio = to_mdev_virtio(mdev);
+
+	return add_uevent_var(env, "MODALIAS=mdev_virtio:c%02X",
+			      mdev_virtio->class_id);
+}
+
+struct bus_type mdev_virtio_bus_type = {
+	.name		= "mdev_virtio",
+	.probe		= mdev_probe,
+	.remove		= mdev_remove,
+	.match	        = mdev_virtio_match,
+	.uevent		= mdev_virtio_uevent,
+};
+EXPORT_SYMBOL(mdev_virtio_bus_type);
+
+void mdev_virtio_set_class_id(struct mdev_device *mdev, u16 class_id)
+{
+	struct mdev_virtio_device *mdev_virtio = to_mdev_virtio(mdev);
+
+	mdev_virtio->class_id = class_id;
+}
+EXPORT_SYMBOL(mdev_virtio_set_class_id);
+
+int mdev_virtio_register_device(struct device *dev,
+				const struct mdev_parent_ops *ops)
+{
+	return mdev_register_device(dev, ops, &mdev_virtio_bus_type,
+				    sizeof(struct mdev_virtio_device));
+}
+EXPORT_SYMBOL(mdev_virtio_register_device);
+
+void mdev_virtio_unregister_device(struct device *dev)
+{
+	return mdev_unregister_device(dev);
+}
+EXPORT_SYMBOL(mdev_virtio_unregister_device);
+
+void mdev_virtio_set_ops(struct mdev_device *mdev,
+			 const struct mdev_virtio_ops *ops)
+{
+	struct mdev_virtio_device *mdev_virtio = to_mdev_virtio(mdev);
+
+	mdev_virtio->ops = ops;
+}
+EXPORT_SYMBOL(mdev_virtio_set_ops);
+
+const struct mdev_virtio_ops *mdev_virtio_get_ops(struct mdev_device *mdev)
+{
+	struct mdev_virtio_device *mdev_virtio = to_mdev_virtio(mdev);
+
+	return mdev_virtio->ops;
+}
+EXPORT_SYMBOL(mdev_virtio_get_ops);
+
+static int __init mdev_init(void)
+{
+	return mdev_register_bus(&mdev_virtio_bus_type);
+}
+
+static void __exit mdev_exit(void)
+{
+	mdev_unregister_bus(&mdev_virtio_bus_type);
+}
+
+module_init(mdev_init)
+module_exit(mdev_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/linux/mdev_virtio.h b/include/linux/mdev_virtio.h
new file mode 100644
index 000000000000..ef2dbb6c383a
--- /dev/null
+++ b/include/linux/mdev_virtio.h
@@ -0,0 +1,163 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * VIRTIO Mediated device definition
+ *
+ * Copyright (c) 2019, Red Hat. All rights reserved.
+ *     Author: Jason Wang <jasowang@redhat.com>
+ */
+
+#ifndef VIRTIO_MDEV_H
+#define VIRTIO_MDEV_H
+
+#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
+#include <linux/mdev.h>
+
+extern struct bus_type mdev_virtio_bus_type;
+
+struct mdev_virtio_driver {
+	struct mdev_driver drv;
+	const struct mdev_virtio_class_id *id_table;
+};
+
+struct virtio_mdev_callback {
+	irqreturn_t (*callback)(void *data);
+	void *private;
+};
+
+/**
+ * struct mdev_virtio_device_ops - Structure to be registered for each
+ * mdev device to register the device for virtio/vhost drivers.
+ *
+ * The callbacks are mandatory unless explicitly mentioned.
+ *
+ * @set_vq_address:		Set the address of virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				@desc_area: address of desc area
+ *				@driver_area: address of driver area
+ *				@device_area: address of device area
+ *				Returns integer: success (0) or error (< 0)
+ * @set_vq_num:			Set the size of virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				@num: the size of virtqueue
+ * @kick_vq:			Kick the virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ * @set_vq_cb:			Set the interrupt callback function for
+ *				a virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				@cb: virtio-mdev interrupt callback structure
+ * @set_vq_ready:		Set ready status for a virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				@ready: ready (true) not ready(false)
+ * @get_vq_ready:		Get ready status for a virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				Returns boolean: ready (true) or not (false)
+ * @set_vq_state:		Set the state for a virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				@state: virtqueue state (last_avail_idx)
+ *				Returns integer: success (0) or error (< 0)
+ * @get_vq_state:		Get the state for a virtqueue
+ *				@mdev: mediated device
+ *				@idx: virtqueue index
+ *				Returns virtqueue state (last_avail_idx)
+ * @get_vq_align:		Get the virtqueue align requirement
+ *				for the device
+ *				@mdev: mediated device
+ *				Returns virtqueue algin requirement
+ * @get_features:		Get virtio features supported by the device
+ *				@mdev: mediated device
+ *				Returns the virtio features support by the
+ *				device
+ * @set_features:		Set virtio features supported by the driver
+ *				@mdev: mediated device
+ *				@features: feature support by the driver
+ *				Returns integer: success (0) or error (< 0)
+ * @set_config_cb:		Set the config interrupt callback
+ *				@mdev: mediated device
+ *				@cb: virtio-mdev interrupt callback structure
+ * @get_vq_num_max:		Get the max size of virtqueue
+ *				@mdev: mediated device
+ *				Returns u16: max size of virtqueue
+ * @get_device_id:		Get virtio device id
+ *				@mdev: mediated device
+ *				Returns u32: virtio device id
+ * @get_vendor_id:		Get id for the vendor that provides this device
+ *				@mdev: mediated device
+ *				Returns u32: virtio vendor id
+ * @get_status:			Get the device status
+ *				@mdev: mediated device
+ *				Returns u8: virtio device status
+ * @set_status:			Set the device status
+ *				@mdev: mediated device
+ *				@status: virtio device status
+ * @get_config:			Read from device specific configuration space
+ *				@mdev: mediated device
+ *				@offset: offset from the beginning of
+ *				configuration space
+ *				@buf: buffer used to read to
+ *				@len: the length to read from
+ *				configration space
+ * @set_config:			Write to device specific configuration space
+ *				@mdev: mediated device
+ *				@offset: offset from the beginning of
+ *				configuration space
+ *				@buf: buffer used to write from
+ *				@len: the length to write to
+ *				configration space
+ * @get_generation:		Get device config generaton (optional)
+ *				@mdev: mediated device
+ *				Returns u32: device generation
+ */
+struct mdev_virtio_ops {
+	/* Virtqueue ops */
+	int (*set_vq_address)(struct mdev_device *mdev,
+			      u16 idx, u64 desc_area, u64 driver_area,
+			      u64 device_area);
+	void (*set_vq_num)(struct mdev_device *mdev, u16 idx, u32 num);
+	void (*kick_vq)(struct mdev_device *mdev, u16 idx);
+	void (*set_vq_cb)(struct mdev_device *mdev, u16 idx,
+			  struct virtio_mdev_callback *cb);
+	void (*set_vq_ready)(struct mdev_device *mdev, u16 idx, bool ready);
+	bool (*get_vq_ready)(struct mdev_device *mdev, u16 idx);
+	int (*set_vq_state)(struct mdev_device *mdev, u16 idx, u64 state);
+	u64 (*get_vq_state)(struct mdev_device *mdev, u16 idx);
+
+	/* Device ops */
+	u16 (*get_vq_align)(struct mdev_device *mdev);
+	u64 (*get_features)(struct mdev_device *mdev);
+	int (*set_features)(struct mdev_device *mdev, u64 features);
+	void (*set_config_cb)(struct mdev_device *mdev,
+			      struct virtio_mdev_callback *cb);
+	u16 (*get_vq_num_max)(struct mdev_device *mdev);
+	u32 (*get_device_id)(struct mdev_device *mdev);
+	u32 (*get_vendor_id)(struct mdev_device *mdev);
+	u8 (*get_status)(struct mdev_device *mdev);
+	void (*set_status)(struct mdev_device *mdev, u8 status);
+	void (*get_config)(struct mdev_device *mdev, unsigned int offset,
+			   void *buf, unsigned int len);
+	void (*set_config)(struct mdev_device *mdev, unsigned int offset,
+			   const void *buf, unsigned int len);
+	u32 (*get_generation)(struct mdev_device *mdev);
+};
+
+int mdev_virtio_register_device(struct device *dev,
+				const struct mdev_parent_ops *ops);
+void mdev_virtio_unregister_device(struct device *dev);
+void mdev_virtio_set_ops(struct mdev_device *mdev,
+			 const struct mdev_virtio_ops *ops);
+const struct mdev_virtio_ops *mdev_virtio_get_ops(struct mdev_device *mdev);
+void mdev_virtio_set_class_id(struct mdev_device *mdev, u16 class_id);
+
+static inline struct mdev_device *mdev_virtio_from_dev(struct device *dev)
+{
+	return mdev_from_dev(dev, &mdev_virtio_bus_type);
+}
+
+#endif
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 5714fd35a83c..59006c47ae8e 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -821,4 +821,12 @@  struct wmi_device_id {
 	const void *context;
 };
 
+/**
+ * struct mdev_class_id - MDEV VIRTIO device class identifier
+ * @id: Used to identify a specific class of device, e.g vfio-mdev device.
+ */
+struct mdev_virtio_class_id {
+	__u16 id;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 054405b90ba4..178fd7c70812 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -231,5 +231,8 @@  int main(void)
 	DEVID(wmi_device_id);
 	DEVID_FIELD(wmi_device_id, guid_string);
 
+	DEVID(mdev_virtio_class_id);
+	DEVID_FIELD(mdev_virtio_class_id, id);
+
 	return 0;
 }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index c91eba751804..1a9c1f591951 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1335,6 +1335,17 @@  static int do_wmi_entry(const char *filename, void *symval, char *alias)
 	return 1;
 }
 
+/* looks like: "mdev_virtio:cN" */
+static int do_mdev_virtio_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, mdev_virtio_class_id, id);
+
+	sprintf(alias, "mdev_virtio:c%02X", id);
+	add_wildcard(alias);
+	return 1;
+}
+
+
 /* Does namelen bytes of name exactly match the symbol? */
 static bool sym_is(const char *name, unsigned namelen, const char *symbol)
 {
@@ -1407,6 +1418,7 @@  static const struct devtable devtable[] = {
 	{"typec", SIZE_typec_device_id, do_typec_entry},
 	{"tee", SIZE_tee_client_device_id, do_tee_entry},
 	{"wmi", SIZE_wmi_device_id, do_wmi_entry},
+	{"mdev_virtio", SIZE_mdev_virtio_class_id, do_mdev_virtio_entry},
 };
 
 /* Create MODULE_ALIAS() statements.