Message ID | 20220204211536.321475-25-mjrosato@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: s390: enable zPCI for interpretive execution | expand |
On Fri, 4 Feb 2022 16:15:30 -0500 Matthew Rosato <mjrosato@linux.ibm.com> wrote: > KVM zPCI passthrough device logic will need a reference to the associated > kvm guest that has access to the device. Let's register a group notifier > for VFIO_GROUP_NOTIFY_SET_KVM to catch this information in order to create > an association between a kvm guest and the host zdev. > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > --- > arch/s390/include/asm/kvm_pci.h | 2 ++ > drivers/vfio/pci/vfio_pci_core.c | 2 ++ > drivers/vfio/pci/vfio_pci_zdev.c | 46 ++++++++++++++++++++++++++++++++ > include/linux/vfio_pci_core.h | 10 +++++++ > 4 files changed, 60 insertions(+) > > diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h > index e4696f5592e1..16290b4cf2a6 100644 > --- a/arch/s390/include/asm/kvm_pci.h > +++ b/arch/s390/include/asm/kvm_pci.h > @@ -16,6 +16,7 @@ > #include <linux/kvm.h> > #include <linux/pci.h> > #include <linux/mutex.h> > +#include <linux/notifier.h> > #include <asm/pci_insn.h> > #include <asm/pci_dma.h> > > @@ -32,6 +33,7 @@ struct kvm_zdev { > u64 rpcit_count; > struct kvm_zdev_ioat ioat; > struct zpci_fib fib; > + struct notifier_block nb; > }; > > int kvm_s390_pci_dev_open(struct zpci_dev *zdev); > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > index f948e6cd2993..fc57d4d0abbe 100644 > --- a/drivers/vfio/pci/vfio_pci_core.c > +++ b/drivers/vfio/pci/vfio_pci_core.c > @@ -452,6 +452,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) > > vfio_pci_vf_token_user_add(vdev, -1); > vfio_spapr_pci_eeh_release(vdev->pdev); > + vfio_pci_zdev_release(vdev); > vfio_pci_core_disable(vdev); > > mutex_lock(&vdev->igate); > @@ -470,6 +471,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); > void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) > { > vfio_pci_probe_mmaps(vdev); > + vfio_pci_zdev_open(vdev); > vfio_spapr_pci_eeh_open(vdev->pdev); > 
vfio_pci_vf_token_user_add(vdev, 1); > } If this handling were for a specific device, I think we'd be suggesting this is the point at which we cross over to a vendor variant making use of vfio-pci-core rather than hooking directly into the core code. But this is meant to extend vfio-pci proper for the whole arch. Is there a compromise in using #ifdefs in vfio_pci_ops to call into zpci specific code that implements these arch specific hooks and the core for everything else? SPAPR code could probably converted similarly, it exists here for legacy reasons. [Cc Jason] Also, please note the DEVICE_FEATURE generalizations in the latest series from NVIDIA for mlx5 migration support: https://lore.kernel.org/all/20220207172216.206415-8-yishaih@nvidia.com/ If this series were to go in via the s390 tree, I'd request a branch so that we can continue to work on this in vfio code as well. Thanks, Alex > diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c > index ea4c0d2b0663..9f8284499111 100644 > --- a/drivers/vfio/pci/vfio_pci_zdev.c > +++ b/drivers/vfio/pci/vfio_pci_zdev.c > @@ -13,6 +13,7 @@ > #include <linux/vfio_zdev.h> > #include <asm/pci_clp.h> > #include <asm/pci_io.h> > +#include <asm/kvm_pci.h> > > #include <linux/vfio_pci_core.h> > > @@ -136,3 +137,48 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, > > return ret; > } > + > +static int vfio_pci_zdev_group_notifier(struct notifier_block *nb, > + unsigned long action, void *data) > +{ > + struct kvm_zdev *kzdev = container_of(nb, struct kvm_zdev, nb); > + > + if (action == VFIO_GROUP_NOTIFY_SET_KVM) { > + if (!data || !kzdev->zdev) > + return NOTIFY_DONE; > + kzdev->kvm = data; > + } > + > + return NOTIFY_OK; > +} > + > +void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev) > +{ > + unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM; > + struct zpci_dev *zdev = to_zpci(vdev->pdev); > + > + if (!zdev) > + return; > + > + if (kvm_s390_pci_dev_open(zdev)) > + return; > 
+ > + zdev->kzdev->nb.notifier_call = vfio_pci_zdev_group_notifier; > + > + if (vfio_register_notifier(vdev->vdev.dev, VFIO_GROUP_NOTIFY, > + &events, &zdev->kzdev->nb)) > + kvm_s390_pci_dev_release(zdev); > +} > + > +void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev) > +{ > + struct zpci_dev *zdev = to_zpci(vdev->pdev); > + > + if (!zdev || !zdev->kzdev) > + return; > + > + vfio_unregister_notifier(vdev->vdev.dev, VFIO_GROUP_NOTIFY, > + &zdev->kzdev->nb); > + > + kvm_s390_pci_dev_release(zdev); > +} > diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h > index 5e2bca3b89db..05287f8ac855 100644 > --- a/include/linux/vfio_pci_core.h > +++ b/include/linux/vfio_pci_core.h > @@ -198,12 +198,22 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) > #ifdef CONFIG_VFIO_PCI_ZDEV > extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, > struct vfio_info_cap *caps); > +void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev); > +void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev); > #else > static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, > struct vfio_info_cap *caps) > { > return -ENODEV; > } > + > +static inline void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev) > +{ > +} > + > +static inline void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev) > +{ > +} > #endif > > /* Will be exported for vfio pci drivers usage */
On Tue, Feb 08, 2022 at 10:43:19AM -0700, Alex Williamson wrote: > On Fri, 4 Feb 2022 16:15:30 -0500 > Matthew Rosato <mjrosato@linux.ibm.com> wrote: > > > KVM zPCI passthrough device logic will need a reference to the associated > > kvm guest that has access to the device. Let's register a group notifier > > for VFIO_GROUP_NOTIFY_SET_KVM to catch this information in order to create > > an association between a kvm guest and the host zdev. > > > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > > arch/s390/include/asm/kvm_pci.h | 2 ++ > > drivers/vfio/pci/vfio_pci_core.c | 2 ++ > > drivers/vfio/pci/vfio_pci_zdev.c | 46 ++++++++++++++++++++++++++++++++ > > include/linux/vfio_pci_core.h | 10 +++++++ > > 4 files changed, 60 insertions(+) > > > > diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h > > index e4696f5592e1..16290b4cf2a6 100644 > > +++ b/arch/s390/include/asm/kvm_pci.h > > @@ -16,6 +16,7 @@ > > #include <linux/kvm.h> > > #include <linux/pci.h> > > #include <linux/mutex.h> > > +#include <linux/notifier.h> > > #include <asm/pci_insn.h> > > #include <asm/pci_dma.h> > > > > @@ -32,6 +33,7 @@ struct kvm_zdev { > > u64 rpcit_count; > > struct kvm_zdev_ioat ioat; > > struct zpci_fib fib; > > + struct notifier_block nb; > > }; > > > > int kvm_s390_pci_dev_open(struct zpci_dev *zdev); > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > > index f948e6cd2993..fc57d4d0abbe 100644 > > +++ b/drivers/vfio/pci/vfio_pci_core.c > > @@ -452,6 +452,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) > > > > vfio_pci_vf_token_user_add(vdev, -1); > > vfio_spapr_pci_eeh_release(vdev->pdev); > > + vfio_pci_zdev_release(vdev); > > vfio_pci_core_disable(vdev); > > > > mutex_lock(&vdev->igate); > > @@ -470,6 +471,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); > > void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) > > { > > vfio_pci_probe_mmaps(vdev); > > + 
vfio_pci_zdev_open(vdev); > > vfio_spapr_pci_eeh_open(vdev->pdev); > > vfio_pci_vf_token_user_add(vdev, 1); > > } > > If this handling were for a specific device, I think we'd be suggesting > this is the point at which we cross over to a vendor variant making use > of vfio-pci-core rather than hooking directly into the core code. Personally, I think it is wrong layering for VFIO to be aware of KVM like this. This marks the first time that VFIO core code itself is being made aware of the KVM linkage. It copies the same kind of design the s390 specific mdev use of putting VFIO in charge of KVM functionality. If we are doing this we should just give up and admit that KVM is a first-class part of struct vfio_device and get rid of the notifier stuff too, at least for s390. Reading the patches and descriptions pretty much everything is boiling down to 'use vfio to tell the kvm architecture code to do something' - which I think needs to be handled through a KVM side ioctl. Or, at the very least, everything needs to be described in some way that makes it clear what is happening to userspace, without kvm, through these ioctls. This seems especially true now that it seems s390 PCI support is almost truely functional, with actual new userspace instructions to issue MMIO operations that work outside of KVM. I'm not sure how this all fits together, but I would expect an outcome where DPDK could run on these new systems and not have to know anything more about s390 beyond using the proper MMIO instructions via some compilation time enablement. (I've been reviewing s390 patches updating rdma for a parallel set of stuff) > this is meant to extend vfio-pci proper for the whole arch. Is there a > compromise in using #ifdefs in vfio_pci_ops to call into zpci specific > code that implements these arch specific hooks and the core for > everything else? SPAPR code could probably converted similarly, it > exists here for legacy reasons. 
[Cc Jason] I'm not sure I get what you are suggesting? Where would these ifdefs be? > Also, please note the DEVICE_FEATURE generalizations in the latest > series from NVIDIA for mlx5 migration support: > https://lore.kernel.org/all/20220207172216.206415-8-yishaih@nvidia.com/ Yes, please don't implement a bunch of new FEATURE code without taking the cleanup patches for feature support from that series too. I can put them on a branch for you if you needed. Jason
On Tue, 8 Feb 2022 14:51:41 -0400 Jason Gunthorpe <jgg@nvidia.com> wrote: > On Tue, Feb 08, 2022 at 10:43:19AM -0700, Alex Williamson wrote: > > On Fri, 4 Feb 2022 16:15:30 -0500 > > Matthew Rosato <mjrosato@linux.ibm.com> wrote: > > > > > KVM zPCI passthrough device logic will need a reference to the associated > > > kvm guest that has access to the device. Let's register a group notifier > > > for VFIO_GROUP_NOTIFY_SET_KVM to catch this information in order to create > > > an association between a kvm guest and the host zdev. > > > > > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > > > arch/s390/include/asm/kvm_pci.h | 2 ++ > > > drivers/vfio/pci/vfio_pci_core.c | 2 ++ > > > drivers/vfio/pci/vfio_pci_zdev.c | 46 ++++++++++++++++++++++++++++++++ > > > include/linux/vfio_pci_core.h | 10 +++++++ > > > 4 files changed, 60 insertions(+) > > > > > > diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h > > > index e4696f5592e1..16290b4cf2a6 100644 > > > +++ b/arch/s390/include/asm/kvm_pci.h > > > @@ -16,6 +16,7 @@ > > > #include <linux/kvm.h> > > > #include <linux/pci.h> > > > #include <linux/mutex.h> > > > +#include <linux/notifier.h> > > > #include <asm/pci_insn.h> > > > #include <asm/pci_dma.h> > > > > > > @@ -32,6 +33,7 @@ struct kvm_zdev { > > > u64 rpcit_count; > > > struct kvm_zdev_ioat ioat; > > > struct zpci_fib fib; > > > + struct notifier_block nb; > > > }; > > > > > > int kvm_s390_pci_dev_open(struct zpci_dev *zdev); > > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > > > index f948e6cd2993..fc57d4d0abbe 100644 > > > +++ b/drivers/vfio/pci/vfio_pci_core.c > > > @@ -452,6 +452,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) > > > > > > vfio_pci_vf_token_user_add(vdev, -1); > > > vfio_spapr_pci_eeh_release(vdev->pdev); > > > + vfio_pci_zdev_release(vdev); > > > vfio_pci_core_disable(vdev); > > > > > > mutex_lock(&vdev->igate); > > > @@ -470,6 +471,7 @@ 
EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); > > > void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) > > > { > > > vfio_pci_probe_mmaps(vdev); > > > + vfio_pci_zdev_open(vdev); > > > vfio_spapr_pci_eeh_open(vdev->pdev); > > > vfio_pci_vf_token_user_add(vdev, 1); > > > } > > > > If this handling were for a specific device, I think we'd be suggesting > > this is the point at which we cross over to a vendor variant making use > > of vfio-pci-core rather than hooking directly into the core code. > > Personally, I think it is wrong layering for VFIO to be aware of KVM > like this. This marks the first time that VFIO core code itself is > being made aware of the KVM linkage. I agree, but I've resigned that I've lost that battle. Both mdev vGPU vendors make specific assumptions about running on a VM. VFIO was never intended to be tied to KVM or the specific use case of a VM. > It copies the same kind of design the s390 specific mdev use of > putting VFIO in charge of KVM functionality. If we are doing this we > should just give up and admit that KVM is a first-class part of struct > vfio_device and get rid of the notifier stuff too, at least for s390. Euw. You're right, I really don't like vfio core code embracing this dependency for s390, device specific use cases are bad enough. > Reading the patches and descriptions pretty much everything is boiling > down to 'use vfio to tell the kvm architecture code to do something' - > which I think needs to be handled through a KVM side ioctl. AIF at least sounds a lot like the reason we invented the irq bypass mechanism to allow interrupt producers and consumers to register independently and associate to each other with a shared token. Is the purpose of IOAT to associate the device to a set of KVM page tables? That seems like a container or future iommufd operation. I read DTSM as supported formats for the IOAT. 
> Or, at the very least, everything needs to be described in some way > that makes it clear what is happening to userspace, without kvm, > through these ioctls. As I understand the discussion here: https://lore.kernel.org/all/20220204211536.321475-15-mjrosato@linux.ibm.com/ The assumption is that there is no non-KVM userspace currently. This seems like a regression to me. > This seems especially true now that it seems s390 PCI support is > almost truely functional, with actual new userspace instructions to > issue MMIO operations that work outside of KVM. > > I'm not sure how this all fits together, but I would expect an outcome > where DPDK could run on these new systems and not have to know > anything more about s390 beyond using the proper MMIO instructions via > some compilation time enablement. Yes, fully enabling zPCI with vfio, but only for KVM is not optimal. > (I've been reviewing s390 patches updating rdma for a parallel set of > stuff) > > > this is meant to extend vfio-pci proper for the whole arch. Is there a > > compromise in using #ifdefs in vfio_pci_ops to call into zpci specific > > code that implements these arch specific hooks and the core for > > everything else? SPAPR code could probably converted similarly, it > > exists here for legacy reasons. [Cc Jason] > > I'm not sure I get what you are suggesting? Where would these ifdefs > be? Essentially just: static const struct vfio_device_ops vfio_pci_ops = { .name = "vfio-pci", #ifdef CONFIG_S390 .open_device = vfio_zpci_open_device, .close_device = vfio_zpci_close_device, .ioctl = vfio_zpci_ioctl, #else .open_device = vfio_pci_open_device, .close_device = vfio_pci_core_close_device, .ioctl = vfio_pci_core_ioctl, #endif .read = vfio_pci_core_read, .write = vfio_pci_core_write, .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, }; It would at least provide more validation/exercise of the core/vendor split. Thanks, Alex
On Tue, Feb 08, 2022 at 12:26:24PM -0700, Alex Williamson wrote: > > Personally, I think it is wrong layering for VFIO to be aware of KVM > > like this. This marks the first time that VFIO core code itself is > > being made aware of the KVM linkage. > > I agree, but I've resigned that I've lost that battle. Both mdev vGPU > vendors make specific assumptions about running on a VM. The vGPU's are not as egregious though, are they? > > Or, at the very least, everything needs to be described in some way > > that makes it clear what is happening to userspace, without kvm, > > through these ioctls. > > As I understand the discussion here: > > https://lore.kernel.org/all/20220204211536.321475-15-mjrosato@linux.ibm.com/ > > The assumption is that there is no non-KVM userspace currently. This > seems like a regression to me. Indeed, I definitely don't like it either. This is not VFIO if is just driving KVM. I would prefer they add a function to get the 'struct device *' from a VFIO device fd and drive more of this from kvm, as appropriate. > > > this is meant to extend vfio-pci proper for the whole arch. Is there a > > > compromise in using #ifdefs in vfio_pci_ops to call into zpci specific > > > code that implements these arch specific hooks and the core for > > > everything else? SPAPR code could probably converted similarly, it > > > exists here for legacy reasons. [Cc Jason] > > > > I'm not sure I get what you are suggesting? Where would these ifdefs > > be? 
> > Essentially just: > > static const struct vfio_device_ops vfio_pci_ops = { > .name = "vfio-pci", > #ifdef CONFIG_S390 > .open_device = vfio_zpci_open_device, > .close_device = vfio_zpci_close_device, > .ioctl = vfio_zpci_ioctl, > #else > .open_device = vfio_pci_open_device, > .close_device = vfio_pci_core_close_device, > .ioctl = vfio_pci_core_ioctl, > #endif > .read = vfio_pci_core_read, > .write = vfio_pci_core_write, > .mmap = vfio_pci_core_mmap, > .request = vfio_pci_core_request, > .match = vfio_pci_core_match, > }; > > It would at least provide more validation/exercise of the core/vendor > split. Thanks, This would have to be in every pci driver - this is not just code the universal vfio-pci has to enable, but every migration driver/etc too. And we will need it again in vfio-cxl for s390 in 10 years too.. So, I think this approach is the right one, aside from the philosophical question of being so tightly linking s390 vfio to KVM. Jason
On 2/8/22 2:26 PM, Alex Williamson wrote: > On Tue, 8 Feb 2022 14:51:41 -0400 > Jason Gunthorpe <jgg@nvidia.com> wrote: > >> On Tue, Feb 08, 2022 at 10:43:19AM -0700, Alex Williamson wrote: >>> On Fri, 4 Feb 2022 16:15:30 -0500 >>> Matthew Rosato <mjrosato@linux.ibm.com> wrote: >>> >>>> KVM zPCI passthrough device logic will need a reference to the associated >>>> kvm guest that has access to the device. Let's register a group notifier >>>> for VFIO_GROUP_NOTIFY_SET_KVM to catch this information in order to create >>>> an association between a kvm guest and the host zdev. >>>> >>>> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >>>> arch/s390/include/asm/kvm_pci.h | 2 ++ >>>> drivers/vfio/pci/vfio_pci_core.c | 2 ++ >>>> drivers/vfio/pci/vfio_pci_zdev.c | 46 ++++++++++++++++++++++++++++++++ >>>> include/linux/vfio_pci_core.h | 10 +++++++ >>>> 4 files changed, 60 insertions(+) >>>> >>>> diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h >>>> index e4696f5592e1..16290b4cf2a6 100644 >>>> +++ b/arch/s390/include/asm/kvm_pci.h >>>> @@ -16,6 +16,7 @@ >>>> #include <linux/kvm.h> >>>> #include <linux/pci.h> >>>> #include <linux/mutex.h> >>>> +#include <linux/notifier.h> >>>> #include <asm/pci_insn.h> >>>> #include <asm/pci_dma.h> >>>> >>>> @@ -32,6 +33,7 @@ struct kvm_zdev { >>>> u64 rpcit_count; >>>> struct kvm_zdev_ioat ioat; >>>> struct zpci_fib fib; >>>> + struct notifier_block nb; >>>> }; >>>> >>>> int kvm_s390_pci_dev_open(struct zpci_dev *zdev); >>>> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c >>>> index f948e6cd2993..fc57d4d0abbe 100644 >>>> +++ b/drivers/vfio/pci/vfio_pci_core.c >>>> @@ -452,6 +452,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) >>>> >>>> vfio_pci_vf_token_user_add(vdev, -1); >>>> vfio_spapr_pci_eeh_release(vdev->pdev); >>>> + vfio_pci_zdev_release(vdev); >>>> vfio_pci_core_disable(vdev); >>>> >>>> mutex_lock(&vdev->igate); >>>> @@ -470,6 +471,7 @@ 
EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); >>>> void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) >>>> { >>>> vfio_pci_probe_mmaps(vdev); >>>> + vfio_pci_zdev_open(vdev); >>>> vfio_spapr_pci_eeh_open(vdev->pdev); >>>> vfio_pci_vf_token_user_add(vdev, 1); >>>> } >>> >>> If this handling were for a specific device, I think we'd be suggesting >>> this is the point at which we cross over to a vendor variant making use >>> of vfio-pci-core rather than hooking directly into the core code. >> >> Personally, I think it is wrong layering for VFIO to be aware of KVM >> like this. This marks the first time that VFIO core code itself is >> being made aware of the KVM linkage. > > I agree, but I've resigned that I've lost that battle. Both mdev vGPU > vendors make specific assumptions about running on a VM. VFIO was > never intended to be tied to KVM or the specific use case of a VM. > >> It copies the same kind of design the s390 specific mdev use of >> putting VFIO in charge of KVM functionality. If we are doing this we >> should just give up and admit that KVM is a first-class part of struct >> vfio_device and get rid of the notifier stuff too, at least for s390. > > Euw. You're right, I really don't like vfio core code embracing this > dependency for s390, device specific use cases are bad enough. > >> Reading the patches and descriptions pretty much everything is boiling >> down to 'use vfio to tell the kvm architecture code to do something' - >> which I think needs to be handled through a KVM side ioctl. > > AIF at least sounds a lot like the reason we invented the irq bypass > mechanism to allow interrupt producers and consumers to register > independently and associate to each other with a shared token. Yes, these do sound quite similar, looking at it now though I haven't yet fully grokked irq bypass... 
But with AIF you have the case where either the interrupt will be delivered directly to a guest from firmware via an s390 construct (gisa) or under various circumstances the host (kvm) will be prodded to perform the delivery (still via gisa) instead. > > Is the purpose of IOAT to associate the device to a set of KVM page > tables? That seems like a container or future iommufd operation. I Yes, here we are establishing a relationship with the DMA table in the guest so that once mappings are established guest PCI operations (handled via special instructions in s390) don't need to go through the host but can be directly handled by firmware (so, effectively guest can keep running on its vcpu vs breaking out). > read DTSM as supported formats for the IOAT. > >> Or, at the very least, everything needs to be described in some way >> that makes it clear what is happening to userspace, without kvm, >> through these ioctls. Nothing, they don't need these ioctls. Userspace without a KVM registration for the device in question gets -EINVAL. > > As I understand the discussion here: > > https://lore.kernel.org/all/20220204211536.321475-15-mjrosato@linux.ibm.com/ > > The assumption is that there is no non-KVM userspace currently. This > seems like a regression to me. It's more that non-KVM userspace doesn't care about what these ioctls are doing... The enabling of 'interp, aif, ioat' is only pertinent when there is a KVM userspace, specifically because the information being shared / actions being performed as a result are only relevant to properly enabling zPCI features when the zPCI device is being passed through to a VM guest. If you're just using a userspace driver to talk to the device (no KVM guest involved) then the kernel zPCI layer already has this device set up using whatever s390 facilities are available. 
> >> This seems especially true now that it seems s390 PCI support is >> almost truely functional, with actual new userspace instructions to >> issue MMIO operations that work outside of KVM. >> >> I'm not sure how this all fits together, but I would expect an outcome >> where DPDK could run on these new systems and not have to know >> anything more about s390 beyond using the proper MMIO instructions via >> some compilation time enablement. > > Yes, fully enabling zPCI with vfio, but only for KVM is not optimal. See above. I think there is a misunderstanding here, it's not that we are only enabling zPCI with vfio for KVM, but rather than when using vfio to pass the device to a guest there is additional work that has to happen in order to 'fully enable' zPCI. > >> (I've been reviewing s390 patches updating rdma for a parallel set of >> stuff) >> >>> this is meant to extend vfio-pci proper for the whole arch. Is there a >>> compromise in using #ifdefs in vfio_pci_ops to call into zpci specific >>> code that implements these arch specific hooks and the core for >>> everything else? SPAPR code could probably converted similarly, it >>> exists here for legacy reasons. [Cc Jason] >> >> I'm not sure I get what you are suggesting? Where would these ifdefs >> be? > > Essentially just: > > static const struct vfio_device_ops vfio_pci_ops = { > .name = "vfio-pci", > #ifdef CONFIG_S390 > .open_device = vfio_zpci_open_device, > .close_device = vfio_zpci_close_device, > .ioctl = vfio_zpci_ioctl, > #else > .open_device = vfio_pci_open_device, > .close_device = vfio_pci_core_close_device, > .ioctl = vfio_pci_core_ioctl, > #endif > .read = vfio_pci_core_read, > .write = vfio_pci_core_write, > .mmap = vfio_pci_core_mmap, > .request = vfio_pci_core_request, > .match = vfio_pci_core_match, > }; > > It would at least provide more validation/exercise of the core/vendor > split. Thanks, > > Alex >
On Tue, Feb 08, 2022 at 03:33:58PM -0500, Matthew Rosato wrote: > > Is the purpose of IOAT to associate the device to a set of KVM page > > tables? That seems like a container or future iommufd operation. I > > Yes, here we are establishing a relationship with the DMA table in the guest > so that once mappings are established guest PCI operations (handled via > special instructions in s390) don't need to go through the host but can be > directly handled by firmware (so, effectively guest can keep running on its > vcpu vs breaking out). Oh, well, certainly sounds like a NAK on that - anything to do with the DMA translation of a PCI device must go through the iommu layer, not here. Let's not repeat the iommu subsystem bypass mess power made please. > It's more that non-KVM userspace doesn't care about what these ioctls are > doing... The enabling of 'interp, aif, ioat' is only pertinent when there > is a KVM userspace, specifically because the information being shared / > actions being performed as a result are only relevant to properly enabling > zPCI features when the zPCI device is being passed through to a VM > guest. Then why are they KVM ioctls? Jason
On 2/8/22 3:40 PM, Jason Gunthorpe wrote: > On Tue, Feb 08, 2022 at 03:33:58PM -0500, Matthew Rosato wrote: > >>> Is the purpose of IOAT to associate the device to a set of KVM page >>> tables? That seems like a container or future iommufd operation. I >> >> Yes, here we are establishing a relationship with the DMA table in the guest >> so that once mappings are established guest PCI operations (handled via >> special instructions in s390) don't need to go through the host but can be >> directly handled by firmware (so, effectively guest can keep running on its >> vcpu vs breaking out). > > Oh, well, certainly sounds like a NAK on that - anything to do with > the DMA translation of a PCI device must go through the iommu layer, > not here. > > Lets not repeat the iommu subsytem bypass mess power made please. > >> It's more that non-KVM userspace doesn't care about what these ioctls are >> doing... The enabling of 'interp, aif, ioat' is only pertinent when there >> is a KVM userspace, specifically because the information being shared / >> actions being performed as a result are only relevant to properly enabling >> zPCI features when the zPCI device is being passed through to a VM >> guest. > > Then why are they KVM ioctls? Well, the primary reason I ended up here was that I need to ensure the operation is only performed when guest X owns host zPCI device Y. The vfio-pci device ioctl had the benefit of acting on device granularity + also already being aware of the host PCI (and thus zPCI) device association -- so I already know exactly what hostdev is being referenced for the operation. All that was needed was the KVM notifier to ensure the vfio device was associated to a KVM guest. I think moving over to a KVM ioctl is doable; I might still need to rely on VFIO_GROUP_NOTIFY_SET_KVM though, not sure yet. 
Based on prior comments in this thread I'm assuming Alex shares that view too (don't use vfio device ioctl for something only being used for VM passthrough) so I'll start looking at using KVM ioctls instead.
On Tue, 2022-02-08 at 16:40 -0400, Jason Gunthorpe wrote: > On Tue, Feb 08, 2022 at 03:33:58PM -0500, Matthew Rosato wrote: > > > > Is the purpose of IOAT to associate the device to a set of KVM page > > > tables? That seems like a container or future iommufd operation. I > > > > Yes, here we are establishing a relationship with the DMA table in the guest > > so that once mappings are established guest PCI operations (handled via > > special instructions in s390) don't need to go through the host but can be > > directly handled by firmware (so, effectively guest can keep running on its > > vcpu vs breaking out). > > Oh, well, certainly sounds like a NAK on that - anything to do with > the DMA translation of a PCI device must go through the iommu layer, > not here. > > Lets not repeat the iommu subsytem bypass mess power made please. Maybe some context on all of this. First it's important to note that on s390x the PCI IOMMU hardware is controlled with special instructions. For pass-through this is actually quite nice as it makes it relatively simple for us to always run with an IOMMU in the guest; we simply need to provide the instructions. Meaning we get full IOMMU protection for pass-through devices on KVM guests, guests with pass-through remain pageable and we can even support nested pass-through. This is possible with relatively little overhead because we can do all of the per map/unmap guest IOMMU operations with a single instruction intercept. The instruction we need to intercept is called Refresh PCI Translations (RPCIT). Its job is twofold. For an OS running directly on our machine hypervisor LPAR it flushes the IOMMU's TLB by informing it which pages have been invalidated while the hardware walks the page tables and fills the TLB on its own for establishing a mapping for previously invalid IOVAs. In a KVM or z/VM guest the guest is informed that IOMMU translations need to be refreshed even for previously invalid IOVAs. 
With this the guest builds it's IOMMU translation tables as normal but then does a RPCIT for the IOVA range it touched. In the hypervisor we can then simply walk the translation tables, pin the guest pages and map them in the host IOMMU. Prior to this series this happened in QEMU which does the map via vfio-iommu-type1 from user-space. This works and will remain as a fallback. Sadly it is quite slow and has a large impact on performance as we need to do a lot of mapping operations as the DMA API of the guest goes through the virtual IOMMU. This series thus adds the same functionality but as a KVM intercept of RPCIT. Now I think this neatly fits into KVM, we're emulating an instruction after all and most of its work is KVM specific pinning of guest pages. Importantly all other handling like IOMMU domain attachment still goes through vfio- iommu-type1 and we just fast path the map/unmap operations. In the code the map/unmap boils down to dma_walk_cpu_trans() and parts of dma_shadow_cpu_trans() both called in dma_table_shadow(). The former is a function already shared between our DMA API and IOMMU API implementations and the only code that walks the host translation tables. So in a way we're side stepping the IOMMU API ops that is true but we do not side step the IOMMU host table access code paths. Notice how our IOMMU API is also < 400 LOC because both the DMA and IOMMU APIs share code. That said, I believe we should be able to do the mapping still in a KVM RPCIT intercept but going through IOMMU API ops if this side stepping is truly unacceptable. It definitely adds overhead though and I'm not sure what we gain in clarity or maintainability since we already share the actual host table access code and there is only one PCI IOMMU and that is part of the architecture. Also either KVM or QEMU needs to know about the same details for looking at guest IOMMU translation tables / emulating the guest IOMMU. 
It's also clear that the IOMMU API will remain functional on its own as it is necessary for any non-KVM use case which of course can't intercept RPCIT but on the other hand can also keep mappings much longer, significantly reducing overhead.
On Thu, Feb 10, 2022 at 12:15:58PM +0100, Niklas Schnelle wrote: > In a KVM or z/VM guest the guest is informed that IOMMU translations > need to be refreshed even for previously invalid IOVAs. With this the > guest builds it's IOMMU translation tables as normal but then does a > RPCIT for the IOVA range it touched. In the hypervisor we can then > simply walk the translation tables, pin the guest pages and map them in > the host IOMMU. Prior to this series this happened in QEMU which does > the map via vfio-iommu-type1 from user-space. This works and will > remain as a fallback. Sadly it is quite slow and has a large impact on > performance as we need to do a lot of mapping operations as the DMA API > of the guest goes through the virtual IOMMU. This series thus adds the > same functionality but as a KVM intercept of RPCIT. Now I think this > neatly fits into KVM, we're emulating an instruction after all and most > of its work is KVM specific pinning of guest pages. Importantly all > other handling like IOMMU domain attachment still goes through vfio- > iommu-type1 and we just fast path the map/unmap operations. So you create an iommu_domain and then hand it over to kvm which then does map/unmap operations on it under the covers? How does the page pinning work? In the design we are trying to reach I would say this needs to be modeled as a special iommu_domain that has this automatic map/unmap behavior from following user pages. Creating it would specify the kvm and the in-guest base address of the guest's page table. Then the magic kernel code you describe can operate on its own domain without becoming confused with a normal map/unmap domain. It is like the HW nested translation other CPUs are doing, but instead of HW nested, it is SW nested. Jason
On Thu, 2022-02-10 at 09:01 -0400, Jason Gunthorpe wrote: > On Thu, Feb 10, 2022 at 12:15:58PM +0100, Niklas Schnelle wrote: > > > In a KVM or z/VM guest the guest is informed that IOMMU translations > > need to be refreshed even for previously invalid IOVAs. With this the > > guest builds it's IOMMU translation tables as normal but then does a > > RPCIT for the IOVA range it touched. In the hypervisor we can then > > simply walk the translation tables, pin the guest pages and map them in > > the host IOMMU. Prior to this series this happened in QEMU which does > > the map via vfio-iommu-type1 from user-space. This works and will > > remain as a fallback. Sadly it is quite slow and has a large impact on > > performance as we need to do a lot of mapping operations as the DMA API > > of the guest goes through the virtual IOMMU. This series thus adds the > > same functionality but as a KVM intercept of RPCIT. Now I think this > > neatly fits into KVM, we're emulating an instruction after all and most > > of its work is KVM specific pinning of guest pages. Importantly all > > other handling like IOMMU domain attachment still goes through vfio- > > iommu-type1 and we just fast path the map/unmap operations. > > So you create an iommu_domain and then hand it over to kvm which then > does map/unmap operations on it under the covers? Yes > > How does the page pinning work? The pinning is done directly in the RPCIT interception handler pinning both the IOMMU tables and the guest pages mapped for DMA. > > In the design we are trying to reach I would say this needs to be > modeled as a special iommu_domain that has this automatic map/unmap > behavior from following user pages. Creating it would specify the kvm > and the in-guest base address of the guest's page table. Makes sense. > Then the > magic kernel code you describe can operate on its own domain without > becoming confused with a normal map/unmap domain. This sounds like an interesting idea. 
Looking at drivers/iommu/s390_iommu.c most of that is pretty trivial domain handling. I wonder if we could share this by marking the existing s390_iommu_domain type with kind of a "lent out to KVM" flag. Possibly by simply having a non-NULL pointer to a struct holding the guest base address and kvm etc? That way we can share the setup/tear down of the domain and of host IOMMU tables as well as aperture checks the same while also being able to keep the IOMMU API from interfering with the KVM RPCIT intercept and vice versa. I.e. while the domain is under control of KVM's RPCIT handling we make all IOMMU map/unmap fail. To me this more direct involvement of IOMMU and KVM on s390x is also a direct consequence of it using special instructions. Naturally those instructions can be intercepted or run under hardware accelerated virtualization. > > It is like the HW nested translation other CPUs are doing, but instead > of HW nested, it is SW nested. Yes very good analogy. Has any of that nested IOMMU translations work been merged yet? I know AMD had something in the works and nested translations have been around for the MMU for a while and are also used on s390x. We're definitely thinking about HW nested IOMMU translations too so any design we come up with would be able to deal with that too. Basically we would then execute RPCIT without leaving the hardware virtualization mode (SIE). We believe that that would require pinning all of guest memory though because HW can't really pin pages.
On Thu, Feb 10, 2022 at 03:06:35PM +0100, Niklas Schnelle wrote: > > How does the page pinning work? > > The pinning is done directly in the RPCIT interception handler pinning > both the IOMMU tables and the guest pages mapped for DMA. And if pinning fails? > > Then the > > magic kernel code you describe can operate on its own domain without > > becoming confused with a normal map/unmap domain. > > This sounds like an interesting idea. Looking at > drivers/iommu/s390_iommu.c most of that is pretty trivial domain > handling. I wonder if we could share this by marking the existing > s390_iommu_domain type with kind of a "lent out to KVM" flag. Lu has posted a series here: https://lore.kernel.org/linux-iommu/20220208012559.1121729-1-baolu.lu@linux.intel.com Which allows the iommu driver to create a domain with unique ops, so you'd just fork the entire thing, have your own struct s390_kvm_iommu_domain and related ops. When the special creation flow is triggered you'd just create one of these with the proper ops already setup. We are imagining a special ioctl to create these things and each IOMMU HW driver can supply a unique implementation suited to their HW design. > KVM RPCIT intercept and vice versa. I.e. while the domain is under > control of KVM's RPCIT handling we make all IOMMU map/unmap fail. It is not "under the control of" the domain would be created as linked to kvm and would never, ever, be anything else. > To me this more direct involvement of IOMMU and KVM on s390x is also a > direct consequence of it using special instructions. Naturally those > instructions can be intercepted or run under hardware accelerated > virtualization. Well, no, you've just created a kernel-side SW emulated nested translation scheme. Other CPUs have talked about doing this too, but nobody has attempted it. 
You can make the same argument for any CPU's scheme, a trapped mmio store is not fundamentally any different from a special instruction that traps, other than how the information is transferred. > Yes very good analogy. Has any of that nested IOMMU translations work > been merged yet? No. We are making quiet progress, slowly though. I'll add your interest to my list > too. Basically we would then execute RPCIT without leaving the > hardware virtualization mode (SIE). We believe that that would > require pinning all of guest memory though because HW can't really > pin pages. Right, this is what other iommu HW will have to do. Jason
On 2/10/22 10:23 AM, Jason Gunthorpe wrote: > On Thu, Feb 10, 2022 at 03:06:35PM +0100, Niklas Schnelle wrote: > >>> How does the page pinning work? >> >> The pinning is done directly in the RPCIT interception handler pinning >> both the IOMMU tables and the guest pages mapped for DMA. > > And if pinning fails? The RPCIT instruction goes back to the guest with an indication that informs it the operation failed / gives it impetus to kick off a guest DMA refresh and clear up space (unpin). > >>> Then the >>> magic kernel code you describe can operate on its own domain without >>> becoming confused with a normal map/unmap domain. >> >> This sounds like an interesting idea. Looking at >> drivers/iommu/s390_iommu.c most of that is pretty trivial domain >> handling. I wonder if we could share this by marking the existing >> s390_iommu_domain type with kind of a "lent out to KVM" flag. > > Lu has posted a series here: > > https://lore.kernel.org/linux-iommu/20220208012559.1121729-1-baolu.lu@linux.intel.com > > Which allows the iommu driver to create a domain with unique ops, so > you'd just fork the entire thing, have your own struct > s390_kvm_iommu_domain and related ops. > OK, looking into this, thanks for the pointer... Sounds to me like we then want to make the determination upfront and then ensure the right iommu domain ops are registered for the device sometime before creation, based upon the usecase -- general userspace: s390_iommu_ops (existing), kvm: s390_kvm_iommu_domain (new). > When the special creation flow is triggered you'd just create one of > these with the proper ops already setup. > > We are imagining a special ioctl to create these things and each IOMMU > HW driver can supply a unique implementation suited to their HW > design. 
But I haven't connected the dots on this part -- At the end of the day for this 'special creation flow' I need the kvm + starting point of the guest table + format before we let the new s390_kvm_iommu_domain start doing automatic map/unmap during RPCIT intercept -- This initial setup has to come from a special ioctl as you say, but where do you see it living? I could certainly roll my own via a KVM ioctl or whatever, but it sounds like you're also referring to a general-purpose ioctl to encompass each of the different unique implementations, with this s390 kvm approach being one.
On Thu, Feb 10, 2022 at 01:59:35PM -0500, Matthew Rosato wrote: > OK, looking into this, thanks for the pointer... Sounds to me like we then > want to make the determination upfront and then ensure the right iommu > domain ops are registered for the device sometime before creation, based > upon the usecase -- general userspace: s390_iommu_ops (existing), kvm: > s390_kvm_iommu_domain (new). Yes, that is the idea. I expect there will be many types of these special iommu domains. eg Intel has talked about directly using the KVM CPU page table as the IOMMU page table. > > When the special creation flow is triggered you'd just create one of > > these with the proper ops already setup. > > > We are imagining a special ioctl to create these things and each IOMMU > > HW driver can supply a unique implementation suited to their HW > > design. > > But I haven't connected the dots on this part -- At the end of the day for > this 'special creation flow' I need the kvm + starting point of the guest > table + format before we let the new s390_kvm_iommu_domain start doing > automatic map/unmap during RPCIT intercept -- This initial setup has to come > from a special ioctl as you say, but where do you see it living? I could > certainly roll my own via a KVM ioctl or whatever, but it sounds like you're > also referring to a general-purpose ioctl to encompass each of the different > unique implementations, with this s390 kvm approach being one. So, the ioctl will need, as input, a kvm FD and an iommufd FD, and additional IOMMU driver specific data (format, starting, etc). The kvm supplies the context for the RPCIT to be captured in The result is the creation of an iommu_domain inside iommufd, done by some iommu_ops->alloc_domain_xxxx() driver callback Which FD has the ioctl it is a bit of an aesthetic choice, but I predict that iommufd makes the most sense since an object is being created inside iommufd. 
This flow is very similar to the 'userspace page table' flow others are looking at, but has the extra twist that a KVM FD is needed to supply the CPU page table. It may overlap nicely with the intel direction I mentioned. It is just ugly layering wise that KVM is getting shoved into platform code and uapis all over the place, but I suppose that is unavoidable. And the required loose coupling with the kvm module means all kinds of symbol_gets'etc :( Jason
diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h index e4696f5592e1..16290b4cf2a6 100644 --- a/arch/s390/include/asm/kvm_pci.h +++ b/arch/s390/include/asm/kvm_pci.h @@ -16,6 +16,7 @@ #include <linux/kvm.h> #include <linux/pci.h> #include <linux/mutex.h> +#include <linux/notifier.h> #include <asm/pci_insn.h> #include <asm/pci_dma.h> @@ -32,6 +33,7 @@ struct kvm_zdev { u64 rpcit_count; struct kvm_zdev_ioat ioat; struct zpci_fib fib; + struct notifier_block nb; }; int kvm_s390_pci_dev_open(struct zpci_dev *zdev); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index f948e6cd2993..fc57d4d0abbe 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -452,6 +452,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) vfio_pci_vf_token_user_add(vdev, -1); vfio_spapr_pci_eeh_release(vdev->pdev); + vfio_pci_zdev_release(vdev); vfio_pci_core_disable(vdev); mutex_lock(&vdev->igate); @@ -470,6 +471,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); + vfio_pci_zdev_open(vdev); vfio_spapr_pci_eeh_open(vdev->pdev); vfio_pci_vf_token_user_add(vdev, 1); } diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index ea4c0d2b0663..9f8284499111 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -13,6 +13,7 @@ #include <linux/vfio_zdev.h> #include <asm/pci_clp.h> #include <asm/pci_io.h> +#include <asm/kvm_pci.h> #include <linux/vfio_pci_core.h> @@ -136,3 +137,48 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, return ret; } + +static int vfio_pci_zdev_group_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct kvm_zdev *kzdev = container_of(nb, struct kvm_zdev, nb); + + if (action == VFIO_GROUP_NOTIFY_SET_KVM) { + if (!data || !kzdev->zdev) + return NOTIFY_DONE; + 
kzdev->kvm = data; + } + + return NOTIFY_OK; +} + +void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev) +{ + unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM; + struct zpci_dev *zdev = to_zpci(vdev->pdev); + + if (!zdev) + return; + + if (kvm_s390_pci_dev_open(zdev)) + return; + + zdev->kzdev->nb.notifier_call = vfio_pci_zdev_group_notifier; + + if (vfio_register_notifier(vdev->vdev.dev, VFIO_GROUP_NOTIFY, + &events, &zdev->kzdev->nb)) + kvm_s390_pci_dev_release(zdev); +} + +void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev) +{ + struct zpci_dev *zdev = to_zpci(vdev->pdev); + + if (!zdev || !zdev->kzdev) + return; + + vfio_unregister_notifier(vdev->vdev.dev, VFIO_GROUP_NOTIFY, + &zdev->kzdev->nb); + + kvm_s390_pci_dev_release(zdev); +} diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 5e2bca3b89db..05287f8ac855 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -198,12 +198,22 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) #ifdef CONFIG_VFIO_PCI_ZDEV extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps); +void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev); +void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev); #else static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) { return -ENODEV; } + +static inline void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev) +{ +} + +static inline void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev) +{ +} #endif /* Will be exported for vfio pci drivers usage */
KVM zPCI passthrough device logic will need a reference to the associated kvm guest that has access to the device. Let's register a group notifier for VFIO_GROUP_NOTIFY_SET_KVM to catch this information in order to create an association between a kvm guest and the host zdev. Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> --- arch/s390/include/asm/kvm_pci.h | 2 ++ drivers/vfio/pci/vfio_pci_core.c | 2 ++ drivers/vfio/pci/vfio_pci_zdev.c | 46 ++++++++++++++++++++++++++++++++ include/linux/vfio_pci_core.h | 10 +++++++ 4 files changed, 60 insertions(+)