
[v2] vfio/pci: Verify each MSI vector to avoid invalid MSI vectors

Message ID 1669167756-196788-1-git-send-email-chenxiang66@hisilicon.com
State New, archived
Series [v2] vfio/pci: Verify each MSI vector to avoid invalid MSI vectors

Commit Message

chenxiang Nov. 23, 2022, 1:42 a.m. UTC
From: Xiang Chen <chenxiang66@hisilicon.com>

Currently the number of MSI vectors comes from the register
PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
is not the same as the number the driver in the guest requires: for
example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
vectors while the driver in the guest only wants 6.
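
For illustration (not part of the patch): the MSI capability encodes
the vector count as a power of two in its Multiple Message
Capable/Enable fields, which is why a request for 6 vectors becomes 8.
A minimal sketch, with the helper name invented for this example:

	#include <linux/log2.h>

	/* MSI vector counts are encoded as log2 in the capability,
	 * so any request is rounded up to the next power of two,
	 * e.g. 6 -> 8.
	 */
	static unsigned int msi_effective_nvec(unsigned int requested)
	{
		return roundup_pow_of_two(requested);
	}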

When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
enables forwarding, while the guest has only created some of the
mappings in the virtual ITS, so some calls fail. The error message is
as follows:
vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
fails:66311

To avoid the issue, verify each MSI vector and skip operations such as
request_irq() and irq_bypass_register_producer() for invalid MSI vectors.

Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
---
I reported the issue at the link:
https://lkml.kernel.org/lkml/87cze9lcut.wl-maz@kernel.org/T/

Change Log:
v1 -> v2:
Verify each MSI vector in the kernel instead of adding a syscall, per
Marc's suggestion
---
 arch/arm64/kvm/vgic/vgic-irqfd.c  | 13 +++++++++++++
 arch/arm64/kvm/vgic/vgic-its.c    | 36 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/vgic/vgic.h        |  1 +
 drivers/vfio/pci/vfio_pci_intrs.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h          |  2 ++
 5 files changed, 85 insertions(+)

Comments

Marc Zyngier Nov. 23, 2022, 12:08 p.m. UTC | #1
On Wed, 23 Nov 2022 01:42:36 +0000,
chenxiang <chenxiang66@hisilicon.com> wrote:
> 
> From: Xiang Chen <chenxiang66@hisilicon.com>
> 
> Currently the number of MSI vectors comes from the register
> PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
> is not the same as the number the driver in the guest requires: for
> example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
> due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
> vectors while the driver in the guest only wants 6.
> 
> When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
> enables forwarding, while the guest has only created some of the
> mappings in the virtual ITS, so some calls fail. The error message is
> as follows:
> vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
> fails:66311
> 
> To avoid the issue, verify each MSI vector and skip operations such as
> request_irq() and irq_bypass_register_producer() for invalid MSI vectors.
> 
> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> ---
> I reported the issue at the link:
> https://lkml.kernel.org/lkml/87cze9lcut.wl-maz@kernel.org/T/
> 
> Change Log:
> v1 -> v2:
> Verify each MSI vector in the kernel instead of adding a syscall, per
> Marc's suggestion
> ---
>  arch/arm64/kvm/vgic/vgic-irqfd.c  | 13 +++++++++++++
>  arch/arm64/kvm/vgic/vgic-its.c    | 36 ++++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/vgic/vgic.h        |  1 +
>  drivers/vfio/pci/vfio_pci_intrs.c | 33 +++++++++++++++++++++++++++++++++
>  include/linux/kvm_host.h          |  2 ++
>  5 files changed, 85 insertions(+)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
> index 475059b..71f6af57 100644
> --- a/arch/arm64/kvm/vgic/vgic-irqfd.c
> +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
> @@ -98,6 +98,19 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
>  	return vgic_its_inject_msi(kvm, &msi);
>  }
>  
> +int kvm_verify_msi(struct kvm *kvm,
> +		   struct kvm_kernel_irq_routing_entry *irq_entry)
> +{
> +	struct kvm_msi msi;
> +
> +	if (!vgic_has_its(kvm))
> +		return -ENODEV;
> +
> +	kvm_populate_msi(irq_entry, &msi);
> +
> +	return vgic_its_verify_msi(kvm, &msi);
> +}
> +
>  /**
>   * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
>   */
> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> index 94a666d..8312a4a 100644
> --- a/arch/arm64/kvm/vgic/vgic-its.c
> +++ b/arch/arm64/kvm/vgic/vgic-its.c
> @@ -767,6 +767,42 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
>  	return 0;
>  }
>  
> +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi)
> +{
> +	struct vgic_its *its;
> +	struct its_ite *ite;
> +	struct kvm_vcpu *vcpu;
> +	int ret = 0;
> +
> +	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
> +		return -EINVAL;
> +
> +	if (!vgic_has_its(kvm))
> +		return -ENODEV;
> +
> +	its = vgic_msi_to_its(kvm, msi);
> +	if (IS_ERR(its))
> +		return PTR_ERR(its);
> +
> +	mutex_lock(&its->its_lock);
> +	if (!its->enabled) {
> +		ret = -EBUSY;
> +		goto unlock;
> +	}
> +	ite = find_ite(its, msi->devid, msi->data);
> +	if (!ite || !its_is_collection_mapped(ite->collection)) {
> +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
> +		goto unlock;
> +	}
> +
> +	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
> +	if (!vcpu)
> +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;

I'm sorry, but what does this mean to the caller? This should never
leak outside of the ITS code.
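
One way to address this (a hypothetical sketch, not something posted in
the thread) is to map the ITS-internal command error to a plain errno
before vgic_its_verify_msi() returns, so callers outside vgic-its.c
only ever see 0 or a negative errno:

	ite = find_ite(its, msi->devid, msi->data);
	if (!ite || !its_is_collection_mapped(ite->collection)) {
		/* Unmapped event: report a generic errno and keep the
		 * E_ITS_* encodings internal to the ITS emulation.
		 */
		ret = -ENXIO;
		goto unlock;
	}

	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
	if (!vcpu)
		ret = -ENXIO;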

> +unlock:
> +	mutex_unlock(&its->its_lock);
> +	return ret;
> +}
> +
>  /*
>   * Queries the KVM IO bus framework to get the ITS pointer from the given
>   * doorbell address.
> diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
> index 0c8da72..d452150 100644
> --- a/arch/arm64/kvm/vgic/vgic.h
> +++ b/arch/arm64/kvm/vgic/vgic.h
> @@ -240,6 +240,7 @@ int kvm_vgic_register_its_device(void);
>  void vgic_enable_lpis(struct kvm_vcpu *vcpu);
>  void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
>  int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
> +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi);
>  int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
>  int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
>  			 int offset, u32 *val);
> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
> index 40c3d7c..3027805 100644
> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> @@ -19,6 +19,7 @@
>  #include <linux/vfio.h>
>  #include <linux/wait.h>
>  #include <linux/slab.h>
> +#include <linux/kvm_irqfd.h>
>  
>  #include "vfio_pci_priv.h"
>  
> @@ -315,6 +316,28 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
>  	return 0;
>  }
>  
> +static int vfio_pci_verify_msi_entry(struct vfio_pci_core_device *vdev,
> +		struct eventfd_ctx *trigger)
> +{
> +	struct kvm *kvm = vdev->vdev.kvm;
> +	struct kvm_kernel_irqfd *tmp;
> +	struct kvm_kernel_irq_routing_entry irq_entry;
> +	int ret = -ENODEV;
> +
> +	spin_lock_irq(&kvm->irqfds.lock);
> +	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
> +		if (trigger == tmp->eventfd) {
> +			ret = 0;
> +			break;
> +		}
> +	}
> +	spin_unlock_irq(&kvm->irqfds.lock);
> +	if (ret)
> +		return ret;
> +	irq_entry = tmp->irq_entry;
> +	return kvm_verify_msi(kvm, &irq_entry);

How does this work on !arm64? Why do we need an on-stack version of
tmp->irq_entry?

> +}
> +
>  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>  				      int vector, int fd, bool msix)
>  {
> @@ -355,6 +378,16 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>  		return PTR_ERR(trigger);
>  	}
>  
> +	if (!msix) {
> +		ret = vfio_pci_verify_msi_entry(vdev, trigger);
> +		if (ret) {
> +			kfree(vdev->ctx[vector].name);
> +			eventfd_ctx_put(trigger);
> +			if (ret > 0)
> +				ret = 0;
> +			return ret;
> +		}
> +	}

Honestly, the whole thing seems really complicated to avoid something
that is only a harmless warning. How about just toning down the
message instead?

	M.
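
A minimal sketch of the toned-down alternative, assuming the dev_info()
call that drivers/vfio/pci/vfio_pci_intrs.c of this era uses when
irq_bypass_register_producer() fails in vfio_msi_set_vector_signal():

	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
	if (unlikely(ret)) {
		/* Harmless: direct injection is bypassed, but the
		 * interrupt still reaches the guest via the eventfd.
		 */
		dev_dbg(&pdev->dev,
			"irq bypass producer (token %p) registration fails: %d\n",
			vdev->ctx[vector].producer.token, ret);
		vdev->ctx[vector].producer.token = NULL;
	}
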
Alex Williamson Nov. 23, 2022, 7:55 p.m. UTC | #2
On Wed, 23 Nov 2022 12:08:05 +0000
Marc Zyngier <maz@kernel.org> wrote:

> On Wed, 23 Nov 2022 01:42:36 +0000,
> chenxiang <chenxiang66@hisilicon.com> wrote:
> > 
> > From: Xiang Chen <chenxiang66@hisilicon.com>
> > 
> > Currently the number of MSI vectors comes from the register
> > PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
> > is not the same as the number the driver in the guest requires: for
> > example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
> > due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
> > vectors while the driver in the guest only wants 6.
> > 
> > When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
> > enables forwarding, while the guest has only created some of the
> > mappings in the virtual ITS, so some calls fail. The error message is
> > as follows:
> > vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
> > fails:66311
> > 
> > To avoid the issue, verify each MSI vector and skip operations such as
> > request_irq() and irq_bypass_register_producer() for invalid MSI vectors.
> > 
> > Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> > ---
> > I reported the issue at the link:
> > https://lkml.kernel.org/lkml/87cze9lcut.wl-maz@kernel.org/T/
> > 
> > Change Log:
> > v1 -> v2:
> > Verify each MSI vector in the kernel instead of adding a syscall, per
> > Marc's suggestion
> > ---
> >  arch/arm64/kvm/vgic/vgic-irqfd.c  | 13 +++++++++++++
> >  arch/arm64/kvm/vgic/vgic-its.c    | 36 ++++++++++++++++++++++++++++++++++++
> >  arch/arm64/kvm/vgic/vgic.h        |  1 +
> >  drivers/vfio/pci/vfio_pci_intrs.c | 33 +++++++++++++++++++++++++++++++++
> >  include/linux/kvm_host.h          |  2 ++
> >  5 files changed, 85 insertions(+)
> > 
> > diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
> > index 475059b..71f6af57 100644
> > --- a/arch/arm64/kvm/vgic/vgic-irqfd.c
> > +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
> > @@ -98,6 +98,19 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> >  	return vgic_its_inject_msi(kvm, &msi);
> >  }
> >  
> > +int kvm_verify_msi(struct kvm *kvm,
> > +		   struct kvm_kernel_irq_routing_entry *irq_entry)
> > +{
> > +	struct kvm_msi msi;
> > +
> > +	if (!vgic_has_its(kvm))
> > +		return -ENODEV;
> > +
> > +	kvm_populate_msi(irq_entry, &msi);
> > +
> > +	return vgic_its_verify_msi(kvm, &msi);
> > +}
> > +
> >  /**
> >   * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
> >   */
> > diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> > index 94a666d..8312a4a 100644
> > --- a/arch/arm64/kvm/vgic/vgic-its.c
> > +++ b/arch/arm64/kvm/vgic/vgic-its.c
> > @@ -767,6 +767,42 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
> >  	return 0;
> >  }
> >  
> > +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi)
> > +{
> > +	struct vgic_its *its;
> > +	struct its_ite *ite;
> > +	struct kvm_vcpu *vcpu;
> > +	int ret = 0;
> > +
> > +	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
> > +		return -EINVAL;
> > +
> > +	if (!vgic_has_its(kvm))
> > +		return -ENODEV;
> > +
> > +	its = vgic_msi_to_its(kvm, msi);
> > +	if (IS_ERR(its))
> > +		return PTR_ERR(its);
> > +
> > +	mutex_lock(&its->its_lock);
> > +	if (!its->enabled) {
> > +		ret = -EBUSY;
> > +		goto unlock;
> > +	}
> > +	ite = find_ite(its, msi->devid, msi->data);
> > +	if (!ite || !its_is_collection_mapped(ite->collection)) {
> > +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
> > +		goto unlock;
> > +	}
> > +
> > +	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
> > +	if (!vcpu)
> > +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;  
> 
> I'm sorry, but what does this mean to the caller? This should never
> leak outside of the ITS code.
> 
> > +unlock:
> > +	mutex_unlock(&its->its_lock);
> > +	return ret;
> > +}
> > +
> >  /*
> >   * Queries the KVM IO bus framework to get the ITS pointer from the given
> >   * doorbell address.
> > diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
> > index 0c8da72..d452150 100644
> > --- a/arch/arm64/kvm/vgic/vgic.h
> > +++ b/arch/arm64/kvm/vgic/vgic.h
> > @@ -240,6 +240,7 @@ int kvm_vgic_register_its_device(void);
> >  void vgic_enable_lpis(struct kvm_vcpu *vcpu);
> >  void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
> >  int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
> > +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi);
> >  int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
> >  int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
> >  			 int offset, u32 *val);
> > diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
> > index 40c3d7c..3027805 100644
> > --- a/drivers/vfio/pci/vfio_pci_intrs.c
> > +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> > @@ -19,6 +19,7 @@
> >  #include <linux/vfio.h>
> >  #include <linux/wait.h>
> >  #include <linux/slab.h>
> > +#include <linux/kvm_irqfd.h>
> >  
> >  #include "vfio_pci_priv.h"
> >  
> > @@ -315,6 +316,28 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
> >  	return 0;
> >  }
> >  
> > +static int vfio_pci_verify_msi_entry(struct vfio_pci_core_device *vdev,
> > +		struct eventfd_ctx *trigger)
> > +{
> > +	struct kvm *kvm = vdev->vdev.kvm;
> > +	struct kvm_kernel_irqfd *tmp;
> > +	struct kvm_kernel_irq_routing_entry irq_entry;
> > +	int ret = -ENODEV;
> > +
> > +	spin_lock_irq(&kvm->irqfds.lock);
> > +	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
> > +		if (trigger == tmp->eventfd) {
> > +			ret = 0;
> > +			break;
> > +		}
> > +	}
> > +	spin_unlock_irq(&kvm->irqfds.lock);
> > +	if (ret)
> > +		return ret;
> > +	irq_entry = tmp->irq_entry;
> > +	return kvm_verify_msi(kvm, &irq_entry);  
> 
> How does this work on !arm64? Why do we need an on-stack version of
> tmp->irq_entry?

Not only on !arm64, but in any scenario that doesn't involve KVM.
There cannot be a hard dependency between vfio and kvm.  Thanks,

Alex

PS - What driver/device actually cares about more than 1 MSI vector and
doesn't implement MSI-X?

> 
> > +}
> > +
> >  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
> >  				      int vector, int fd, bool msix)
> >  {
> > @@ -355,6 +378,16 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
> >  		return PTR_ERR(trigger);
> >  	}
> >  
> > +	if (!msix) {
> > +		ret = vfio_pci_verify_msi_entry(vdev, trigger);
> > +		if (ret) {
> > +			kfree(vdev->ctx[vector].name);
> > +			eventfd_ctx_put(trigger);
> > +			if (ret > 0)
> > +				ret = 0;
> > +			return ret;
> > +		}
> > +	}  
> 
> Honestly, the whole thing seems really complicated to avoid something
> that is only a harmless warning. How about just toning down the
> message instead?
> 
> 	M.
>
Marc Zyngier Nov. 24, 2022, 1:19 p.m. UTC | #3
On Wed, 23 Nov 2022 19:55:14 +0000,
Alex Williamson <alex.williamson@redhat.com> wrote:
> 
> On Wed, 23 Nov 2022 12:08:05 +0000
> Marc Zyngier <maz@kernel.org> wrote:
> 
> > On Wed, 23 Nov 2022 01:42:36 +0000,
> > chenxiang <chenxiang66@hisilicon.com> wrote:
> > > 
> > > +static int vfio_pci_verify_msi_entry(struct vfio_pci_core_device *vdev,
> > > +		struct eventfd_ctx *trigger)
> > > +{
> > > +	struct kvm *kvm = vdev->vdev.kvm;
> > > +	struct kvm_kernel_irqfd *tmp;
> > > +	struct kvm_kernel_irq_routing_entry irq_entry;
> > > +	int ret = -ENODEV;
> > > +
> > > +	spin_lock_irq(&kvm->irqfds.lock);
> > > +	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
> > > +		if (trigger == tmp->eventfd) {
> > > +			ret = 0;
> > > +			break;
> > > +		}
> > > +	}
> > > +	spin_unlock_irq(&kvm->irqfds.lock);
> > > +	if (ret)
> > > +		return ret;
> > > +	irq_entry = tmp->irq_entry;
> > > +	return kvm_verify_msi(kvm, &irq_entry);  
> > 
> > How does this work on !arm64? Why do we need an on-stack version of
> > tmp->irq_entry?
> 
> Not only on !arm64, but in any scenario that doesn't involve KVM.
> There cannot be a hard dependency between vfio and kvm.  Thanks,

Yup, good point.

> 
> Alex
> 
> PS - What driver/device actually cares about more than 1 MSI vector and
> doesn't implement MSI-X?

Unfortunately, there is a metric ton of crap that fits in that
description:

01:00.0 Network controller: Broadcom Inc. and subsidiaries Device 4433 (rev 07)
	Subsystem: Apple Inc. Device 4387
	Device tree node: /sys/firmware/devicetree/base/soc/pcie@690000000/pci@0,0/wifi@0,0
	Flags: bus master, fast devsel, latency 0, IRQ 97, IOMMU group 4
	Memory at 6c1400000 (64-bit, non-prefetchable) [size=64K]
	Memory at 6c0000000 (64-bit, non-prefetchable) [size=16M]
	Capabilities: [48] Power Management version 3
	Capabilities: [58] MSI: Enable+ Count=1/32 Maskable- 64bit+

... and no MSI-X in sight. Pass this to a VM, and you'll see exactly
what is described here. And that's not old stuff either. This is brand
new HW.

Do we need to care? I don't think so.

	M.
Jason Gunthorpe Nov. 24, 2022, 6 p.m. UTC | #4
On Wed, Nov 23, 2022 at 09:42:36AM +0800, chenxiang via wrote:
> From: Xiang Chen <chenxiang66@hisilicon.com>
> 
> Currently the number of MSI vectors comes from the register
> PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
> is not the same as the number the driver in the guest requires: for
> example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
> due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
> vectors while the driver in the guest only wants 6.
> 
> When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
> enables forwarding, while the guest has only created some of the
> mappings in the virtual ITS, so some calls fail. The error message is
> as follows:
> vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
> fails:66311

With Thomas's series to make MSI more dynamic this could spell future
problems, as future kernels might have different ordering.

It is just architecturally wrong to tie the MSI programming at the PCI
level with the current state of the guest's virtual interrupt
controller.

Physical hardware doesn't do this, virtual emulation shouldn't either.

People are taking too many liberties with trapping the PCI MSI
registers through VFIO. :(

Jason
chenxiang Nov. 26, 2022, 6:33 a.m. UTC | #5
On 2022/11/23 20:08, Marc Zyngier wrote:
> On Wed, 23 Nov 2022 01:42:36 +0000,
> chenxiang <chenxiang66@hisilicon.com> wrote:
>> From: Xiang Chen <chenxiang66@hisilicon.com>
>>
>> Currently the number of MSI vectors comes from the register
>> PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
>> is not the same as the number the driver in the guest requires: for
>> example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
>> due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
>> vectors while the driver in the guest only wants 6.
>>
>> When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
>> enables forwarding, while the guest has only created some of the
>> mappings in the virtual ITS, so some calls fail. The error message is
>> as follows:
>> vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
>> fails:66311
>>
>> To avoid the issue, verify each MSI vector and skip operations such as
>> request_irq() and irq_bypass_register_producer() for invalid MSI vectors.
>>
>> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
>> ---
>> I reported the issue at the link:
>> https://lkml.kernel.org/lkml/87cze9lcut.wl-maz@kernel.org/T/
>>
>> Change Log:
>> v1 -> v2:
>> Verify each MSI vector in the kernel instead of adding a syscall, per
>> Marc's suggestion
>> ---
>>   arch/arm64/kvm/vgic/vgic-irqfd.c  | 13 +++++++++++++
>>   arch/arm64/kvm/vgic/vgic-its.c    | 36 ++++++++++++++++++++++++++++++++++++
>>   arch/arm64/kvm/vgic/vgic.h        |  1 +
>>   drivers/vfio/pci/vfio_pci_intrs.c | 33 +++++++++++++++++++++++++++++++++
>>   include/linux/kvm_host.h          |  2 ++
>>   5 files changed, 85 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
>> index 475059b..71f6af57 100644
>> --- a/arch/arm64/kvm/vgic/vgic-irqfd.c
>> +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
>> @@ -98,6 +98,19 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
>>   	return vgic_its_inject_msi(kvm, &msi);
>>   }
>>   
>> +int kvm_verify_msi(struct kvm *kvm,
>> +		   struct kvm_kernel_irq_routing_entry *irq_entry)
>> +{
>> +	struct kvm_msi msi;
>> +
>> +	if (!vgic_has_its(kvm))
>> +		return -ENODEV;
>> +
>> +	kvm_populate_msi(irq_entry, &msi);
>> +
>> +	return vgic_its_verify_msi(kvm, &msi);
>> +}
>> +
>>   /**
>>    * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
>>    */
>> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
>> index 94a666d..8312a4a 100644
>> --- a/arch/arm64/kvm/vgic/vgic-its.c
>> +++ b/arch/arm64/kvm/vgic/vgic-its.c
>> @@ -767,6 +767,42 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
>>   	return 0;
>>   }
>>   
>> +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi)
>> +{
>> +	struct vgic_its *its;
>> +	struct its_ite *ite;
>> +	struct kvm_vcpu *vcpu;
>> +	int ret = 0;
>> +
>> +	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
>> +		return -EINVAL;
>> +
>> +	if (!vgic_has_its(kvm))
>> +		return -ENODEV;
>> +
>> +	its = vgic_msi_to_its(kvm, msi);
>> +	if (IS_ERR(its))
>> +		return PTR_ERR(its);
>> +
>> +	mutex_lock(&its->its_lock);
>> +	if (!its->enabled) {
>> +		ret = -EBUSY;
>> +		goto unlock;
>> +	}
>> +	ite = find_ite(its, msi->devid, msi->data);
>> +	if (!ite || !its_is_collection_mapped(ite->collection)) {
>> +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
>> +		goto unlock;
>> +	}
>> +
>> +	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
>> +	if (!vcpu)
>> +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
> I'm sorry, but what does this mean to the caller? This should never
> leak outside of the ITS code.

Actually it already leaks outside of the ITS code; see the error
printk (E_ITS_INT_UNMAPPED_INTERRUPT is 0x10307, which equals 66311):

vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration fails:66311
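
For reference, that value comes from the ITS command error encodings in
arch/arm64/kvm/vgic/vgic-its.c (as of the kernels this patch targets):

	#define E_ITS_INT_UNMAPPED_INTERRUPT	0x010307	/* = 66311 decimal */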


>
>> +unlock:
>> +	mutex_unlock(&its->its_lock);
>> +	return ret;
>> +}
>> +
>>   /*
>>    * Queries the KVM IO bus framework to get the ITS pointer from the given
>>    * doorbell address.
>> diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
>> index 0c8da72..d452150 100644
>> --- a/arch/arm64/kvm/vgic/vgic.h
>> +++ b/arch/arm64/kvm/vgic/vgic.h
>> @@ -240,6 +240,7 @@ int kvm_vgic_register_its_device(void);
>>   void vgic_enable_lpis(struct kvm_vcpu *vcpu);
>>   void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
>>   int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
>> +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi);
>>   int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
>>   int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
>>   			 int offset, u32 *val);
>> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
>> index 40c3d7c..3027805 100644
>> --- a/drivers/vfio/pci/vfio_pci_intrs.c
>> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
>> @@ -19,6 +19,7 @@
>>   #include <linux/vfio.h>
>>   #include <linux/wait.h>
>>   #include <linux/slab.h>
>> +#include <linux/kvm_irqfd.h>
>>   
>>   #include "vfio_pci_priv.h"
>>   
>> @@ -315,6 +316,28 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
>>   	return 0;
>>   }
>>   
>> +static int vfio_pci_verify_msi_entry(struct vfio_pci_core_device *vdev,
>> +		struct eventfd_ctx *trigger)
>> +{
>> +	struct kvm *kvm = vdev->vdev.kvm;
>> +	struct kvm_kernel_irqfd *tmp;
>> +	struct kvm_kernel_irq_routing_entry irq_entry;
>> +	int ret = -ENODEV;
>> +
>> +	spin_lock_irq(&kvm->irqfds.lock);
>> +	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
>> +		if (trigger == tmp->eventfd) {
>> +			ret = 0;
>> +			break;
>> +		}
>> +	}
>> +	spin_unlock_irq(&kvm->irqfds.lock);
>> +	if (ret)
>> +		return ret;
>> +	irq_entry = tmp->irq_entry;
>> +	return kvm_verify_msi(kvm, &irq_entry);
> How does this work on !arm64? Why do we need an on-stack version of
> tmp->irq_entry?

Right, I didn't notice that this is common code and needs to work on
other platforms.

>
>> +}
>> +
>>   static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>   				      int vector, int fd, bool msix)
>>   {
>> @@ -355,6 +378,16 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>   		return PTR_ERR(trigger);
>>   	}
>>   
>> +	if (!msix) {
>> +		ret = vfio_pci_verify_msi_entry(vdev, trigger);
>> +		if (ret) {
>> +			kfree(vdev->ctx[vector].name);
>> +			eventfd_ctx_put(trigger);
>> +			if (ret > 0)
>> +				ret = 0;
>> +			return ret;
>> +		}
>> +	}
> Honestly, the whole thing seems really complicated to avoid something
> that is only a harmless warning.

It also seems to waste some interrupts: they are allocated and
requested but never used.

> How about just toning down the
> message instead?

ok
Marc Zyngier Nov. 26, 2022, 10:58 a.m. UTC | #6
On Sat, 26 Nov 2022 06:33:15 +0000,
"chenxiang (M)" <chenxiang66@hisilicon.com> wrote:
> 
> 
> On 2022/11/23 20:08, Marc Zyngier wrote:
> > On Wed, 23 Nov 2022 01:42:36 +0000,
> > chenxiang <chenxiang66@hisilicon.com> wrote:
> >> From: Xiang Chen <chenxiang66@hisilicon.com>
> >> 
> >> Currently the number of MSI vectors comes from the register
> >> PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
> >> is not the same as the number the driver in the guest requires: for
> >> example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
> >> due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
> >> vectors while the driver in the guest only wants 6.
> >> 
> >> When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
> >> enables forwarding, while the guest has only created some of the
> >> mappings in the virtual ITS, so some calls fail. The error message is
> >> as follows:
> >> vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
> >> fails:66311
> >> 
> >> To avoid the issue, verify each MSI vector and skip operations such as
> >> request_irq() and irq_bypass_register_producer() for invalid MSI vectors.
> >> 
> >> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> >> ---
> >> I reported the issue at the link:
> >> https://lkml.kernel.org/lkml/87cze9lcut.wl-maz@kernel.org/T/
> >> 
> >> Change Log:
> >> v1 -> v2:
> >> Verify each MSI vector in the kernel instead of adding a syscall, per
> >> Marc's suggestion
> >> ---
> >>   arch/arm64/kvm/vgic/vgic-irqfd.c  | 13 +++++++++++++
> >>   arch/arm64/kvm/vgic/vgic-its.c    | 36 ++++++++++++++++++++++++++++++++++++
> >>   arch/arm64/kvm/vgic/vgic.h        |  1 +
> >>   drivers/vfio/pci/vfio_pci_intrs.c | 33 +++++++++++++++++++++++++++++++++
> >>   include/linux/kvm_host.h          |  2 ++
> >>   5 files changed, 85 insertions(+)
> >> 
> >> diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
> >> index 475059b..71f6af57 100644
> >> --- a/arch/arm64/kvm/vgic/vgic-irqfd.c
> >> +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
> >> @@ -98,6 +98,19 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> >>   	return vgic_its_inject_msi(kvm, &msi);
> >>   }
> >>
> >> +int kvm_verify_msi(struct kvm *kvm,
> >> +		   struct kvm_kernel_irq_routing_entry *irq_entry)
> >> +{
> >> +	struct kvm_msi msi;
> >> +
> >> +	if (!vgic_has_its(kvm))
> >> +		return -ENODEV;
> >> +
> >> +	kvm_populate_msi(irq_entry, &msi);
> >> +
> >> +	return vgic_its_verify_msi(kvm, &msi);
> >> +}
> >> +
> >>   /**
> >>    * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
> >>    */
> >> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> >> index 94a666d..8312a4a 100644
> >> --- a/arch/arm64/kvm/vgic/vgic-its.c
> >> +++ b/arch/arm64/kvm/vgic/vgic-its.c
> >> @@ -767,6 +767,42 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
> >>   	return 0;
> >>   }
> >>
> >> +int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi)
> >> +{
> >> +	struct vgic_its *its;
> >> +	struct its_ite *ite;
> >> +	struct kvm_vcpu *vcpu;
> >> +	int ret = 0;
> >> +
> >> +	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
> >> +		return -EINVAL;
> >> +
> >> +	if (!vgic_has_its(kvm))
> >> +		return -ENODEV;
> >> +
> >> +	its = vgic_msi_to_its(kvm, msi);
> >> +	if (IS_ERR(its))
> >> +		return PTR_ERR(its);
> >> +
> >> +	mutex_lock(&its->its_lock);
> >> +	if (!its->enabled) {
> >> +		ret = -EBUSY;
> >> +		goto unlock;
> >> +	}
> >> +	ite = find_ite(its, msi->devid, msi->data);
> >> +	if (!ite || !its_is_collection_mapped(ite->collection)) {
> >> +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
> >> +		goto unlock;
> >> +	}
> >> +
> >> +	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
> >> +	if (!vcpu)
> >> +		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
> > I'm sorry, but what does this mean to the caller? This should never
> > leak outside of the ITS code.
> 
> Actually it already leaks outside of the ITS code; see the error
> printk (E_ITS_INT_UNMAPPED_INTERRUPT is 0x10307, which equals 66311):
> 
> vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration fails:66311
> 

But that value is hardly something the caller can interpret, which is
the whole point: only zero is considered a success value.

> > Honestly, the whole thing seems really complicated to avoid something
> > that is only a harmless warning.
> 
> It also seems to waste some interrupts: they are allocated and
> requested but never used.

What makes you think they are not used? A guest can install a mapping
for those at any point. They won't be directly injected, but they will
be delivered to the guest via the normal SW injection mechanism.

	M.
Marc Zyngier Nov. 26, 2022, 11:15 a.m. UTC | #7
On Thu, 24 Nov 2022 18:00:44 +0000,
Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> On Wed, Nov 23, 2022 at 09:42:36AM +0800, chenxiang via wrote:
> > From: Xiang Chen <chenxiang66@hisilicon.com>
> > 
> > Currently the number of MSI vectors comes from the register
> > PCI_MSI_FLAGS, which must be a power of 2 in QEMU. In some scenarios it
> > is not the same as the number the driver in the guest requires: for
> > example, a PCI driver wants to allocate 6 MSI vectors in the guest, but
> > due to this limitation it ends up allocating 8. So QEMU requests 8 MSI
> > vectors while the driver in the guest only wants 6.
> > guest only wants to allocate 6 MSI vectors.
> > 
> > When GICv4.1 is enabled, VFIO iterates over all possible MSIs and
> > enables forwarding, while the guest has only created some of the
> > mappings in the virtual ITS, so some calls fail. The error message is
> > as follows:
> > vfio-pci 0000:3a:00.1: irq bypass producer (token 000000008f08224d) registration
> > fails:66311
> 
> With Thomas's series to make MSI more dynamic this could spell future
> problems, as future kernels might have different ordering.

Enabling MSIs on the endpoint before they are programmed in the
interrupt controller? I don't think that's a realistic outcome.

> It is just architecturally wrong to tie the MSI programming at the PCI
> level with the current state of the guest's virtual interrupt
> controller.

There is no architectural ties between the two at all. There is an
optimisation that allows direct injection if you do it in a non
braindead order. Nothing breaks if you don't, you just have wasted
memory, performance, power and area. You're welcome.

> Physical hardware doesn't do this, virtual emulation shouldn't either.

If you want to fix VFIO, be my guest. My rambling about the sorry
state of this has been in the kernel for 5 years (ed8703a506a8).

> People are taking too many liberties with trapping the PCI MSI
> registers through VFIO. :(

Do you really want to leave access to the MSI BAR to userspace? The
number of ways this can go wrong is mind-boggling. Starting with
having to rebuild the interrupt translation tables on the host side to
follow what the guest does, instead of keeping the two independent.

	M.
Jason Gunthorpe Nov. 28, 2022, 6:01 p.m. UTC | #8
On Sat, Nov 26, 2022 at 11:15:14AM +0000, Marc Zyngier wrote:

> > Physical hardware doesn't do this, virtual emulation shouldn't either.
> 
> If you want to fix VFIO, be my guest. My rambling about the sorry
> state of this has been in the kernel for 5 years (ed8703a506a8).

We are talking about things. Stuff we want to do doesn't work, or is
completely insane right now.

> > People are taking too many liberties with trapping the PCI MSI
> > registers through VFIO. :(
> 
> Do you really want to leave access to the MSI BAR to userspace? The
> number of ways this can go wrong is mind-boggling. 

Yeah, actually I do. This is basically mandatory to do something like
IMS, SIOV, etc.

> Starting with having to rebuild the interrupt translation tables on
> the host side to follow what the guest does, instead of keeping the
> two independent.

At least on x86 most of the discussion has been about teaching the
interrupt controller to go to the hypervisor to get help when
establishing interrupts. The hypervisor can tell the guest what the
real MSI data is.

This is following the example of hyperv which plugs in a hyper call to
HVCALL_MAP_DEVICE_INTERRUPT in its remapping irq_chip. This allows the
hypervisor to tell the guest a real addr/data pair and the hypervisor
does not have to involve itself in the device programming.

We haven't reached a point of thinking in detail about ARM, but I would
guess the general theme would still apply.

Jason

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index 475059b..71f6af57 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -98,6 +98,19 @@  int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return vgic_its_inject_msi(kvm, &msi);
 }
 
+int kvm_verify_msi(struct kvm *kvm,
+		   struct kvm_kernel_irq_routing_entry *irq_entry)
+{
+	struct kvm_msi msi;
+
+	if (!vgic_has_its(kvm))
+		return -ENODEV;
+
+	kvm_populate_msi(irq_entry, &msi);
+
+	return vgic_its_verify_msi(kvm, &msi);
+}
+
 /**
  * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
  */
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 94a666d..8312a4a 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -767,6 +767,42 @@  int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
 	return 0;
 }
 
+int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct vgic_its *its;
+	struct its_ite *ite;
+	struct kvm_vcpu *vcpu;
+	int ret = 0;
+
+	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
+		return -EINVAL;
+
+	if (!vgic_has_its(kvm))
+		return -ENODEV;
+
+	its = vgic_msi_to_its(kvm, msi);
+	if (IS_ERR(its))
+		return PTR_ERR(its);
+
+	mutex_lock(&its->its_lock);
+	if (!its->enabled) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+	ite = find_ite(its, msi->devid, msi->data);
+	if (!ite || !its_is_collection_mapped(ite->collection)) {
+		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
+		goto unlock;
+	}
+
+	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
+	if (!vcpu)
+		ret = E_ITS_INT_UNMAPPED_INTERRUPT;
+unlock:
+	mutex_unlock(&its->its_lock);
+	return ret;
+}
+
 /*
  * Queries the KVM IO bus framework to get the ITS pointer from the given
  * doorbell address.
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c8da72..d452150 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -240,6 +240,7 @@  int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
+int vgic_its_verify_msi(struct kvm *kvm, struct kvm_msi *msi);
 int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
 			 int offset, u32 *val);
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 40c3d7c..3027805 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -19,6 +19,7 @@ 
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/kvm_irqfd.h>
 
 #include "vfio_pci_priv.h"
 
@@ -315,6 +316,28 @@  static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
 	return 0;
 }
 
+static int vfio_pci_verify_msi_entry(struct vfio_pci_core_device *vdev,
+		struct eventfd_ctx *trigger)
+{
+	struct kvm *kvm = vdev->vdev.kvm;
+	struct kvm_kernel_irqfd *tmp;
+	struct kvm_kernel_irq_routing_entry irq_entry;
+	int ret = -ENODEV;
+
+	spin_lock_irq(&kvm->irqfds.lock);
+	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
+		if (trigger == tmp->eventfd) {
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irq(&kvm->irqfds.lock);
+	if (ret)
+		return ret;
+	irq_entry = tmp->irq_entry;
+	return kvm_verify_msi(kvm, &irq_entry);
+}
+
 static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 				      int vector, int fd, bool msix)
 {
@@ -355,6 +378,16 @@  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 		return PTR_ERR(trigger);
 	}
 
+	if (!msix) {
+		ret = vfio_pci_verify_msi_entry(vdev, trigger);
+		if (ret) {
+			kfree(vdev->ctx[vector].name);
+			eventfd_ctx_put(trigger);
+			if (ret > 0)
+				ret = 0;
+			return ret;
+		}
+	}
 	/*
 	 * The MSIx vector table resides in device memory which may be cleared
 	 * via backdoor resets. We don't allow direct access to the vector
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1cd9a22..3c8f22a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1611,6 +1611,8 @@  void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+int kvm_verify_msi(struct kvm *kvm,
+		   struct kvm_kernel_irq_routing_entry *irq_entry);
 
 /*
  * Returns a pointer to the memslot if it contains gfn.