diff mbox series

[kvmtool,v2,3/3] vfio-pci: Re-enable INTx mode when disable MSI/MSIX

Message ID 20190320062046.3895-4-leo.yan@linaro.org (mailing list archive)
State New, archived
Headers show
Series vfio-pci: Support INTx mode re-enabling | expand

Commit Message

Leo Yan March 20, 2019, 6:20 a.m. UTC
Since PCI forbids enabling INTx, MSI or MSIX at the same time, it's by
default to disable INTx mode when enable MSI/MSIX mode; but this logic is
easily broken if the guest PCI driver detects the MSI/MSIX cannot work as
expected and tries to rollback to use INTx mode.  The INTx mode has been
disabled and it has no chance to be enabled again, thus both INTx mode
and MSI/MSIX mode will not be enabled in vfio for this case.

Below shows the detailed flow for introducing this issue:

  vfio_pci_configure_dev_irqs()
    `-> vfio_pci_enable_intx()

  vfio_pci_enable_msis()
    `-> vfio_pci_disable_intx()

  vfio_pci_disable_msis()   => Guest PCI driver disables MSI

To fix this issue, when disable MSI/MSIX we need to check if INTx mode
is available for this device or not; if the device can support INTx then
we need to re-enable it so the device can fallback to use it.

In this patch, should note two minor changes:

- vfio_pci_disable_intx() may be called multiple times (each time the
  guest enables one MSI vector).  This patch changes to use
  'intx_fd == -1' to denote the INTx disabled, vfio_pci_disable_intx()
  and vfio_pci_enable_intx will directly bail out when detect INTx has
  been disabled and enabled respectively.

- Since pci_device_header will be corrupted after PCI configuration
  and all irq related info will be lost.  Before re-enabling INTx
  mode, this patch restores 'irq_pin' and 'irq_line' fields in struct
  pci_device_header.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 vfio/pci.c | 59 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 11 deletions(-)

Comments

Jean-Philippe Brucker March 25, 2019, 6:28 p.m. UTC | #1
On 20/03/2019 06:20, Leo Yan wrote:
> Since PCI forbids enabling INTx, MSI or MSIX at the same time, it's by
> default to disable INTx mode when enable MSI/MSIX mode; but this logic is
> easily broken if the guest PCI driver detects the MSI/MSIX cannot work as
> expected and tries to rollback to use INTx mode.  The INTx mode has been
> disabled and it has no chance to be enabled again, thus both INTx mode
> and MSI/MSIX mode will not be enabled in vfio for this case.
> 
> Below shows the detailed flow for introducing this issue:
> 
>   vfio_pci_configure_dev_irqs()
>     `-> vfio_pci_enable_intx()
> 
>   vfio_pci_enable_msis()
>     `-> vfio_pci_disable_intx()
> 
>   vfio_pci_disable_msis()   => Guest PCI driver disables MSI
> 
> To fix this issue, when disable MSI/MSIX we need to check if INTx mode
> is available for this device or not; if the device can support INTx then
> we need to re-enable it so the device can fallback to use it.
> 
> In this patch, should note two minor changes:
> 
> - vfio_pci_disable_intx() may be called multiple times (each time the
>   guest enables one MSI vector).  This patch changes to use
>   'intx_fd == -1' to denote the INTx disabled, vfio_pci_disable_intx()
>   and vfio_pci_enable_intx will directly bail out when detect INTx has
>   been disabled and enabled respectively.
> 
> - Since pci_device_header will be corrupted after PCI configuration
>   and all irq related info will be lost.  Before re-enabling INTx
>   mode, this patch restores 'irq_pin' and 'irq_line' fields in struct
>   pci_device_header.
> 
> Signed-off-by: Leo Yan <leo.yan@linaro.org>
> ---
>  vfio/pci.c | 59 ++++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 48 insertions(+), 11 deletions(-)
> 
> diff --git a/vfio/pci.c b/vfio/pci.c
> index d025581..ba971eb 100644
> --- a/vfio/pci.c
> +++ b/vfio/pci.c
> @@ -28,6 +28,7 @@ struct vfio_irq_eventfd {
>  	msi_update_state(state, val, VFIO_PCI_MSI_STATE_EMPTY)
>  
>  static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev);
> +static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev);
>  
>  static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
>  				bool msix)
> @@ -50,17 +51,14 @@ static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
>  	if (!msi_is_enabled(msis->virt_state))
>  		return 0;
>  
> -	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) {
> -		/*
> -		 * PCI (and VFIO) forbids enabling INTx, MSI or MSIX at the same
> -		 * time. Since INTx has to be enabled from the start (we don't
> -		 * have a reliable way to know when the user starts using it),
> -		 * disable it now.
> -		 */
> +	/*
> +	 * PCI (and VFIO) forbids enabling INTx, MSI or MSIX at the same
> +	 * time. Since INTx has to be enabled from the start (after enabling
> +	 * 'pdev->intx_fd' will be assigned to an eventfd and doesn't equal
> +	 * to the init value -1), disable it now.
> +	 */
> +	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX)
>  		vfio_pci_disable_intx(kvm, vdev);
> -		/* Permanently disable INTx */
> -		pdev->irq_modes &= ~VFIO_PCI_IRQ_MODE_INTX;
> -	}
>  
>  	eventfds = (void *)msis->irq_set + sizeof(struct vfio_irq_set);
>  
> @@ -162,7 +160,34 @@ static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev,
>  	msi_set_enabled(msis->phys_state, false);
>  	msi_set_empty(msis->phys_state, true);
>  
> -	return 0;
> +	/*
> +	 * When MSI or MSIX is disabled, this might be called when
> +	 * PCI driver detects the MSI interrupt failure and wants to
> +	 * rollback to INTx mode.  Thus enable INTx if the device
> +	 * supports INTx mode in this case.
> +	 */
> +	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) {
> +		/*
> +		 * Struct pci_device_header is not only used for header,
> +		 * it also is used for PCI configuration; and in the function
> +		 * vfio_pci_cfg_write() it firstly writes configuration space
> +		 * and then read back the configuration space data into the
> +		 * header structure; thus 'irq_pin' and 'irq_line' in the
> +		 * header will be overwritten.
> +		 *
> +		 * If want to enable INTx mode properly, firstly needs to
> +		 * restore 'irq_pin' and 'irq_line' values; we can simply set 1
> +		 * to 'irq_pin', and 'pdev->intx_gsi' keeps gsi value when
> +		 * enable INTx mode previously so we can simply use it to
> +		 * recover irq line number by adding offset KVM_IRQ_OFFSET.
> +		 */
> +		pdev->hdr.irq_pin = 1;
> +		pdev->hdr.irq_line = pdev->intx_gsi + KVM_IRQ_OFFSET;

That doesn't look right. We shouldn't change irq_line at runtime, it's
reserved to the guest (and irq_pin is static). Maybe move the bits that
should only be done once (intx_gsi setup, and also the
VFIO_DEVICE_GET_IRQ_INFO sanity check) outside of the enable() function,
like we do for msis?

Thanks,
Jean

> +
> +		ret = vfio_pci_enable_intx(kvm, vdev);
> +	}
> +
> +	return ret >= 0 ? 0 : ret;
>  }
>  
>  static int vfio_pci_update_msi_entry(struct kvm *kvm, struct vfio_device *vdev,
> @@ -1002,6 +1027,10 @@ static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev)
>  		.index	= VFIO_PCI_INTX_IRQ_INDEX,
>  	};
>  
> +	/* INTx mode has been disabled */
> +	if (pdev->intx_fd == -1)
> +		return;
> +
>  	pr_debug("user requested MSI, disabling INTx %d", gsi);
>  
>  	ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
> @@ -1009,6 +1038,7 @@ static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev)
>  
>  	close(pdev->intx_fd);
>  	close(pdev->unmask_fd);
> +	pdev->intx_fd = -1;
>  }
>  
>  static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev)
> @@ -1025,6 +1055,10 @@ static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev)
>  		.index = VFIO_PCI_INTX_IRQ_INDEX,
>  	};
>  
> +	/* INTx mode has been enabled */
> +	if (pdev->intx_fd != -1)
> +		return 0;
> +
>  	ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
>  	if (ret || irq_info.count == 0) {
>  		vfio_dev_err(vdev, "no INTx reported by VFIO");
> @@ -1140,6 +1174,9 @@ static int vfio_pci_configure_dev_irqs(struct kvm *kvm, struct vfio_device *vdev
>  			return ret;
>  	}
>  
> +	/* Use intx_fd=-1 to denote INTx is disabled */
> +	pdev->intx_fd = -1;
> +
>  	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX)
>  		ret = vfio_pci_enable_intx(kvm, vdev);
>  
>
Leo Yan March 26, 2019, 3:17 a.m. UTC | #2
Hi Jean-Philippe,

On Mon, Mar 25, 2019 at 06:28:40PM +0000, Jean-Philippe Brucker wrote:

[...]

> > @@ -162,7 +160,34 @@ static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev,
> >  	msi_set_enabled(msis->phys_state, false);
> >  	msi_set_empty(msis->phys_state, true);
> >  
> > -	return 0;
> > +	/*
> > +	 * When MSI or MSIX is disabled, this might be called when
> > +	 * PCI driver detects the MSI interrupt failure and wants to
> > +	 * rollback to INTx mode.  Thus enable INTx if the device
> > +	 * supports INTx mode in this case.
> > +	 */
> > +	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) {
> > +		/*
> > +		 * Struct pci_device_header is not only used for header,
> > +		 * it also is used for PCI configuration; and in the function
> > +		 * vfio_pci_cfg_write() it firstly writes configuration space
> > +		 * and then read back the configuration space data into the
> > +		 * header structure; thus 'irq_pin' and 'irq_line' in the
> > +		 * header will be overwritten.
> > +		 *
> > +		 * If want to enable INTx mode properly, firstly needs to
> > +		 * restore 'irq_pin' and 'irq_line' values; we can simply set 1
> > +		 * to 'irq_pin', and 'pdev->intx_gsi' keeps gsi value when
> > +		 * enable INTx mode previously so we can simply use it to
> > +		 * recover irq line number by adding offset KVM_IRQ_OFFSET.
> > +		 */
> > +		pdev->hdr.irq_pin = 1;
> > +		pdev->hdr.irq_line = pdev->intx_gsi + KVM_IRQ_OFFSET;
> 
> That doesn't look right. We shouldn't change irq_line at runtime, it's
> reserved to the guest (and irq_pin is static). Maybe move the bits that
> should only be done once (intx_gsi setup, and also the
> VFIO_DEVICE_GET_IRQ_INFO sanity check) outside of the enable() function,
> like we do for msis?

Agree this and another comment in patch 0002.  Will spin new patch
set with following these good suggestions.

Thanks a lot for reviewing!

Thanks,
Leo Yan
diff mbox series

Patch

diff --git a/vfio/pci.c b/vfio/pci.c
index d025581..ba971eb 100644
--- a/vfio/pci.c
+++ b/vfio/pci.c
@@ -28,6 +28,7 @@  struct vfio_irq_eventfd {
 	msi_update_state(state, val, VFIO_PCI_MSI_STATE_EMPTY)
 
 static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev);
+static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev);
 
 static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
 				bool msix)
@@ -50,17 +51,14 @@  static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev,
 	if (!msi_is_enabled(msis->virt_state))
 		return 0;
 
-	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) {
-		/*
-		 * PCI (and VFIO) forbids enabling INTx, MSI or MSIX at the same
-		 * time. Since INTx has to be enabled from the start (we don't
-		 * have a reliable way to know when the user starts using it),
-		 * disable it now.
-		 */
+	/*
+	 * PCI (and VFIO) forbids enabling INTx, MSI or MSIX at the same
+	 * time. Since INTx has to be enabled from the start (after enabling
+	 * 'pdev->intx_fd' will be assigned to an eventfd and doesn't equal
+	 * to the init value -1), disable it now.
+	 */
+	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX)
 		vfio_pci_disable_intx(kvm, vdev);
-		/* Permanently disable INTx */
-		pdev->irq_modes &= ~VFIO_PCI_IRQ_MODE_INTX;
-	}
 
 	eventfds = (void *)msis->irq_set + sizeof(struct vfio_irq_set);
 
@@ -162,7 +160,34 @@  static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev,
 	msi_set_enabled(msis->phys_state, false);
 	msi_set_empty(msis->phys_state, true);
 
-	return 0;
+	/*
+	 * When MSI or MSIX is disabled, this might be called when
+	 * PCI driver detects the MSI interrupt failure and wants to
+	 * rollback to INTx mode.  Thus enable INTx if the device
+	 * supports INTx mode in this case.
+	 */
+	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) {
+		/*
+		 * Struct pci_device_header is not only used for header,
+		 * it also is used for PCI configuration; and in the function
+		 * vfio_pci_cfg_write() it firstly writes configuration space
+		 * and then read back the configuration space data into the
+		 * header structure; thus 'irq_pin' and 'irq_line' in the
+		 * header will be overwritten.
+		 *
+		 * If want to enable INTx mode properly, firstly needs to
+		 * restore 'irq_pin' and 'irq_line' values; we can simply set 1
+		 * to 'irq_pin', and 'pdev->intx_gsi' keeps gsi value when
+		 * enable INTx mode previously so we can simply use it to
+		 * recover irq line number by adding offset KVM_IRQ_OFFSET.
+		 */
+		pdev->hdr.irq_pin = 1;
+		pdev->hdr.irq_line = pdev->intx_gsi + KVM_IRQ_OFFSET;
+
+		ret = vfio_pci_enable_intx(kvm, vdev);
+	}
+
+	return ret >= 0 ? 0 : ret;
 }
 
 static int vfio_pci_update_msi_entry(struct kvm *kvm, struct vfio_device *vdev,
@@ -1002,6 +1027,10 @@  static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev)
 		.index	= VFIO_PCI_INTX_IRQ_INDEX,
 	};
 
+	/* INTx mode has been disabled */
+	if (pdev->intx_fd == -1)
+		return;
+
 	pr_debug("user requested MSI, disabling INTx %d", gsi);
 
 	ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
@@ -1009,6 +1038,7 @@  static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev)
 
 	close(pdev->intx_fd);
 	close(pdev->unmask_fd);
+	pdev->intx_fd = -1;
 }
 
 static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev)
@@ -1025,6 +1055,10 @@  static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev)
 		.index = VFIO_PCI_INTX_IRQ_INDEX,
 	};
 
+	/* INTx mode has been enabled */
+	if (pdev->intx_fd != -1)
+		return 0;
+
 	ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
 	if (ret || irq_info.count == 0) {
 		vfio_dev_err(vdev, "no INTx reported by VFIO");
@@ -1140,6 +1174,9 @@  static int vfio_pci_configure_dev_irqs(struct kvm *kvm, struct vfio_device *vdev
 			return ret;
 	}
 
+	/* Use intx_fd=-1 to denote INTx is disabled */
+	pdev->intx_fd = -1;
+
 	if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX)
 		ret = vfio_pci_enable_intx(kvm, vdev);