diff mbox

vfio/pci: Add option to disable GeForce quirks

Message ID 20180129202326.9417.71344.stgit@gimli.home (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Williamson Jan. 29, 2018, 8:23 p.m. UTC
These quirks are necessary for GeForce, but not for Quadro/GRID/Tesla
assignment.  Leaving them enabled is fully functional and provides the
most compatibility, but due to the unique NVIDIA MSI ACK behavior[1],
it also introduces latency in re-triggering the MSI interrupt.  This
overhead is typically negligible, but has been shown to adversely
affect some (very) high interrupt rate applications.  This adds the
vfio-pci device option "x-no-geforce-quirks=" which can be set to
"on" to disable this additional overhead.

A follow-on optimization for GeForce might be to make use of an
ioeventfd to allow KVM to trigger an irqfd in the kernel vfio-pci
driver, avoiding the bounce through userspace to handle this device
write.

[1] Background: the NVIDIA driver has been observed to write 0xff to
the read-only MSI capability ID register via the MMIO mirror of PCI
config space in order for the MSI interrupt to re-trigger.  This PCI
config space mirror is virtualized in QEMU for GeForce.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |    9 ++++++---
 hw/vfio/pci.c        |    2 ++
 hw/vfio/pci.h        |    1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

Comments

Philippe Mathieu-Daudé Jan. 30, 2018, 3:46 a.m. UTC | #1
Hi Alex,

On 01/29/2018 05:23 PM, Alex Williamson wrote:
> These quirks are necessary for GeForce, but not for Quadro/GRID/Tesla
> assignment.  Leaving them enabled is fully functional and provides the
> most compatibility, but due to the unique NVIDIA MSI ACK behavior[1],
> it also introduces latency in re-triggering the MSI interrupt.  This
> overhead is typically negligible, but has been shown to adversely
> affect some (very) high interrupt rate applications.  This adds the
> vfio-pci device option "x-no-geforce-quirks=" which can be set to
> "on" to disable this additional overhead.
> 
> A follow-on optimization for GeForce might be to make use of an
> ioeventfd to allow KVM to trigger an irqfd in the kernel vfio-pci
> driver, avoiding the bounce through userspace to handle this device
> write.
> 
> [1] Background: the NVIDIA driver has been observed to write 0xff to
> the read-only MSI capability ID register via the MMIO mirror of PCI
> config space in order for the MSI interrupt to re-trigger.  This PCI
> config space mirror is virtualized in QEMU for GeForce.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  hw/vfio/pci-quirks.c |    9 ++++++---
>  hw/vfio/pci.c        |    2 ++
>  hw/vfio/pci.h        |    1 +
>  3 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> index 60ad5fb91a83..e5779a7ad35b 100644
> --- a/hw/vfio/pci-quirks.c
> +++ b/hw/vfio/pci-quirks.c
> @@ -542,7 +542,8 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
>      VFIOQuirk *quirk;
>      VFIONvidia3d0Quirk *data;
>  
> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> +    if (vdev->no_geforce_quirks ||
> +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||

Isn't it enough to check x-pci-device-id == 0x03d0 for GeForce rather
than introduce a x-no-geforce-quirks property?

>          !vdev->bars[1].region.size) {
>          return;
>      }
> @@ -660,7 +661,8 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
>      VFIONvidiaBAR5Quirk *bar5;
>      VFIOConfigWindowQuirk *window;
>  
> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> +    if (vdev->no_geforce_quirks ||
> +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
>          !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
>          return;
>      }
> @@ -754,7 +756,8 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
>      VFIOQuirk *quirk;
>      VFIOConfigMirrorQuirk *mirror;
>  
> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> +    if (vdev->no_geforce_quirks ||
> +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
>          !vfio_is_vga(vdev) || nr != 0) {
>          return;
>      }
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2c7129512563..6d260b92c1e1 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2989,6 +2989,8 @@ static Property vfio_pci_dev_properties[] = {
>      DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
>      DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
>      DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
> +    DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
> +                     no_geforce_quirks, false),
>      DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
>      DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
>      DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index a8fb3b34222c..7c55087a1c44 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool no_geforce_quirks;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> 
>
Alex Williamson Jan. 30, 2018, 4:38 a.m. UTC | #2
On Tue, 30 Jan 2018 00:46:38 -0300
Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:

> Hi Alex,
> 
> On 01/29/2018 05:23 PM, Alex Williamson wrote:
> > These quirks are necessary for GeForce, but not for Quadro/GRID/Tesla
> > assignment.  Leaving them enabled is fully functional and provides the
> > most compatibility, but due to the unique NVIDIA MSI ACK behavior[1],
> > it also introduces latency in re-triggering the MSI interrupt.  This
> > overhead is typically negligible, but has been shown to adversely
> > affect some (very) high interrupt rate applications.  This adds the
> > vfio-pci device option "x-no-geforce-quirks=" which can be set to
> > "on" to disable this additional overhead.
> > 
> > A follow-on optimization for GeForce might be to make use of an
> > ioeventfd to allow KVM to trigger an irqfd in the kernel vfio-pci
> > driver, avoiding the bounce through userspace to handle this device
> > write.
> > 
> > [1] Background: the NVIDIA driver has been observed to write 0xff to
> > the read-only MSI capability ID register via the MMIO mirror of PCI
> > config space in order for the MSI interrupt to re-trigger.  This PCI
> > config space mirror is virtualized in QEMU for GeForce.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> >  hw/vfio/pci-quirks.c |    9 ++++++---
> >  hw/vfio/pci.c        |    2 ++
> >  hw/vfio/pci.h        |    1 +
> >  3 files changed, 9 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> > index 60ad5fb91a83..e5779a7ad35b 100644
> > --- a/hw/vfio/pci-quirks.c
> > +++ b/hw/vfio/pci-quirks.c
> > @@ -542,7 +542,8 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
> >      VFIOQuirk *quirk;
> >      VFIONvidia3d0Quirk *data;
> >  
> > -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> > +    if (vdev->no_geforce_quirks ||
> > +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||  
> 
> Isn't it enough to check x-pci-device-id == 0x03d0 for GeForce rather
> than introduce a x-no-geforce-quirks property?

Hi,

That's not how any of this works.  x-pci-device-id is another vfio-pci
option which is only used for overriding the PCI device IDs of the
assigned device, so I assuming you mean vdev->device_id.  However, we
can lookup here:

http://pci-ids.ucw.cz/read/PC/10de

that vendor ID 0x10de, device ID 0x03d0 only refers to a GeForce 6150SE
embedded into the nForce 430 chipset.  It certainly doesn't identify
all GeForce devices, each product NVIDIA ships has a different device
ID.  Also, the "3d0" of this function refers to a section of the VGA I/O
port region for which we're inserting a quirk, not to a specific
device.  This quirk is only necessary when bootstrapping the VGA BIOS
on an NVIDIA graphics card.  Thanks,

Alex

> >          !vdev->bars[1].region.size) {
> >          return;
> >      }
> > @@ -660,7 +661,8 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
> >      VFIONvidiaBAR5Quirk *bar5;
> >      VFIOConfigWindowQuirk *window;
> >  
> > -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> > +    if (vdev->no_geforce_quirks ||
> > +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> >          !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
> >          return;
> >      }
> > @@ -754,7 +756,8 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
> >      VFIOQuirk *quirk;
> >      VFIOConfigMirrorQuirk *mirror;
> >  
> > -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> > +    if (vdev->no_geforce_quirks ||
> > +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> >          !vfio_is_vga(vdev) || nr != 0) {
> >          return;
> >      }
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 2c7129512563..6d260b92c1e1 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -2989,6 +2989,8 @@ static Property vfio_pci_dev_properties[] = {
> >      DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
> >      DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
> >      DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
> > +    DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
> > +                     no_geforce_quirks, false),
> >      DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
> >      DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
> >      DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index a8fb3b34222c..7c55087a1c44 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -142,6 +142,7 @@ typedef struct VFIOPCIDevice {
> >      bool no_kvm_intx;
> >      bool no_kvm_msi;
> >      bool no_kvm_msix;
> > +    bool no_geforce_quirks;
> >  } VFIOPCIDevice;
> >  
> >  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> > 
> >   
>
Alex Williamson Feb. 5, 2018, 6:27 a.m. UTC | #3
On Mon, 29 Jan 2018 13:23:50 -0700
Alex Williamson <alex.williamson@redhat.com> wrote:

> These quirks are necessary for GeForce, but not for Quadro/GRID/Tesla
> assignment.  Leaving them enabled is fully functional and provides the
> most compatibility, but due to the unique NVIDIA MSI ACK behavior[1],
> it also introduces latency in re-triggering the MSI interrupt.  This
> overhead is typically negligible, but has been shown to adversely
> affect some (very) high interrupt rate applications.  This adds the
> vfio-pci device option "x-no-geforce-quirks=" which can be set to
> "on" to disable this additional overhead.
> 
> A follow-on optimization for GeForce might be to make use of an
> ioeventfd to allow KVM to trigger an irqfd in the kernel vfio-pci
> driver, avoiding the bounce through userspace to handle this device
> write.
> 
> [1] Background: the NVIDIA driver has been observed to write 0xff to
> the read-only MSI capability ID register via the MMIO mirror of PCI
> config space in order for the MSI interrupt to re-trigger.  This PCI
> config space mirror is virtualized in QEMU for GeForce.

In developing the ioeventfd counterpart to this, I've found that this
footnote description is not entirely accurate.  NVIDIA is still using
an MSI-ACK write through to the MMIO mirror of config space, but the
location and data for the write is different now.  I don't know if this
is driver or hardware changes, but the write is currently observed as a
4-byte write of value 0x0 to offset 0x704 (0x88704 into BAR0).  Unless
there are no other comments, I'll update the commit log to reflect the
following in place of above:

[1] Background: the NVIDIA driver has been observed to issue a write to
the MMIO mirror of PCI config space in BAR0 in order to allow the MSI
interrupt for the device to retrigger.  Older reports indicated a write
of 0xff to the (read-only) MSI capability ID register, while more
recently a write of 0x0 is observed at config space offset 0x704,
non-architected, extended config space of the device (BAR0 offset
0x88704).  Virtualization of this range is only required for GeForce.

Thanks,
Alex

> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  hw/vfio/pci-quirks.c |    9 ++++++---
>  hw/vfio/pci.c        |    2 ++
>  hw/vfio/pci.h        |    1 +
>  3 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> index 60ad5fb91a83..e5779a7ad35b 100644
> --- a/hw/vfio/pci-quirks.c
> +++ b/hw/vfio/pci-quirks.c
> @@ -542,7 +542,8 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
>      VFIOQuirk *quirk;
>      VFIONvidia3d0Quirk *data;
>  
> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> +    if (vdev->no_geforce_quirks ||
> +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
>          !vdev->bars[1].region.size) {
>          return;
>      }
> @@ -660,7 +661,8 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
>      VFIONvidiaBAR5Quirk *bar5;
>      VFIOConfigWindowQuirk *window;
>  
> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> +    if (vdev->no_geforce_quirks ||
> +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
>          !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
>          return;
>      }
> @@ -754,7 +756,8 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
>      VFIOQuirk *quirk;
>      VFIOConfigMirrorQuirk *mirror;
>  
> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
> +    if (vdev->no_geforce_quirks ||
> +        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
>          !vfio_is_vga(vdev) || nr != 0) {
>          return;
>      }
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2c7129512563..6d260b92c1e1 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2989,6 +2989,8 @@ static Property vfio_pci_dev_properties[] = {
>      DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
>      DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
>      DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
> +    DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
> +                     no_geforce_quirks, false),
>      DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
>      DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
>      DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index a8fb3b34222c..7c55087a1c44 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool no_geforce_quirks;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>
diff mbox

Patch

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 60ad5fb91a83..e5779a7ad35b 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -542,7 +542,8 @@  static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
     VFIOQuirk *quirk;
     VFIONvidia3d0Quirk *data;
 
-    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
+    if (vdev->no_geforce_quirks ||
+        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
         !vdev->bars[1].region.size) {
         return;
     }
@@ -660,7 +661,8 @@  static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     VFIONvidiaBAR5Quirk *bar5;
     VFIOConfigWindowQuirk *window;
 
-    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
+    if (vdev->no_geforce_quirks ||
+        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
         !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
         return;
     }
@@ -754,7 +756,8 @@  static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     VFIOQuirk *quirk;
     VFIOConfigMirrorQuirk *mirror;
 
-    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
+    if (vdev->no_geforce_quirks ||
+        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
         !vfio_is_vga(vdev) || nr != 0) {
         return;
     }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 2c7129512563..6d260b92c1e1 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2989,6 +2989,8 @@  static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
     DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
     DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
+    DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
+                     no_geforce_quirks, false),
     DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a8fb3b34222c..7c55087a1c44 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -142,6 +142,7 @@  typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool no_geforce_quirks;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);