diff mbox

vfio/pci: fix out-of-sync BAR information on reset

Message ID 1477069233-19180-1-git-send-email-ido@wizery.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ido Yariv Oct. 21, 2016, 5 p.m. UTC
When a PCI device is reset, pci_do_device_reset resets all BAR addresses
in the relevant PCIDevice's config buffer.

The VFIO configuration space stays untouched, so the guest OS may choose
to skip restoring the BAR addresses as they would seem intact. The PCI
device may be left non-operational.
One example of such a scenario is when the guest exits S3.

Fix this by resetting the BAR addresses in the VFIO configuration space
as well.

Signed-off-by: Ido Yariv <ido@wizery.com>
---
 hw/vfio/pci.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Alex Williamson Oct. 21, 2016, 6:03 p.m. UTC | #1
On Fri, 21 Oct 2016 13:00:33 -0400
Ido Yariv <ido@wizery.com> wrote:

> When a PCI device is reset, pci_do_device_reset resets all BAR addresses
> in the relevant PCIDevice's config buffer.
> 
> The VFIO configuration space stays untouched, so the guest OS may choose
> to skip restoring the BAR addresses as they would seem intact. The PCI
> device may be left non-operational.
> One example of such a scenario is when the guest exits S3.
> 
> Fix this by resetting the BAR addresses in the VFIO configuration space
> as well.
> 
> Signed-off-by: Ido Yariv <ido@wizery.com>
> ---
>  hw/vfio/pci.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 65d30fd..9e1dee0 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1922,11 +1922,41 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
>  static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>  {
>      Error *err = NULL;
> +    int nr;
>  
>      vfio_intx_enable(vdev, &err);
>      if (err) {
>          error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
>      }
> +
> +    for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
> +        VFIOBAR *bar = &vdev->bars[nr];
> +        off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr);
> +        uint32_t org = 0;
> +        uint64_t val = 0;
> +        uint32_t len;
> +
> +        if (!bar->region.size) {
> +            continue;
> +        }
> +
> +        if (pread(vdev->vbasedev.fd, &org, sizeof(org), addr) < 0) {
> +            error_report("%s(%s) read bar %d failed: %m", __func__,
> +                         vdev->vbasedev.name, nr);
> +            continue;
> +        }
> +
> +        val = le32_to_cpu(org);
> +        val &= (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
> +                              ~PCI_BASE_ADDRESS_MEM_MASK);
> +        val = cpu_to_le64(val);
> +
> +        len = bar->mem64 ? sizeof(uint64_t) : sizeof(uint32_t);
> +        if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) {
> +            error_report("%s(%s) reset bar %d failed: %m", __func__,
> +                         vdev->vbasedev.name, nr);
> +        }
> +    }

Why do we care to be so precise, can't we just blindly do 4-byte writes
of zero to each of the 6 BAR registers?  The bits you're trying to
preserve should be read-only anyway.  Nothing is saved by trying to do
an 8-byte write for 64-bit BARs.  Thanks,

Alex

>  }
>  
>  static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
Ido Yariv Oct. 21, 2016, 6:35 p.m. UTC | #2
Hi Alex,

On Fri, Oct 21, 2016 at 2:03 PM, Alex Williamson <alex.williamson@redhat.com
> wrote:

> On Fri, 21 Oct 2016 13:00:33 -0400
> Ido Yariv <ido@wizery.com> wrote:
>
> > When a PCI device is reset, pci_do_device_reset resets all BAR addresses
> > in the relevant PCIDevice's config buffer.
> >
> > The VFIO configuration space stays untouched, so the guest OS may choose
> > to skip restoring the BAR addresses as they would seem intact. The PCI
> > device may be left non-operational.
> > One example of such a scenario is when the guest exits S3.
> >
> > Fix this by resetting the BAR addresses in the VFIO configuration space
> > as well.
> >
> > Signed-off-by: Ido Yariv <ido@wizery.com>
> > ---
> >  hw/vfio/pci.c | 30 ++++++++++++++++++++++++++++++
> >  1 file changed, 30 insertions(+)
> >
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 65d30fd..9e1dee0 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -1922,11 +1922,41 @@ static void vfio_pci_pre_reset(VFIOPCIDevice
> *vdev)
> >  static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> >  {
> >      Error *err = NULL;
> > +    int nr;
> >
> >      vfio_intx_enable(vdev, &err);
> >      if (err) {
> >          error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
> >      }
> > +
> > +    for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
> > +        VFIOBAR *bar = &vdev->bars[nr];
> > +        off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 *
> nr);
> > +        uint32_t org = 0;
> > +        uint64_t val = 0;
> > +        uint32_t len;
> > +
> > +        if (!bar->region.size) {
> > +            continue;
> > +        }
> > +
> > +        if (pread(vdev->vbasedev.fd, &org, sizeof(org), addr) < 0) {
> > +            error_report("%s(%s) read bar %d failed: %m", __func__,
> > +                         vdev->vbasedev.name, nr);
> > +            continue;
> > +        }
> > +
> > +        val = le32_to_cpu(org);
> > +        val &= (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
> > +                              ~PCI_BASE_ADDRESS_MEM_MASK);
> > +        val = cpu_to_le64(val);
> > +
> > +        len = bar->mem64 ? sizeof(uint64_t) : sizeof(uint32_t);
> > +        if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) {
> > +            error_report("%s(%s) reset bar %d failed: %m", __func__,
> > +                         vdev->vbasedev.name, nr);
> > +        }
> > +    }
>
> Why do we care to be so precise, can't we just blindly do 4-byte writes
> of zero to each of the 6 BAR registers?  The bits you're trying to
> preserve should be read-only anyway.  Nothing is saved by trying to do
> an 8-byte write for 64-bit BARs.  Thanks,
>

​I wasn't sure if the vfio-pci driver masks these bits, but I see
vfio_bar_fixup takes care of that.
I'll send a revised patch then.

Cheers,
Ido.
diff mbox

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 65d30fd..9e1dee0 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1922,11 +1922,41 @@  static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
 static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
 {
     Error *err = NULL;
+    int nr;
 
     vfio_intx_enable(vdev, &err);
     if (err) {
         error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
     }
+
+    for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
+        VFIOBAR *bar = &vdev->bars[nr];
+        off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr);
+        uint32_t org = 0;
+        uint64_t val = 0;
+        uint32_t len;
+
+        if (!bar->region.size) {
+            continue;
+        }
+
+        if (pread(vdev->vbasedev.fd, &org, sizeof(org), addr) < 0) {
+            error_report("%s(%s) read bar %d failed: %m", __func__,
+                         vdev->vbasedev.name, nr);
+            continue;
+        }
+
+        val = le32_to_cpu(org);
+        val &= (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
+                              ~PCI_BASE_ADDRESS_MEM_MASK);
+        val = cpu_to_le64(val);
+
+        len = bar->mem64 ? sizeof(uint64_t) : sizeof(uint32_t);
+        if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) {
+            error_report("%s(%s) reset bar %d failed: %m", __func__,
+                         vdev->vbasedev.name, nr);
+        }
+    }
 }
 
 static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)