[RFC] vpci: allow BAR write while mapped

Message ID 20250312195019.382926-1-stewart.hildebrand@amd.com (mailing list archive)
State New
Series [RFC] vpci: allow BAR write while mapped

Commit Message

Stewart Hildebrand March 12, 2025, 7:50 p.m. UTC
Xen vPCI currently refuses BAR writes while the BAR is mapped in the
p2m. If firmware initialized the BAR to a bad address, Linux will try
to write a new address to the BAR without first disabling memory
decoding. Allow such writes by updating the p2m right away in the vPCI
BAR write handler.

Resolves: https://gitlab.com/xen-project/xen/-/issues/197
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
RFC: Currently the deferred mapping machinery supports only map or
     unmap, not both. It might be better to rework the mapping machinery
     to support unmap-then-map operations, but please let me know your
     thoughts.
RFC: This patch does not yet attempt to distinguish between 32-bit and
     64-bit writes. It probably should.
---
 xen/drivers/vpci/header.c | 65 +++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 12 deletions(-)
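
For reviewers skimming the thread, here is a condensed sketch (not part of the
patch; the wrapper function name is invented for illustration, and the locking
and vPCI address bookkeeping of the real handler are omitted) of the
write-while-mapped flow that bar_write() gains below: unmap the old BAR from
the p2m synchronously, forward the write to hardware, then map the new
location through the usual, possibly deferred, path.

/*
 * Illustrative sketch only, not part of the patch. bar_write() runs with the
 * domain's pci_lock and the device's vpci->lock already held, so pending
 * work is drained with need_lock == false.
 */
static void sketch_write_while_mapped(const struct pci_dev *pdev,
                                      unsigned int reg, uint32_t val)
{
    uint16_t cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);

    /* Unmap the old BAR from the p2m; this must finish before the write. */
    modify_bars(pdev, cmd & ~PCI_COMMAND_MEMORY, false);
    while ( process_pending(current, false) )
        ; /* preempted, retry until the unmap completes */

    /* Let the new address reach the physical BAR. */
    pci_conf_write32(pdev->sbdf, reg, val);

    /* Map the BAR at its new address; this may be deferred as usual. */
    modify_bars(pdev, cmd, false);
}

The unmap leg cannot be deferred because, as the RFC note above says, the
deferral machinery handles either a map or an unmap per request, not an
unmap followed by a map.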


base-commit: 8e60d47cf0112c145b6b0e454d102b04c857db8c

Patch

diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index ef6c965c081c..66adb2183cfe 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -173,7 +173,7 @@  static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
         ASSERT_UNREACHABLE();
 }
 
-bool vpci_process_pending(struct vcpu *v)
+static bool process_pending(struct vcpu *v, bool need_lock)
 {
     struct pci_dev *pdev = v->vpci.pdev;
     struct vpci_header *header = NULL;
@@ -182,12 +182,14 @@  bool vpci_process_pending(struct vcpu *v)
     if ( !pdev )
         return false;
 
-    read_lock(&v->domain->pci_lock);
+    if ( need_lock )
+        read_lock(&v->domain->pci_lock);
 
     if ( !pdev->vpci || (v->domain != pdev->domain) )
     {
         v->vpci.pdev = NULL;
-        read_unlock(&v->domain->pci_lock);
+        if ( need_lock )
+            read_unlock(&v->domain->pci_lock);
         return false;
     }
 
@@ -209,17 +211,20 @@  bool vpci_process_pending(struct vcpu *v)
 
         if ( rc == -ERESTART )
         {
-            read_unlock(&v->domain->pci_lock);
+            if ( need_lock )
+                read_unlock(&v->domain->pci_lock);
             return true;
         }
 
         if ( rc )
         {
-            spin_lock(&pdev->vpci->lock);
+            if ( need_lock )
+                spin_lock(&pdev->vpci->lock);
             /* Disable memory decoding unconditionally on failure. */
             modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
                             false);
-            spin_unlock(&pdev->vpci->lock);
+            if ( need_lock )
+                spin_unlock(&pdev->vpci->lock);
 
             /* Clean all the rangesets */
             for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
@@ -228,7 +233,8 @@  bool vpci_process_pending(struct vcpu *v)
 
             v->vpci.pdev = NULL;
 
-            read_unlock(&v->domain->pci_lock);
+            if ( need_lock )
+                read_unlock(&v->domain->pci_lock);
 
             if ( !is_hardware_domain(v->domain) )
                 domain_crash(v->domain);
@@ -238,15 +244,23 @@  bool vpci_process_pending(struct vcpu *v)
     }
     v->vpci.pdev = NULL;
 
-    spin_lock(&pdev->vpci->lock);
+    if ( need_lock )
+        spin_lock(&pdev->vpci->lock);
     modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
-    spin_unlock(&pdev->vpci->lock);
+    if ( need_lock )
+        spin_unlock(&pdev->vpci->lock);
 
-    read_unlock(&v->domain->pci_lock);
+    if ( need_lock )
+        read_unlock(&v->domain->pci_lock);
 
     return false;
 }
 
+bool vpci_process_pending(struct vcpu *v)
+{
+    return process_pending(v, true);
+}
+
 static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
                             uint16_t cmd)
 {
@@ -565,6 +579,8 @@  static void cf_check bar_write(
 {
     struct vpci_bar *bar = data;
     bool hi = false;
+    bool reenable = false;
+    uint32_t cmd = 0;
 
     ASSERT(is_hardware_domain(pdev->domain));
 
@@ -585,10 +601,31 @@  static void cf_check bar_write(
     {
         /* If the value written is the current one avoid printing a warning. */
         if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
+        {
             gprintk(XENLOG_WARNING,
-                    "%pp: ignored BAR %zu write while mapped\n",
+                    "%pp: allowing BAR %zu write while mapped\n",
                     &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
-        return;
+            ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
+            ASSERT(spin_is_locked(&pdev->vpci->lock));
+            reenable = true;
+            cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
+            /*
+             * Write-while-mapped: unmap the old BAR in p2m. We want this to
+             * finish right away since the deferral machinery only supports
+             * unmap OR map, not unmap-then-remap. Ultimately, it probably would
+             * be better to defer the write-while-mapped case just like regular
+             * BAR writes (but still only allow it for 32-bit BAR writes).
+             */
+            /* Disable memory decoding */
+            modify_bars(pdev, cmd & ~PCI_COMMAND_MEMORY, false);
+            /* Call process pending here to ensure P2M operations are done */
+            while ( process_pending(current, false) )
+            {
+                /* Pre-empted, try again */
+            }
+        }
+        else
+            return;
     }
 
 
@@ -610,6 +647,10 @@  static void cf_check bar_write(
     }
 
     pci_conf_write32(pdev->sbdf, reg, val);
+
+    if ( reenable )
+        /* Write-while-mapped: map the new BAR in p2m. OK to defer. */
+        modify_bars(pdev, cmd, false);
 }
 
 static void cf_check guest_mem_bar_write(const struct pci_dev *pdev,