diff mbox series

[for-4.20,v4,4/5] x86/pci: disable MSI(-X) on all devices at shutdown

Message ID 20250211144813.89137-1-roger.pau@citrix.com (mailing list archive)
State New
Headers show
Series None | expand

Commit Message

Roger Pau Monne Feb. 11, 2025, 2:48 p.m. UTC
Attempt to disable MSI(-X) capabilities on all PCI devices know by Xen at
shutdown.  Doing such disabling should facilitate kexec chained kernel from
booting more reliably, as device MSI(-X) interrupt generation should be
quiesced.

Only attempt to disable MSI(-X) on all devices in the crash context if the
PCI lock is not taken, otherwise the PCI device list could be in an
inconsistent state.  This requires introducing a new pcidevs_trylock()
helper to check whether the lock is currently taken.

Disabling MSI(-X) should prevent "Receive accept error" being raised as a
result of non-disabled interrupts targeting offline CPUs.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v3:
 - Introduce and use pcidevs_trylock().
 - Remove assert from pci_iterate_devices().

Changes since v2:
 - Disable MSI(-X) ahead of IO-APIC disabling.
 - Duplicate and expand comment about pci_iterate_devices() in function
   declaration.
 - Add assert interrupts are disabled in pci_iterate_devices().
 - Only walk PCI device list on crash context if the pci lock is not taken.

Changes since v1:
 - Split from bigger patch.
 - Iterate over all devices, even if the handler returns failure.
---
 xen/arch/x86/crash.c           | 10 ++++++++
 xen/arch/x86/include/asm/msi.h |  1 +
 xen/arch/x86/msi.c             | 18 +++++++++++++++
 xen/arch/x86/smp.c             |  1 +
 xen/drivers/passthrough/pci.c  | 42 ++++++++++++++++++++++++++++++++++
 xen/include/xen/pci.h          | 12 ++++++++++
 6 files changed, 84 insertions(+)

Comments

Jan Beulich Feb. 11, 2025, 5 p.m. UTC | #1
On 11.02.2025 15:48, Roger Pau Monne wrote:
> Attempt to disable MSI(-X) capabilities on all PCI devices know by Xen at
> shutdown.  Doing such disabling should facilitate kexec chained kernel from
> booting more reliably, as device MSI(-X) interrupt generation should be
> quiesced.
> 
> Only attempt to disable MSI(-X) on all devices in the crash context if the
> PCI lock is not taken, otherwise the PCI device list could be in an
> inconsistent state.  This requires introducing a new pcidevs_trylock()
> helper to check whether the lock is currently taken.
> 
> Disabling MSI(-X) should prevent "Receive accept error" being raised as a
> result of non-disabled interrupts targeting offline CPUs.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
diff mbox series

Patch

diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index a789416ca3ae..22b1121d7a40 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -175,6 +175,16 @@  static void nmi_shootdown_cpus(void)
          */
         x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
 
+        if ( pcidevs_trylock() )
+        {
+            /*
+             * Assume the PCI device list to be in a consistent state if the
+             * lock is not held when the crash happened.
+             */
+            pci_disable_msi_all();
+            pcidevs_unlock();
+        }
+
         disable_IO_APIC();
         hpet_disable();
     }
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index 63adb19820e8..7f9e531f73e6 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -86,6 +86,7 @@  extern int pci_enable_msi(struct pci_dev *pdev, struct msi_info *msi,
 extern void pci_disable_msi(struct msi_desc *msi_desc);
 extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
+extern void pci_disable_msi_all(void);
 extern int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc);
 extern int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
                            hw_irq_controller *handler);
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index e2360579deda..c9fe942c46f3 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1248,6 +1248,24 @@  void pci_cleanup_msi(struct pci_dev *pdev)
     msi_free_irqs(pdev);
 }
 
+static int cf_check disable_msi(struct pci_dev *pdev, void *arg)
+{
+    msi_set_enable(pdev, 0);
+    msix_set_enable(pdev, 0);
+
+    return 0;
+}
+
+/* Disable MSI and/or MSI-X on all devices known by Xen. */
+void pci_disable_msi_all(void)
+{
+    int rc = pci_iterate_devices(disable_msi, NULL);
+
+    if ( rc )
+        printk(XENLOG_ERR
+               "Failed to disable MSI(-X) on some devices: %d\n", rc);
+}
+
 int pci_reset_msix_state(struct pci_dev *pdev)
 {
     unsigned int pos = pdev->msix_pos;
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 4d29a09a9a95..28bc041e03a9 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -374,6 +374,7 @@  void smp_send_stop(void)
         smp_call_function(stop_this_cpu, &stop_aps, 0);
 
     local_irq_disable();
+    pci_disable_msi_all();
     disable_IO_APIC();
     hpet_disable();
 
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index f398c3aa65dc..e1a09344df25 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -68,6 +68,11 @@  bool pcidevs_locked(void)
     return rspin_is_locked(&_pcidevs_lock);
 }
 
+bool pcidevs_trylock_unsafe(void)
+{
+    return _rspin_trylock(&_pcidevs_lock);
+}
+
 static RADIX_TREE(pci_segments);
 
 static inline struct pci_seg *get_pseg(u16 seg)
@@ -1802,6 +1807,43 @@  int iommu_do_pci_domctl(
     return ret;
 }
 
+struct segment_iter {
+    int (*handler)(struct pci_dev *pdev, void *arg);
+    void *arg;
+    int rc;
+};
+
+static int cf_check iterate_all(struct pci_seg *pseg, void *arg)
+{
+    struct segment_iter *iter = arg;
+    struct pci_dev *pdev;
+
+    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+    {
+        int rc = iter->handler(pdev, iter->arg);
+
+        if ( !iter->rc )
+            iter->rc = rc;
+    }
+
+    return 0;
+}
+
+/*
+ * Iterate without locking or preemption over all PCI devices known by Xen.
+ * Can be called with interrupts disabled.
+ */
+int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg),
+                        void *arg)
+{
+    struct segment_iter iter = {
+        .handler = handler,
+        .arg = arg,
+    };
+
+    return pci_segments_iterate(iterate_all, &iter) ?: iter.rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index f784e9116059..4f12bcf089ac 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -190,6 +190,11 @@  static always_inline void pcidevs_lock(void)
 }
 void pcidevs_unlock(void);
 bool __must_check pcidevs_locked(void);
+bool pcidevs_trylock_unsafe(void);
+static always_inline bool pcidevs_trylock(void)
+{
+    return lock_evaluate_nospec(pcidevs_trylock_unsafe());
+}
 
 #ifndef NDEBUG
 /*
@@ -226,6 +231,13 @@  struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf);
 struct pci_dev *pci_get_real_pdev(pci_sbdf_t sbdf);
 void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
 
+/*
+ * Iterate without locking or preemption over all PCI devices known by Xen.
+ * Can be called with interrupts disabled.
+ */
+int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg),
+                        void *arg);
+
 uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg);
 uint16_t pci_conf_read16(pci_sbdf_t sbdf, unsigned int reg);
 uint32_t pci_conf_read32(pci_sbdf_t sbdf, unsigned int reg);