Message ID | 20250206150615.52052-5-roger.pau@citrix.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xen/x86: prevent local APIC errors at shutdown | expand |
On 06.02.2025 16:06, Roger Pau Monne wrote: > --- a/xen/arch/x86/crash.c > +++ b/xen/arch/x86/crash.c > @@ -177,6 +177,7 @@ static void nmi_shootdown_cpus(void) > > disable_IO_APIC(); > hpet_disable(); > + pci_disable_msi_all(); > } Apart from my concern below regarding use of the function in this context, for both uses I wonder to what extent the order of the three calls above may matter. I can't really give a precise reason, but to me it feels like the PCI device processing may better be done first. > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -1803,6 +1803,39 @@ int iommu_do_pci_domctl( > return ret; > } > > +struct segment_iter { > + int (*handler)(struct pci_dev *pdev, void *arg); > + void *arg; > + int rc; > +}; > + > +static int cf_check iterate_all(struct pci_seg *pseg, void *arg) > +{ > + struct segment_iter *iter = arg; > + struct pci_dev *pdev; > + > + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) > + { > + int rc = iter->handler(pdev, iter->arg); > + > + if ( !iter->rc ) > + iter->rc = rc; > + } > + > + return 0; > +} > + > +int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg), > + void *arg) > +{ > + struct segment_iter iter = { > + .handler = handler, > + .arg = arg, > + }; > + > + return pci_segments_iterate(iterate_all, &iter) ?: iter.rc; > +} My earlier concern remains as far as e.g. list traversal goes, especially when we're called from nmi_shootdown_cpus() context. The lists themselves may be screwed, after all. Whereas disable_IO_APIC() and hpet_disable() don't involve any list traversal, and even if they did those lists would be stable post-boot. We may want to talk about the up- and down-sides of this on the x86 call later in the day. 
> --- a/xen/include/xen/pci.h > +++ b/xen/include/xen/pci.h > @@ -226,6 +226,10 @@ struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf); > struct pci_dev *pci_get_real_pdev(pci_sbdf_t sbdf); > void pci_check_disable_device(u16 seg, u8 bus, u8 devfn); > > +/* Iterate without locking or preemption over all PCI devices known by Xen. */ > +int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg), > + void *arg); Oh, I see you added the comment here that I did ask for. As it's pretty important for people to notice, may I ask that it be replicated in (or ahead of) the function definition? And then there perhaps also mentioning that one needs to be aware of the function being expected to run with IRQs off (to make clear that it's not a simple matter of adding preemption checks, for example). Jan
diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c index a789416ca3ae..c946225c0b9b 100644 --- a/xen/arch/x86/crash.c +++ b/xen/arch/x86/crash.c @@ -177,6 +177,7 @@ static void nmi_shootdown_cpus(void) disable_IO_APIC(); hpet_disable(); + pci_disable_msi_all(); } } diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h index 63adb19820e8..7f9e531f73e6 100644 --- a/xen/arch/x86/include/asm/msi.h +++ b/xen/arch/x86/include/asm/msi.h @@ -86,6 +86,7 @@ extern int pci_enable_msi(struct pci_dev *pdev, struct msi_info *msi, extern void pci_disable_msi(struct msi_desc *msi_desc); extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off); extern void pci_cleanup_msi(struct pci_dev *pdev); +extern void pci_disable_msi_all(void); extern int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc); extern int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc, hw_irq_controller *handler); diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c index e2360579deda..c9fe942c46f3 100644 --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -1248,6 +1248,24 @@ void pci_cleanup_msi(struct pci_dev *pdev) msi_free_irqs(pdev); } +static int cf_check disable_msi(struct pci_dev *pdev, void *arg) +{ + msi_set_enable(pdev, 0); + msix_set_enable(pdev, 0); + + return 0; +} + +/* Disable MSI and/or MSI-X on all devices known by Xen. 
*/ +void pci_disable_msi_all(void) +{ + int rc = pci_iterate_devices(disable_msi, NULL); + + if ( rc ) + printk(XENLOG_ERR + "Failed to disable MSI(-X) on some devices: %d\n", rc); +} + int pci_reset_msix_state(struct pci_dev *pdev) { unsigned int pos = pdev->msix_pos; diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c index f931db0d71c6..f58c8d3cafe1 100644 --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -376,6 +376,7 @@ void smp_send_stop(void) local_irq_disable(); disable_IO_APIC(); hpet_disable(); + pci_disable_msi_all(); if ( num_online_cpus() > 1 ) { diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 777c6b1a7fdc..945118383f45 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -1803,6 +1803,39 @@ int iommu_do_pci_domctl( return ret; } +struct segment_iter { + int (*handler)(struct pci_dev *pdev, void *arg); + void *arg; + int rc; +}; + +static int cf_check iterate_all(struct pci_seg *pseg, void *arg) +{ + struct segment_iter *iter = arg; + struct pci_dev *pdev; + + list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) + { + int rc = iter->handler(pdev, iter->arg); + + if ( !iter->rc ) + iter->rc = rc; + } + + return 0; +} + +int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg), + void *arg) +{ + struct segment_iter iter = { + .handler = handler, + .arg = arg, + }; + + return pci_segments_iterate(iterate_all, &iter) ?: iter.rc; +} + /* * Local variables: * mode: C diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index f784e9116059..983c592124a8 100644 --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -226,6 +226,10 @@ struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf); struct pci_dev *pci_get_real_pdev(pci_sbdf_t sbdf); void pci_check_disable_device(u16 seg, u8 bus, u8 devfn); +/* Iterate without locking or preemption over all PCI devices known by Xen. 
*/ +int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg), + void *arg); + uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg); uint16_t pci_conf_read16(pci_sbdf_t sbdf, unsigned int reg); uint32_t pci_conf_read32(pci_sbdf_t sbdf, unsigned int reg);
Attempt to disable MSI(-X) capabilities on all PCI devices known by Xen at shutdown. Doing so should help a kexec chained kernel boot more reliably, as device MSI(-X) interrupt generation should be quiesced. It would also prevent "Receive accept error" being raised as a result of non-disabled interrupts targeting offline CPUs. Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> --- Changes since v1: - Split from bigger patch. - Iterate over all devices, even if the handler returns failure. --- xen/arch/x86/crash.c | 1 + xen/arch/x86/include/asm/msi.h | 1 + xen/arch/x86/msi.c | 18 ++++++++++++++++++ xen/arch/x86/smp.c | 1 + xen/drivers/passthrough/pci.c | 33 +++++++++++++++++++++++++++++++++ xen/include/xen/pci.h | 4 ++++ 6 files changed, 58 insertions(+)