Message ID | 1c547c5581ce6192b70c68f39de108cdb2c73f7e.1687278381.git.jupham125@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v1,01/23] pc/xen: Xen Q35 support: provide IRQ handling for PCI devices | expand |
On Tue, Jun 20, 2023 at 01:24:34PM -0400, Joel Upham wrote: > > Signed-off-by: Alexey Gerasimenko <x1917x@xxxxxxxxx> This isn't a valid email address for Alexey - I presume you grabbed these patches from the xen-devel mail archives, which have mangled the addresses for anti-spam reasons. Fortunately there are alternative archives which don't mangle the patches: https://lore.kernel.org/xen-devel/6067bc3c91c9ee629a35723dfb474ef168ff4ebf.1520867955.git.x1917x@gmail.com/ Signed-off-by: Alexey Gerasimenko <x1917x@gmail.com> This affects all patches in the series, but I won't repeat my comment on each one. > Signed-off-by: Joel Upham <jupham125@gmail.com> > --- > hw/i386/pc_piix.c | 3 +- > hw/i386/xen/xen-hvm.c | 7 +++-- > hw/isa/lpc_ich9.c | 53 ++++++++++++++++++++++++++++++++--- > hw/isa/piix3.c | 2 +- > include/hw/southbridge/ich9.h | 1 + > include/hw/xen/xen.h | 4 +-- > stubs/xen-hw-stub.c | 4 +-- > 7 files changed, 61 insertions(+), 13 deletions(-) > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > index d5b0dcd1fe..8c1b20f3bc 100644 > --- a/hw/i386/pc_piix.c > +++ b/hw/i386/pc_piix.c > @@ -62,6 +62,7 @@ > #endif > #include "hw/xen/xen-x86.h" > #include "hw/xen/xen.h" > +#include "sysemu/xen.h" > #include "migration/global_state.h" > #include "migration/misc.h" > #include "sysemu/numa.h" > @@ -233,7 +234,7 @@ static void pc_init1(MachineState *machine, > x86ms->above_4g_mem_size, > pci_memory, ram_memory); > pci_bus_map_irqs(pci_bus, > - xen_enabled() ? xen_pci_slot_get_pirq > + xen_enabled() ? xen_cmn_pci_slot_get_pirq > : pc_pci_slot_get_pirq); > pcms->bus = pci_bus; > > diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c > index 56641a550e..540ac46639 100644 > --- a/hw/i386/xen/xen-hvm.c > +++ b/hw/i386/xen/xen-hvm.c > @@ -15,6 +15,7 @@ > #include "hw/pci/pci.h" > #include "hw/pci/pci_host.h" > #include "hw/i386/pc.h" > +#include "hw/southbridge/ich9.h" > #include "hw/irq.h" > #include "hw/hw.h" > #include "hw/i386/apic-msidef.h" > @@ -136,14 +137,14 @@ typedef struct XenIOState { > Notifier wakeup; > } XenIOState; > > -/* Xen specific function for piix pci */ > +/* Xen-specific functions for pci dev IRQ handling */ > > -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > { > return irq_num + (PCI_SLOT(pci_dev->devfn) << 2); > } > > -void xen_piix3_set_irq(void *opaque, int irq_num, int level) > +void xen_cmn_set_irq(void *opaque, int irq_num, int level) > { > xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2, > irq_num & 3, level); > diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c > index 9c47a2f6c7..733a99d443 100644 > --- a/hw/isa/lpc_ich9.c > +++ b/hw/isa/lpc_ich9.c > @@ -51,6 +51,9 @@ > #include "hw/core/cpu.h" > #include "hw/nvram/fw_cfg.h" > #include "qemu/cutils.h" > +#include "hw/xen/xen.h" > +#include "sysemu/xen.h" > +#include "hw/southbridge/piix.h" > #include "hw/acpi/acpi_aml_interface.h" > #include "trace.h" > > @@ -535,11 +538,49 @@ static int ich9_lpc_post_load(void *opaque, int version_id) > return 0; > } > > +static void ich9_lpc_config_write_xen(PCIDevice *d, > + uint32_t addr, uint32_t val, int len) > +{ > + static bool pirqe_f_warned = false; > + if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) { > + /* handle PIRQA..PIRQD routing */ > + /* Scan for updates to PCI link routes (0x60-0x63). */ > + int i; > + for (i = 0; i < len; i++) { > + uint8_t v = (val >> (8 * i)) & 0xff; > + if (v & 0x80) { > + v = 0; > + } > + v &= 0xf; > + if (((addr + i) >= PIIX_PIRQCA) && ((addr + i) <= PIIX_PIRQCD)) { > + xen_set_pci_link_route(addr + i - PIIX_PIRQCA, v); > + } > + } > + } else if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) { > + while (len--) { > + if (range_covers_byte(ICH9_LPC_PIRQE_ROUT, 4, addr) && > + (val & 0x80) == 0) { > + /* print warning only once */ > + if (!pirqe_f_warned) { > + pirqe_f_warned = true; > + fprintf(stderr, "WARNING: guest domain attempted to use PIRQ%c " > + "routing which is not supported for Xen/Q35 currently\n", > + (char)(addr - ICH9_LPC_PIRQE_ROUT + 'E')); > + break; > + } > + } > + addr++, val >>= 8; > + } > + } > +} > + > static void ich9_lpc_config_write(PCIDevice *d, > uint32_t addr, uint32_t val, int len) > { > ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); > uint32_t rcba_old = pci_get_long(d->config + ICH9_LPC_RCBA); > + if (xen_enabled()) > + ich9_lpc_config_write_xen(d, addr, val, len); > > pci_default_write_config(d, addr, val, len); > if (ranges_overlap(addr, len, ICH9_LPC_PMBASE, 4) || > @@ -731,10 +772,14 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) > return; > } > > - pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); > - pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); > - pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); > - > + if (xen_enabled()) { > + pci_bus_irqs(pci_bus, xen_cmn_set_irq, d, ICH9_XEN_NUM_IRQ_SOURCES); > + pci_bus_map_irqs(pci_bus, xen_cmn_pci_slot_get_pirq); > + } else { > + pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); > + pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); > + pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); > + } > ich9_lpc_pm_init(lpc); > } > > diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c > index f9103ea45a..3d0545eb0e 100644 > --- a/hw/isa/piix3.c > +++ b/hw/isa/piix3.c > @@ -420,7 +420,7 @@ static void piix3_xen_realize(PCIDevice *dev, Error **errp) > * connected to the IOAPIC directly. > * These additional routes can be discovered through ACPI. > */ > - pci_bus_irqs(pci_bus, xen_piix3_set_irq, piix3, XEN_PIIX_NUM_PIRQS); > + pci_bus_irqs(pci_bus, xen_cmn_set_irq, piix3, XEN_PIIX_NUM_PIRQS); > } > > static void piix3_xen_class_init(ObjectClass *klass, void *data) > diff --git a/include/hw/southbridge/ich9.h b/include/hw/southbridge/ich9.h > index fd01649d04..07b84d5227 100644 > --- a/include/hw/southbridge/ich9.h > +++ b/include/hw/southbridge/ich9.h > @@ -130,6 +130,7 @@ struct ICH9LPCState { > > #define ICH9_A2_LPC_REVISION 0x2 > #define ICH9_LPC_NB_PIRQS 8 /* PCI A-H */ > +#define ICH9_XEN_NUM_IRQ_SOURCES 128 > > #define ICH9_LPC_PMBASE 0x40 > #define ICH9_LPC_PMBASE_BASE_ADDRESS_MASK ICH9_MASK(32, 15, 7) > diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h > index 2bd8ec742d..a2c3d98eaa 100644 > --- a/include/hw/xen/xen.h > +++ b/include/hw/xen/xen.h > @@ -37,9 +37,9 @@ extern uint32_t xen_domid; > extern enum xen_mode xen_mode; > extern bool xen_domid_restrict; > > -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); > +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); > int xen_set_pci_link_route(uint8_t link, uint8_t irq); > -void xen_piix3_set_irq(void *opaque, int irq_num, int level); > +void xen_cmn_set_irq(void *opaque, int irq_num, int level); > void xen_hvm_inject_msi(uint64_t addr, uint32_t data); > int xen_is_pirq_msi(uint32_t msi_data); > > diff --git a/stubs/xen-hw-stub.c b/stubs/xen-hw-stub.c > index 34a22f2ad7..f06fbf48c8 100644 > --- a/stubs/xen-hw-stub.c > +++ b/stubs/xen-hw-stub.c > @@ -10,12 +10,12 @@ > #include "hw/xen/xen.h" > #include "hw/xen/xen-x86.h" > > -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > { > return -1; > } > > -void xen_piix3_set_irq(void *opaque, int irq_num, int level) > +void xen_cmn_set_irq(void *opaque, int irq_num, int level) > { > } > > -- > 2.34.1 > > With regards, Daniel
Thank you, I was working off the Xen-devel and didn’t find his email. I will update my qemu and xen patches for the next version. -Joel On Wed, Jun 21, 2023 at 3:17 AM Daniel P. Berrangé <berrange@redhat.com> wrote: > On Tue, Jun 20, 2023 at 01:24:34PM -0400, Joel Upham wrote: > > > > Signed-off-by: Alexey Gerasimenko <x1917x@xxxxxxxxx> > > This isn't a valid email address for Alexey - I presume you grabbed > these patches from the xen-devel mail archives, which have mangled > the addresses for anti-spam reasons. > > Fortunately there are alternative archives which don't mangle the > patches: > > > https://lore.kernel.org/xen-devel/6067bc3c91c9ee629a35723dfb474ef168ff4ebf.1520867955.git.x1917x@gmail.com/ > > Signed-off-by: Alexey Gerasimenko <x1917x@gmail.com> > > This affects all patches in the series, but I won't repeat my > comment on each one. > > > Signed-off-by: Joel Upham <jupham125@gmail.com> > > --- > > hw/i386/pc_piix.c | 3 +- > > hw/i386/xen/xen-hvm.c | 7 +++-- > > hw/isa/lpc_ich9.c | 53 ++++++++++++++++++++++++++++++++--- > > hw/isa/piix3.c | 2 +- > > include/hw/southbridge/ich9.h | 1 + > > include/hw/xen/xen.h | 4 +-- > > stubs/xen-hw-stub.c | 4 +-- > > 7 files changed, 61 insertions(+), 13 deletions(-) > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > > index d5b0dcd1fe..8c1b20f3bc 100644 > > --- a/hw/i386/pc_piix.c > > +++ b/hw/i386/pc_piix.c > > @@ -62,6 +62,7 @@ > > #endif > > #include "hw/xen/xen-x86.h" > > #include "hw/xen/xen.h" > > +#include "sysemu/xen.h" > > #include "migration/global_state.h" > > #include "migration/misc.h" > > #include "sysemu/numa.h" > > @@ -233,7 +234,7 @@ static void pc_init1(MachineState *machine, > > x86ms->above_4g_mem_size, > > pci_memory, ram_memory); > > pci_bus_map_irqs(pci_bus, > > - xen_enabled() ? xen_pci_slot_get_pirq > > + xen_enabled() ? xen_cmn_pci_slot_get_pirq > > : pc_pci_slot_get_pirq); > > pcms->bus = pci_bus; > > > > diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c > > index 56641a550e..540ac46639 100644 > > --- a/hw/i386/xen/xen-hvm.c > > +++ b/hw/i386/xen/xen-hvm.c > > @@ -15,6 +15,7 @@ > > #include "hw/pci/pci.h" > > #include "hw/pci/pci_host.h" > > #include "hw/i386/pc.h" > > +#include "hw/southbridge/ich9.h" > > #include "hw/irq.h" > > #include "hw/hw.h" > > #include "hw/i386/apic-msidef.h" > > @@ -136,14 +137,14 @@ typedef struct XenIOState { > > Notifier wakeup; > > } XenIOState; > > > > -/* Xen specific function for piix pci */ > > +/* Xen-specific functions for pci dev IRQ handling */ > > > > -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > > +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > > { > > return irq_num + (PCI_SLOT(pci_dev->devfn) << 2); > > } > > > > -void xen_piix3_set_irq(void *opaque, int irq_num, int level) > > +void xen_cmn_set_irq(void *opaque, int irq_num, int level) > > { > > xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2, > > irq_num & 3, level); > > diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c > > index 9c47a2f6c7..733a99d443 100644 > > --- a/hw/isa/lpc_ich9.c > > +++ b/hw/isa/lpc_ich9.c > > @@ -51,6 +51,9 @@ > > #include "hw/core/cpu.h" > > #include "hw/nvram/fw_cfg.h" > > #include "qemu/cutils.h" > > +#include "hw/xen/xen.h" > > +#include "sysemu/xen.h" > > +#include "hw/southbridge/piix.h" > > #include "hw/acpi/acpi_aml_interface.h" > > #include "trace.h" > > > > @@ -535,11 +538,49 @@ static int ich9_lpc_post_load(void *opaque, int > version_id) > > return 0; > > } > > > > +static void ich9_lpc_config_write_xen(PCIDevice *d, > > + uint32_t addr, uint32_t val, int len) > > +{ > > + static bool pirqe_f_warned = false; > > + if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) { > > + /* handle PIRQA..PIRQD routing */ > > + /* Scan for updates to PCI link routes (0x60-0x63). */ > > + int i; > > + for (i = 0; i < len; i++) { > > + uint8_t v = (val >> (8 * i)) & 0xff; > > + if (v & 0x80) { > > + v = 0; > > + } > > + v &= 0xf; > > + if (((addr + i) >= PIIX_PIRQCA) && ((addr + i) <= > PIIX_PIRQCD)) { > > + xen_set_pci_link_route(addr + i - PIIX_PIRQCA, v); > > + } > > + } > > + } else if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) { > > + while (len--) { > > + if (range_covers_byte(ICH9_LPC_PIRQE_ROUT, 4, addr) && > > + (val & 0x80) == 0) { > > + /* print warning only once */ > > + if (!pirqe_f_warned) { > > + pirqe_f_warned = true; > > + fprintf(stderr, "WARNING: guest domain attempted to > use PIRQ%c " > > + "routing which is not supported for Xen/Q35 > currently\n", > > + (char)(addr - ICH9_LPC_PIRQE_ROUT + 'E')); > > + break; > > + } > > + } > > + addr++, val >>= 8; > > + } > > + } > > +} > > + > > static void ich9_lpc_config_write(PCIDevice *d, > > uint32_t addr, uint32_t val, int len) > > { > > ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); > > uint32_t rcba_old = pci_get_long(d->config + ICH9_LPC_RCBA); > > + if (xen_enabled()) > > + ich9_lpc_config_write_xen(d, addr, val, len); > > > > pci_default_write_config(d, addr, val, len); > > if (ranges_overlap(addr, len, ICH9_LPC_PMBASE, 4) || > > @@ -731,10 +772,14 @@ static void ich9_lpc_realize(PCIDevice *d, Error > **errp) > > return; > > } > > > > - pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); > > - pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); > > - pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); > > - > > + if (xen_enabled()) { > > + pci_bus_irqs(pci_bus, xen_cmn_set_irq, d, > ICH9_XEN_NUM_IRQ_SOURCES); > > + pci_bus_map_irqs(pci_bus, xen_cmn_pci_slot_get_pirq); > > + } else { > > + pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); > > + pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); > > + pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); > > + } > > ich9_lpc_pm_init(lpc); > > } > > > > diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c > > index f9103ea45a..3d0545eb0e 100644 > > --- a/hw/isa/piix3.c > > +++ b/hw/isa/piix3.c > > @@ -420,7 +420,7 @@ static void piix3_xen_realize(PCIDevice *dev, Error > **errp) > > * connected to the IOAPIC directly. > > * These additional routes can be discovered through ACPI. > > */ > > - pci_bus_irqs(pci_bus, xen_piix3_set_irq, piix3, XEN_PIIX_NUM_PIRQS); > > + pci_bus_irqs(pci_bus, xen_cmn_set_irq, piix3, XEN_PIIX_NUM_PIRQS); > > } > > > > static void piix3_xen_class_init(ObjectClass *klass, void *data) > > diff --git a/include/hw/southbridge/ich9.h > b/include/hw/southbridge/ich9.h > > index fd01649d04..07b84d5227 100644 > > --- a/include/hw/southbridge/ich9.h > > +++ b/include/hw/southbridge/ich9.h > > @@ -130,6 +130,7 @@ struct ICH9LPCState { > > > > #define ICH9_A2_LPC_REVISION 0x2 > > #define ICH9_LPC_NB_PIRQS 8 /* PCI A-H */ > > +#define ICH9_XEN_NUM_IRQ_SOURCES 128 > > > > #define ICH9_LPC_PMBASE 0x40 > > #define ICH9_LPC_PMBASE_BASE_ADDRESS_MASK ICH9_MASK(32, 15, 7) > > diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h > > index 2bd8ec742d..a2c3d98eaa 100644 > > --- a/include/hw/xen/xen.h > > +++ b/include/hw/xen/xen.h > > @@ -37,9 +37,9 @@ extern uint32_t xen_domid; > > extern enum xen_mode xen_mode; > > extern bool xen_domid_restrict; > > > > -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); > > +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); > > int xen_set_pci_link_route(uint8_t link, uint8_t irq); > > -void xen_piix3_set_irq(void *opaque, int irq_num, int level); > > +void xen_cmn_set_irq(void *opaque, int irq_num, int level); > > void xen_hvm_inject_msi(uint64_t addr, uint32_t data); > > int xen_is_pirq_msi(uint32_t msi_data); > > > > diff --git a/stubs/xen-hw-stub.c b/stubs/xen-hw-stub.c > > index 34a22f2ad7..f06fbf48c8 100644 > > --- a/stubs/xen-hw-stub.c > > +++ b/stubs/xen-hw-stub.c > > @@ -10,12 +10,12 @@ > > #include "hw/xen/xen.h" > > #include "hw/xen/xen-x86.h" > > > > -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > > +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > > { > > return -1; > > } > > > > -void xen_piix3_set_irq(void *opaque, int irq_num, int level) > > +void xen_cmn_set_irq(void *opaque, int irq_num, int level) > > { > > } > > > > -- > > 2.34.1 > > > > > > With regards, > Daniel > -- > |: https://berrange.com -o- > https://www.flickr.com/photos/dberrange :| > |: https://libvirt.org -o- > https://fstop138.berrange.com :| > |: https://entangle-photo.org -o- > https://www.instagram.com/dberrange :| > >
On Tue, 2023-06-20 at 13:24 -0400, Joel Upham wrote: > The primary difference in PCI device IRQ management between Xen HVM and > QEMU is that Xen PCI IRQs are "device-centric" while QEMU PCI IRQs are > "chipset-centric". Namely, Xen uses PCI device BDF and INTx as coordinates > to assert IRQ while QEMU finds out to which chipset PIRQ the IRQ is routed > through the hierarchy of PCI buses and manages IRQ assertion on chipset > side (as PIRQ inputs). > > Two callback functions are used for this purpose: .map_irq and .set_irq > (named after corresponding structure fields). Corresponding Xen-specific > callback functions are piix3_set_irq() and pci_slot_get_pirq(). In Xen > case these functions do not operate on pirq pin numbers. Instead, they use > a specific value to pass BDF/INTx information between .map_irq and > .set_irq -- PCI device devfn and INTx pin number are combined into > pseudo-PIRQ in pci_slot_get_pirq, which piix3_set_irq later decodes back > into devfn and INTx number for passing to *set_pci_intx_level() call. > > For Xen on Q35 this scheme is still applicable, with the exception that > function names are non-descriptive now and need to be renamed to show > their common i440/Q35 nature. Proposed new names are: > > xen_pci_slot_get_pirq --> xen_cmn_pci_slot_get_pirq > xen_piix3_set_irq --> xen_cmn_set_irq > > Another IRQ-related difference between i440 and Q35 is the number of PIRQ > inputs and PIRQ routers (PCI IRQ links in terms of ACPI) available. i440 > has 4 PCI interrupt links, while Q35 has 8 (PIRQA...PIRQH). > Currently Xen have support for only 4 PCI links, so we describe only 4 of > 8 PCI links in ACPI tables. Also, hvmloader disables PIRQ routing for > PIRQE..PIRQH by writing 80h into corresponding PIRQ[n]_ROUT registers. > > All this PCI interrupt routing stuff is largely an ancient legacy from PIC > era. It's hardly worth to extend number of PCI links supported as we > normally deal with APIC mode and/or MSI interrupts. > > The only useful thing to do with PIRQE..PIRQH routing currently is to > check if guest actually attempts to use it for some reason (despite ACPI > PCI routing information provided). In this case, a warning is logged. > > Things have changed a bit in modern Qemu, and more changes to the IRQ > mapping had to be done inside the lpc_ich9 to write the irqs and setup > the mappings. > > Signed-off-by: Alexey Gerasimenko <x1917x@xxxxxxxxx> > Signed-off-by: Joel Upham <jupham125@gmail.com> Please ensure you test Linux guests with attached/emulated PCI devices and (at least) the following command lines: • pci=nomsi • pci=nomsi noapic • xen_no_vector_callback pci=nomsi noapic • xen_no_vector_callback pci=nomsi Obviously, with pci=nomsi you're actually *using* PCI INTx, and force it not to use MSI. With noapic you force it to use the i8259. The INTx should be routed to a *different* interrupt number on the i8259 than the I/O APIC. (I thought the IRQ should be asserted on both, but QEMU's ich9_route_intx_pin_to_irq() seems to route to the I/O APIC *only* if it's disabled in the i8259? Sadly, QEMU lacks a comment explaining if that's a faithful emulation of the ICH9, or if it's just a workaround for the fact that that function is only allowed one return value.) This one has potential to be particularly entertaining because the guest may route INTx IRQs to PIRQ event channels. And I think those are supposed to use the I/O APIC pin numbering, *not* the PIC numbering. I *think* you get away with it because of the workaround discussed above. With xen_no_vector_callback you *also* use the emulated INTx of the Xen platform PCI device, for event channel callbacks. We want to test that via both the i8259 and the I/O APIC. And it also ensures that the INTx of the real PCI devices is handled via the PIC or I/OAPIC instead of letting the guest route them to PIRQs.
On Tue, 2023-06-20 at 13:24 -0400, Joel Upham wrote: > The primary difference in PCI device IRQ management between Xen HVM and > QEMU is that Xen PCI IRQs are "device-centric" while QEMU PCI IRQs are > "chipset-centric". Namely, Xen uses PCI device BDF and INTx as coordinates > to assert IRQ while QEMU finds out to which chipset PIRQ the IRQ is routed > through the hierarchy of PCI buses and manages IRQ assertion on chipset > side (as PIRQ inputs). I don't think that's an accurate way of describing it. Let's take the ICH9 as the basic case, and look at how the PIIX3 and Xen both differ from it. As far as I understand it... • ICH9 The four INTx pins from each PCI slot (32*4) are multiplexed down to a smaller number of PIRQ lines; 8 of them on the ICH9. The mapping for each slot is quite complex and depends on chipset registers. Those 8 PIRQ lines (PIRQ[A-H]) are mapped directly and unconditionally to IRQ16-23 on the I/OAPIC. There is also a set of mapping registers in the chipset which allows each PIRQ line to be mapped to the i8259 PIC (as e.g. IRQ5, 10, etc.). (I think QEMU has a bug here. It should be able to deliver to *both* the I/O APIC and the i8259, but it seems not to deliver to the I/O APIC when the i8259 routing is enabled.) • PIIX3 The PIIX3 only has four PIRQ lines, and the mapping from slot/pin to PIRQ line is a *lot* more deterministic; it's basically just a simple mask and shift of the slot/pin numbers. And since the PIIX3 also didn't have an internal I/O APIC, the chipset registers mapping PIRQ# to IRQ# *do* (at least in QEMU's emulation) affect the routing to the I/O APIC as well as the i8259. (I think this is probably a QEMU bug, or at least lack of fidelity in its PC platform emulation. Real hardware with a PIIX3 and external I/O APIC would have routed PIRQ[A-D] to I/O APIC IRQ16-20, wouldn't it?) • Xen Xen has two *separate* hard-coded rotations from slot/pin down to PIRQs. For the I/O APIC it multiplexes down to 32 I/O APIC pins (IRQ16- 47). But for the i8259 it hard-codes the PIIX3 rotation down to 4 PIRQs and expects the device model to provide the i8259 IRQ# for each of them (from the chipset registers). When you say Xen is "device-centric" I think you're saying it's hard- coded the pin mappings and that's why it expects to take the actual PCI bus/device/function/pin in order to do the mapping for itself, while QEMU would normally expect to have done that part "properly" to get a faithful emulation of the hardware in question. (Note the extra fun part I mentioned earlier: Xen can route I/O APIC interrupts as PIRQs, and needs the *I/O APIC* IRQ# for that which might differ to the i8259 IRQ#. So running with 'noapic' and XENFEAT_hvm_pirqs is probably going to *really* confuse your guests because the ACPI _PRT table can only tell them one number. > Two callback functions are used for this purpose: .map_irq and .set_irq > (named after corresponding structure fields). Corresponding Xen-specific > callback functions are piix3_set_irq() and pci_slot_get_pirq(). In Xen > case these functions do not operate on pirq pin numbers. Instead, they use > a specific value to pass BDF/INTx information between .map_irq and > .set_irq -- PCI device devfn and INTx pin number are combined into > pseudo-PIRQ in pci_slot_get_pirq, which piix3_set_irq later decodes back > into devfn and INTx number for passing to *set_pci_intx_level() call. > > For Xen on Q35 this scheme is still applicable, with the exception that > function names are non-descriptive now and need to be renamed to show > their common i440/Q35 nature. Proposed new names are: > > xen_pci_slot_get_pirq --> xen_cmn_pci_slot_get_pirq > xen_piix3_set_irq --> xen_cmn_set_irq > > Another IRQ-related difference between i440 and Q35 is the number of PIRQ > inputs and PIRQ routers (PCI IRQ links in terms of ACPI) available. i440 > has 4 PCI interrupt links, while Q35 has 8 (PIRQA...PIRQH). > Currently Xen have support for only 4 PCI links, so we describe only 4 of > 8 PCI links in ACPI tables. Also, hvmloader disables PIRQ routing for > PIRQE..PIRQH by writing 80h into corresponding PIRQ[n]_ROUT registers. > > All this PCI interrupt routing stuff is largely an ancient legacy from PIC > era. It's hardly worth to extend number of PCI links supported as we > normally deal with APIC mode and/or MSI interrupts. > > The only useful thing to do with PIRQE..PIRQH routing currently is to > check if guest actually attempts to use it for some reason (despite ACPI > PCI routing information provided). In this case, a warning is logged. I don't quite understand how this works. PIRQA-H are supposed to map unconditionally to IRQ16-23 on the ICH9 I/OAPIC. But you can't do that without fixing Xen. So doesn't the ACPI _PRT table have to reflect Xen's hard-coded I/O APIC mapping of slots to IRQ16-47? I don't see where you did that? And there are some devices which are defined to use PIRQ[E-H] by the ICH9 datasheet and which *can't* route to PIRQ[A-D], aren't there? Those devices just can't be used in i8259 mode unless Xen is fixed to handle more PIRQ routings? In fact, Xen doesn't even get the mappings to PIRQ[A-D] right for the ICH9, does it? It's just applying its hard- coded PIIX3 mappings to PIRQ[A-D] and then the ICH9's mapping to IRQ# on top of that?
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index d5b0dcd1fe..8c1b20f3bc 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -62,6 +62,7 @@ #endif #include "hw/xen/xen-x86.h" #include "hw/xen/xen.h" +#include "sysemu/xen.h" #include "migration/global_state.h" #include "migration/misc.h" #include "sysemu/numa.h" @@ -233,7 +234,7 @@ static void pc_init1(MachineState *machine, x86ms->above_4g_mem_size, pci_memory, ram_memory); pci_bus_map_irqs(pci_bus, - xen_enabled() ? xen_pci_slot_get_pirq + xen_enabled() ? xen_cmn_pci_slot_get_pirq : pc_pci_slot_get_pirq); pcms->bus = pci_bus; diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 56641a550e..540ac46639 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -15,6 +15,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" #include "hw/i386/pc.h" +#include "hw/southbridge/ich9.h" #include "hw/irq.h" #include "hw/hw.h" #include "hw/i386/apic-msidef.h" @@ -136,14 +137,14 @@ typedef struct XenIOState { Notifier wakeup; } XenIOState; -/* Xen specific function for piix pci */ +/* Xen-specific functions for pci dev IRQ handling */ -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) { return irq_num + (PCI_SLOT(pci_dev->devfn) << 2); } -void xen_piix3_set_irq(void *opaque, int irq_num, int level) +void xen_cmn_set_irq(void *opaque, int irq_num, int level) { xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2, irq_num & 3, level); diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 9c47a2f6c7..733a99d443 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -51,6 +51,9 @@ #include "hw/core/cpu.h" #include "hw/nvram/fw_cfg.h" #include "qemu/cutils.h" +#include "hw/xen/xen.h" +#include "sysemu/xen.h" +#include "hw/southbridge/piix.h" #include "hw/acpi/acpi_aml_interface.h" #include "trace.h" @@ -535,11 +538,49 @@ static int ich9_lpc_post_load(void *opaque, int version_id) return 0; } +static void ich9_lpc_config_write_xen(PCIDevice *d, + uint32_t addr, uint32_t val, int len) +{ + static bool pirqe_f_warned = false; + if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) { + /* handle PIRQA..PIRQD routing */ + /* Scan for updates to PCI link routes (0x60-0x63). */ + int i; + for (i = 0; i < len; i++) { + uint8_t v = (val >> (8 * i)) & 0xff; + if (v & 0x80) { + v = 0; + } + v &= 0xf; + if (((addr + i) >= PIIX_PIRQCA) && ((addr + i) <= PIIX_PIRQCD)) { + xen_set_pci_link_route(addr + i - PIIX_PIRQCA, v); + } + } + } else if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) { + while (len--) { + if (range_covers_byte(ICH9_LPC_PIRQE_ROUT, 4, addr) && + (val & 0x80) == 0) { + /* print warning only once */ + if (!pirqe_f_warned) { + pirqe_f_warned = true; + fprintf(stderr, "WARNING: guest domain attempted to use PIRQ%c " + "routing which is not supported for Xen/Q35 currently\n", + (char)(addr - ICH9_LPC_PIRQE_ROUT + 'E')); + break; + } + } + addr++, val >>= 8; + } + } +} + static void ich9_lpc_config_write(PCIDevice *d, uint32_t addr, uint32_t val, int len) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); uint32_t rcba_old = pci_get_long(d->config + ICH9_LPC_RCBA); + if (xen_enabled()) + ich9_lpc_config_write_xen(d, addr, val, len); pci_default_write_config(d, addr, val, len); if (ranges_overlap(addr, len, ICH9_LPC_PMBASE, 4) || @@ -731,10 +772,14 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) return; } - pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); - pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); - pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); - + if (xen_enabled()) { + pci_bus_irqs(pci_bus, xen_cmn_set_irq, d, ICH9_XEN_NUM_IRQ_SOURCES); + pci_bus_map_irqs(pci_bus, xen_cmn_pci_slot_get_pirq); + } else { + pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); + pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); + pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); + } ich9_lpc_pm_init(lpc); } diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index f9103ea45a..3d0545eb0e 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -420,7 +420,7 @@ static void piix3_xen_realize(PCIDevice *dev, Error **errp) * connected to the IOAPIC directly. * These additional routes can be discovered through ACPI. */ - pci_bus_irqs(pci_bus, xen_piix3_set_irq, piix3, XEN_PIIX_NUM_PIRQS); + pci_bus_irqs(pci_bus, xen_cmn_set_irq, piix3, XEN_PIIX_NUM_PIRQS); } static void piix3_xen_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/southbridge/ich9.h b/include/hw/southbridge/ich9.h index fd01649d04..07b84d5227 100644 --- a/include/hw/southbridge/ich9.h +++ b/include/hw/southbridge/ich9.h @@ -130,6 +130,7 @@ struct ICH9LPCState { #define ICH9_A2_LPC_REVISION 0x2 #define ICH9_LPC_NB_PIRQS 8 /* PCI A-H */ +#define ICH9_XEN_NUM_IRQ_SOURCES 128 #define ICH9_LPC_PMBASE 0x40 #define ICH9_LPC_PMBASE_BASE_ADDRESS_MASK ICH9_MASK(32, 15, 7) diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index 2bd8ec742d..a2c3d98eaa 100644 --- a/include/hw/xen/xen.h +++ b/include/hw/xen/xen.h @@ -37,9 +37,9 @@ extern uint32_t xen_domid; extern enum xen_mode xen_mode; extern bool xen_domid_restrict; -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); int xen_set_pci_link_route(uint8_t link, uint8_t irq); -void xen_piix3_set_irq(void *opaque, int irq_num, int level); +void xen_cmn_set_irq(void *opaque, int irq_num, int level); void xen_hvm_inject_msi(uint64_t addr, uint32_t data); int xen_is_pirq_msi(uint32_t msi_data); diff --git a/stubs/xen-hw-stub.c b/stubs/xen-hw-stub.c index 34a22f2ad7..f06fbf48c8 100644 --- a/stubs/xen-hw-stub.c +++ b/stubs/xen-hw-stub.c @@ -10,12 +10,12 @@ #include "hw/xen/xen.h" #include "hw/xen/xen-x86.h" -int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) { return -1; } -void xen_piix3_set_irq(void *opaque, int irq_num, int level) +void xen_cmn_set_irq(void *opaque, int irq_num, int level) { }