diff mbox series

[02/13] x86/msi: Only use high bits of MSI address for DMAR unit

Message ID 20201005152856.974112-2-dwmw2@infradead.org (mailing list archive)
State New, archived
Headers show
Series Fix per-domain IRQ affinity, allow >255 CPUs on x86 without IRQ remapping | expand

Commit Message

David Woodhouse Oct. 5, 2020, 3:28 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

The Intel IOMMU has an MSI-like configuration for its interrupt, but
it isn't really MSI. So it gets to abuse the high 32 bits of the address,
and puts the high 24 bits of the extended APIC ID there.

This isn't something that can be used in the general case for real MSIs,
since external devices using the high bits of the address would be
performing writes to actual memory space above 4GiB, not targeted at the
APIC.

Factor the hack out and allow it only to be used when appropriate,
adding a WARN_ON_ONCE() if other MSIs are targeted at an unreachable APIC
ID. That should never happen since the legacy MSI messages are not supposed
to be used with Interrupt Remapping enabled.

The x2apic_enabled() check isn't needed because we won't bring up CPUs
with higher APIC IDs unless x2apic is enabled anyway.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kernel/apic/msi.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

Comments

Thomas Gleixner Oct. 6, 2020, 8:45 p.m. UTC | #1
On Mon, Oct 05 2020 at 16:28, David Woodhouse wrote:
> -static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
> +static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, int dmar)

bool dmar?

> +/*
> + * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
> + * high bits of the destination APIC ID. This can't be done in the general
> + * case for MSIs as it would be targeting real memory above 4GiB not the
> + * APIC.
> + */
> +static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
> +{
> +	__irq_msi_compose_msg(irqd_cfg(data), msg, 1);
> +
> +
> +

Lots of stray newlines...
diff mbox series

Patch

diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 6313f0a05db7..356f8acf4927 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,13 +23,10 @@ 
 
 struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
 
-static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, int dmar)
 {
 	msg->address_hi = MSI_ADDR_BASE_HI;
 
-	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
-
 	msg->address_lo =
 		MSI_ADDR_BASE_LO |
 		((apic->irq_dest_mode == 0) ?
@@ -43,18 +40,42 @@  static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
 		MSI_DATA_LEVEL_ASSERT |
 		MSI_DATA_DELIVERY_FIXED |
 		MSI_DATA_VECTOR(cfg->vector);
+
+	/*
+	 * Only the IOMMU itself can use the trick of putting destination
+	 * APIC ID into the high bits of the address. Anything else would
+	 * just be writing to memory if it tried that, and needs IR to
+	 * address APICs above 255.
+	 */
+	if (dmar)
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
+	else
+		WARN_ON_ONCE(MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid));
 }
 
 void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 {
-	__irq_msi_compose_msg(irqd_cfg(data), msg);
+	__irq_msi_compose_msg(irqd_cfg(data), msg, 0);
 }
 
+/*
+ * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
+ * high bits of the destination APIC ID. This can't be done in the general
+ * case for MSIs as it would be targeting real memory above 4GiB not the
+ * APIC.
+ */
+static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	__irq_msi_compose_msg(irqd_cfg(data), msg, 1);
+
+
+
+}
 static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
 {
 	struct msi_msg msg[2] = { [1] = { }, };
 
-	__irq_msi_compose_msg(cfg, msg);
+	__irq_msi_compose_msg(cfg, msg, 0);
 	irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
 }
 
@@ -288,6 +309,7 @@  static struct irq_chip dmar_msi_controller = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_set_affinity	= msi_domain_set_affinity,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_compose_msi_msg	= dmar_msi_compose_msg,
 	.irq_write_msi_msg	= dmar_msi_write_msg,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };