@@ -40,12 +40,45 @@ union irte32 {
struct irte_basic basic;
};
+struct irte_full {
+ unsigned int remap_en:1;
+ unsigned int sup_io_pf:1;
+ unsigned int int_type:3;
+ unsigned int rq_eoi:1;
+ unsigned int dm:1;
+ unsigned int guest_mode:1; /* MBZ */
+ unsigned int dest_lo:24;
+ unsigned int :32;
+ unsigned int vector:8;
+ unsigned int :24;
+ unsigned int :24;
+ unsigned int dest_hi:8;
+};
+
+union irte128 {
+ uint64_t raw[2];
+ struct irte_full full;
+};
+
+static enum {
+ irte32,
+ irte128,
+ irteUNK,
+} irte_mode __read_mostly = irteUNK;
+
union irte_ptr {
void *ptr;
union irte32 *ptr32;
+ union irte128 *ptr128;
};
-#define INTREMAP_TABLE_ORDER 1
+union irte_cptr {
+ const void *ptr;
+ const union irte32 *ptr32;
+ const union irte128 *ptr128;
+} __transparent__;
+
+#define INTREMAP_TABLE_ORDER (irte_mode == irte32 ? 1 : 3)
#define INTREMAP_LENGTH 0xB
#define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
@@ -132,7 +165,19 @@ static union irte_ptr get_intremap_entry
ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
- table.ptr32 += index;
+ switch ( irte_mode )
+ {
+ case irte32:
+ table.ptr32 += index;
+ break;
+
+ case irte128:
+ table.ptr128 += index;
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
return table;
}
@@ -142,7 +187,21 @@ static void free_intremap_entry(unsigned
{
union irte_ptr entry = get_intremap_entry(seg, bdf, index);
- ACCESS_ONCE(entry.ptr32->raw[0]) = 0;
+ switch ( irte_mode )
+ {
+ case irte32:
+ ACCESS_ONCE(entry.ptr32->raw[0]) = 0;
+ break;
+
+ case irte128:
+ ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+ barrier();
+ entry.ptr128->raw[1] = 0;
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
__clear_bit(index, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
@@ -160,9 +219,37 @@ static void update_intremap_entry(union
.dest = dest,
.vector = vector,
};
+ struct irte_full full = {
+ .remap_en = 1,
+ .sup_io_pf = 0,
+ .int_type = int_type,
+ .rq_eoi = 0,
+ .dm = dest_mode,
+ .dest_lo = dest,
+ .dest_hi = dest >> 24,
+ .vector = vector,
+ };
+
+ switch ( irte_mode )
+ {
+ case irte32:
+ ACCESS_ONCE(entry.ptr32->raw[0]) =
+ container_of(&basic, union irte32, basic)->raw[0];
+ break;
+
+ case irte128:
+ ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+ barrier();
+ entry.ptr128->raw[1] =
+ container_of(&full, union irte128, full)->raw[1];
+ barrier();
+ ACCESS_ONCE(entry.ptr128->raw[0]) =
+ container_of(&full, union irte128, full)->raw[0];
+ break;
- ACCESS_ONCE(entry.ptr32->raw[0]) =
- container_of(&basic, union irte32, basic)->raw[0];
+ default:
+ ASSERT_UNREACHABLE();
+ }
}
static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -176,6 +263,11 @@ static inline void set_rte_index(struct
rte->delivery_mode = offset >> 8;
}
+static inline unsigned int get_full_dest(const union irte128 *entry)
+{
+ return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
+}
+
static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
@@ -185,10 +277,11 @@ static int update_intremap_entry_from_io
{
unsigned long flags;
union irte_ptr entry;
- u8 delivery_mode, dest, vector, dest_mode;
+ unsigned int delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
unsigned int offset;
+ bool fresh = false;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -196,7 +289,7 @@ static int update_intremap_entry_from_io
delivery_mode = rte->delivery_mode;
vector = rte->vector;
dest_mode = rte->dest_mode;
- dest = rte->dest.logical.logical_dest;
+ dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
@@ -211,25 +304,40 @@ static int update_intremap_entry_from_io
return -ENOSPC;
}
*index = offset;
- lo_update = 1;
+ fresh = true;
}
entry = get_intremap_entry(iommu->seg, req_id, offset);
- if ( !lo_update )
+ if ( fresh )
+ /* nothing */;
+ else if ( !lo_update )
{
/*
* Low half of incoming RTE is already in remapped format,
* so need to recover vector and delivery mode from IRTE.
*/
ASSERT(get_rte_index(rte) == offset);
- vector = entry.ptr32->basic.vector;
+ if ( irte_mode == irte32 )
+ vector = entry.ptr32->basic.vector;
+ else
+ vector = entry.ptr128->full.vector;
+ /* The IntType fields match for both formats. */
delivery_mode = entry.ptr32->basic.int_type;
}
+ else if ( x2apic_enabled )
+ {
+ /*
+ * High half of incoming RTE was read from the I/O APIC and hence may
+ * not hold the full destination, so need to recover full destination
+ * from IRTE.
+ */
+ dest = get_full_dest(entry.ptr128);
+ }
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
- if ( iommu->enabled )
+ if ( iommu->enabled && !fresh )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
@@ -253,6 +361,19 @@ int __init amd_iommu_setup_ioapic_remapp
spinlock_t *lock;
unsigned int offset;
+ for_each_amd_iommu ( iommu )
+ {
+ if ( irte_mode != irteUNK )
+ {
+ if ( iommu->ctrl.ga_en == (irte_mode == irte32) )
+ return -ENXIO;
+ }
+ else if ( iommu->ctrl.ga_en )
+ irte_mode = irte128;
+ else
+ irte_mode = irte32;
+ }
+
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
{
@@ -287,6 +408,18 @@ int __init amd_iommu_setup_ioapic_remapp
dest_mode = rte.dest_mode;
dest = rte.dest.logical.logical_dest;
+ if ( iommu->ctrl.xt_en )
+ {
+ /*
+ * In x2APIC mode we have no way of discovering the high 24
+ * bits of the destination of an already enabled interrupt.
+ * We come here earlier than for xAPIC mode, so no interrupts
+ * should have been set up before.
+ */
+ AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+ IO_APIC_ID(apic), pin);
+ }
+
spin_lock_irqsave(lock, flags);
offset = alloc_intremap_entry(seg, req_id, 1);
BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -321,7 +454,8 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf, rc;
+ int seg, bdf, rc;
+ bool saved_mask, fresh = false;
struct amd_iommu *iommu;
unsigned int idx;
@@ -363,12 +497,22 @@ void amd_iommu_ioapic_update_ire(
*(((u32 *)&new_rte) + 1) = value;
}
- if ( new_rte.mask &&
- ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+ if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
- __io_apic_write(apic, reg, value);
- return;
+
+ /*
+ * There's nowhere except the IRTE to store a full 32-bit destination,
+ * so we may not bypass entry allocation and updating of the low RTE
+ * half in the (usual) case of the high RTE half getting written first.
+ */
+ if ( new_rte.mask && !x2apic_enabled )
+ {
+ __io_apic_write(apic, reg, value);
+ return;
+ }
+
+ fresh = true;
}
/* mask the interrupt while we change the intremap table */
@@ -397,8 +541,12 @@ void amd_iommu_ioapic_update_ire(
if ( reg == rte_lo )
return;
- /* unmask the interrupt after we have updated the intremap table */
- if ( !saved_mask )
+ /*
+ * Unmask the interrupt after we have updated the intremap table. Also
+ * write the low half if a fresh entry was allocated for a high half
+ * update in x2APIC mode.
+ */
+ if ( !saved_mask || (x2apic_enabled && fresh) )
{
old_rte.mask = saved_mask;
__io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -412,27 +560,36 @@ unsigned int amd_iommu_read_ioapic_from_
unsigned int offset;
unsigned int val = __io_apic_read(apic, reg);
unsigned int pin = (reg - 0x10) / 2;
+ uint16_t seg, req_id;
+ union irte_ptr entry;
idx = ioapic_id_to_index(IO_APIC_ID(apic));
if ( idx == MAX_IO_APICS )
return -EINVAL;
offset = ioapic_sbdf[idx].pin_2_idx[pin];
+ if ( offset >= INTREMAP_ENTRIES )
+ return val;
- if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+ seg = ioapic_sbdf[idx].seg;
+ req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+ entry = get_intremap_entry(seg, req_id, offset);
+
+ if ( !(reg & 1) )
{
- u16 bdf = ioapic_sbdf[idx].bdf;
- u16 seg = ioapic_sbdf[idx].seg;
- u16 req_id = get_intremap_requestor_id(seg, bdf);
- union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
val &= ~(INTREMAP_ENTRIES - 1);
+ /* The IntType fields match for both formats. */
val |= MASK_INSR(entry.ptr32->basic.int_type,
IO_APIC_REDIR_DELIV_MODE_MASK);
- val |= MASK_INSR(entry.ptr32->basic.vector,
+ val |= MASK_INSR(irte_mode == irte32
+ ? entry.ptr32->basic.vector
+ : entry.ptr128->full.vector,
IO_APIC_REDIR_VECTOR_MASK);
}
+ else if ( x2apic_enabled )
+ val = get_full_dest(entry.ptr128);
return val;
}
@@ -444,9 +601,9 @@ static int update_intremap_entry_from_ms
unsigned long flags;
union irte_ptr entry;
u16 req_id, alias_id;
- u8 delivery_mode, dest, vector, dest_mode;
+ uint8_t delivery_mode, vector, dest_mode;
spinlock_t *lock;
- unsigned int offset, i;
+ unsigned int dest, offset, i;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -467,7 +624,12 @@ static int update_intremap_entry_from_ms
dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
- dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+ if ( x2apic_enabled )
+ dest = msg->dest32;
+ else
+ dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
offset = *remap_index;
if ( offset >= INTREMAP_ENTRIES )
{
@@ -612,10 +774,21 @@ void amd_iommu_read_msi_from_ire(
}
msg->data &= ~(INTREMAP_ENTRIES - 1);
+ /* The IntType fields match for both formats. */
msg->data |= MASK_INSR(entry.ptr32->basic.int_type,
MSI_DATA_DELIVERY_MODE_MASK);
- msg->data |= MASK_INSR(entry.ptr32->basic.vector,
- MSI_DATA_VECTOR_MASK);
+ if ( irte_mode == irte32 )
+ {
+ msg->data |= MASK_INSR(entry.ptr32->basic.vector,
+ MSI_DATA_VECTOR_MASK);
+ msg->dest32 = entry.ptr32->basic.dest;
+ }
+ else
+ {
+ msg->data |= MASK_INSR(entry.ptr128->full.vector,
+ MSI_DATA_VECTOR_MASK);
+ msg->dest32 = get_full_dest(entry.ptr128);
+ }
}
int __init amd_iommu_free_intremap_table(
@@ -678,18 +851,28 @@ int __init amd_setup_hpet_msi(struct msi
return rc;
}
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(union irte_cptr tbl)
{
- u32 count;
+ unsigned int count;
- if ( !table )
+ if ( !tbl.ptr || irte_mode == irteUNK )
return;
for ( count = 0; count < INTREMAP_ENTRIES; count++ )
{
- if ( !table[count] )
- continue;
- printk(" IRTE[%03x] %08x\n", count, table[count]);
+ if ( irte_mode == irte32 )
+ {
+ if ( !tbl.ptr32[count].raw[0] )
+ continue;
+ printk(" IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw[0]);
+ }
+ else
+ {
+ if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
+ continue;
+ printk(" IRTE[%03x] %016lx_%016lx\n",
+ count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
+ }
}
}
This is in preparation for actually enabling x2APIC mode, which
requires this wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change
has meanwhile disappeared or further changed, and I wonder whether -
rather than adding an x2apic_enabled check to the conditional - the
bypass couldn't be deleted altogether. For now the goal is to affect
the non-x2APIC paths as little as possible.

Take the liberty of also using the new "fresh" flag to suppress an
unneeded flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Add cast in get_full_dest(). Re-base over changes earlier in the
    series. Don't use cmpxchg16b. Use barrier() instead of wmb().
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their
naming) mode is unavailable in x2APIC mode, but they've confirmed this
to be a mistake on their part.
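
As an aside for readers (an editor's illustration, not part of the
patch): the new layout and the dest_lo/dest_hi split can be
sanity-checked with a small standalone C program mirroring struct
irte_full, union irte128 and get_full_dest() from the hunks above.
The 16-byte size assumes the usual x86-64 bitfield ABI.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct irte_full {
    unsigned int remap_en:1;
    unsigned int sup_io_pf:1;
    unsigned int int_type:3;
    unsigned int rq_eoi:1;
    unsigned int dm:1;
    unsigned int guest_mode:1; /* MBZ */
    unsigned int dest_lo:24;   /* low 24 bits of the destination */
    unsigned int :32;
    unsigned int vector:8;
    unsigned int :24;
    unsigned int :24;
    unsigned int dest_hi:8;    /* high 8 bits of the destination */
};

union irte128 {
    uint64_t raw[2];
    struct irte_full full;
};

/* As in the patch: recombine the split 32-bit destination. */
static unsigned int get_full_dest(const union irte128 *entry)
{
    return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
}

int main(void)
{
    union irte128 e = { .full = { .dest_lo = 0x123456, .dest_hi = 0xab } };

    /* Four 32-bit groups of bitfields must add up to 128 bits. */
    _Static_assert(sizeof(union irte128) == 16, "IRTE must be 128 bits");
    assert(get_full_dest(&e) == 0xab123456);
    printf("raw = %016" PRIx64 "_%016" PRIx64 "\n", e.raw[1], e.raw[0]);
    return 0;
}

The quadrupled entry size is also what turns INTREMAP_TABLE_ORDER from
1 into 3: 2^0xB = 2048 entries occupy 8KiB (two pages, order 1) at 4
bytes each, but 32KiB (eight pages, order 3) at 16 bytes each.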
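
Likewise regarding the v2 note on avoiding cmpxchg16b: the ordering
update_intremap_entry() uses for a live 128-bit entry can be restated
in isolation as below. barrier() and ACCESS_ONCE() are assumed
stand-ins for Xen's primitives (GCC syntax); as I understand the v2
relaxation from wmb() to barrier(), the compiler-only barrier suffices
because x86 stores to WB memory become visible in program order.

#include <stdint.h>

#define barrier()      asm volatile ( "" ::: "memory" )
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/*
 * Update a live 16-byte IRTE without cmpxchg16b: bit 0 of raw[0] is
 * remap_en, so zapping raw[0] first disables the entry, and the IOMMU
 * can never consume a half-updated (new high half, old low half) one.
 */
static void update_irte128(uint64_t *live, const uint64_t *nval)
{
    ACCESS_ONCE(live[0]) = 0;       /* 1. disable: remap_en = 0 */
    barrier();
    live[1] = nval[1];              /* 2. install the new high half */
    barrier();
    ACCESS_ONCE(live[0]) = nval[0]; /* 3. publish, re-enabling the entry */
}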