
[4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format

Message ID 5D024E6F0200007800237E25@prv1-mh.provo.novell.com (mailing list archive)
State: Superseded
Series: x86: AMD x2APIC support

Commit Message

Jan Beulich June 13, 2019, 1:23 p.m. UTC
This is in preparation of actually enabling x2APIC mode, which requires
this wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change has
meanwhile disappeared or further changed, and I wonder if - rather than
adding an x2apic_enabled check to the conditional - the bypass couldn't
be deleted altogether. For now the goal is to affect the non-x2APIC
paths as little as possible.

Take the liberty and use the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.

Comments

Andrew Cooper June 18, 2019, 11:57 a.m. UTC | #1
On 13/06/2019 14:23, Jan Beulich wrote:
> This is in preparation of actually enabling x2APIC mode, which requires
> this wider IRTE format to be used.
>
> A specific remark regarding the first hunk changing
> amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
> i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
> tables when creating new one"). Other code introduced by that change has
> meanwhile disappeared or further changed, and I wonder if - rather than
> adding an x2apic_enabled check to the conditional - the bypass couldn't
> be deleted altogether. For now the goal is to affect the non-x2APIC
> paths as little as possible.
>
> Take the liberty and use the new "fresh" flag to suppress an unneeded
> flush in update_intremap_entry_from_ioapic().

What is the meaning of fresh?  Wouldn't "needs_update" be a more
descriptive name?

>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
> mode is unavailable in x2APIC mode, but they've confirmed this to be a
> mistake on their part.
>
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -35,12 +35,34 @@ struct irte_basic {
>      unsigned int :8;
>  };
>  
> +struct irte_full {
> +    unsigned int remap_en:1;
> +    unsigned int sup_io_pf:1;
> +    unsigned int int_type:3;
> +    unsigned int rq_eoi:1;
> +    unsigned int dm:1;
> +    unsigned int guest_mode:1; /* MBZ */

/* MBZ - not implemented yet. */

Seeing as interrupt posting will be a minor tweak to this data
structure, rather than implementing a new one.

> +    unsigned int dest_lo:24;
> +    unsigned int :32;
> +    unsigned int vector:8;
> +    unsigned int :24;
> +    unsigned int :24;
> +    unsigned int dest_hi:8;

The manual says that we should prefer aligned 64bit access, so some
raw_{lo,hi} fields here will allow...

> @@ -136,7 +170,21 @@ static void free_intremap_entry(unsigned
>  {
>      union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
>  
> -    *entry.basic = (struct irte_basic){};
> +    switch ( irte_mode )
> +    {
> +    case irte_basic:
> +        *entry.basic = (struct irte_basic){};
> +        break;
> +
> +    case irte_full:
> +        entry.full->remap_en = 0;
> +        wmb();

... this to become

entry._128->raw_lo = 0;
smp_wmb();
entry._128->raw_hi = 0;

The interrupt mapping table is allocated in WB memory and accessed
coherently, so an sfence instruction isn't necessary.  All that matters
is that remap_en gets cleared first.
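
Concretely, an (untested) sketch - the union layout and the "_128" member
name are illustrative only:

union irte128 {
    struct irte_full full;
    struct {
        uint64_t raw_lo;
        uint64_t raw_hi;
    };
};

union irte_ptr {
    void *raw;
    struct irte_basic *basic;
    struct irte_full *full;
    union irte128 *_128;
};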

> +        *entry.full = (struct irte_full){};
> +        break;
> +
> +    default:
> +        ASSERT_UNREACHABLE();
> +    }
>  
>      __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
>  }
> @@ -154,8 +202,38 @@ static void update_intremap_entry(union
>          .dest = dest,
>          .vector = vector,
>      };
> +    struct irte_full full = {
> +        .remap_en = 1,
> +        .sup_io_pf = 0,
> +        .int_type = int_type,
> +        .rq_eoi = 0,
> +        .dm = dest_mode,
> +        .dest_lo = dest,
> +        .dest_hi = dest >> 24,
> +        .vector = vector,
> +    };

Looking at the resulting code after this patch, I think these structures
should move into their respective case blocks, to help the compiler to
avoid initialising both.
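
E.g. (sketch only, reusing the initialiser from the hunk above):

switch ( irte_mode )
{
case irte_full:
{
    struct irte_full full = {
        .remap_en = 1,
        .int_type = int_type,
        .dm = dest_mode,
        .dest_lo = dest,
        .dest_hi = dest >> 24,
        .vector = vector,
    };

    /* ... entry update ... */
    break;
}

/* irte_basic similarly */
}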

> +
> +    switch ( irte_mode )
> +    {
> +        __uint128_t ret;
> +        union {
> +            __uint128_t raw;
> +            struct irte_full full;
> +        } old;
> +
> +    case irte_basic:
> +        *entry.basic = basic;
> +        break;
> +
> +    case irte_full:
> +        old.full = *entry.full;
> +        ret = cmpxchg16b(entry.full, &old, &full);
> +        ASSERT(ret == old.raw);

Similarly, this can be implemented with

entry.full->remap_en = 0;
smp_wmb();
entry._128->raw_hi = full.raw_hi;
smp_wmb();
entry._128->raw_lo = full.raw_lo;

which avoids using a locked operation.
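
Or wrapped up as a helper (untested sketch; names made up, using the
irte128 union from above):

static void update_irte_full(union irte128 *entry, const union irte128 *val)
{
    /* Disable the entry while its two halves are inconsistent. */
    entry->full.remap_en = 0;
    smp_wmb();
    /* Write the half not containing remap_en first ... */
    entry->raw_hi = val->raw_hi;
    smp_wmb();
    /* ... then the half which re-enables the entry. */
    entry->raw_lo = val->raw_lo;
}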

> +        break;
>  
> -    *entry.basic = basic;
> +    default:
> +        ASSERT_UNREACHABLE();
> +    }
>  }
>  
>  static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
> @@ -169,6 +247,11 @@ static inline void set_rte_index(struct
>      rte->delivery_mode = offset >> 8;
>  }
>  
> +static inline unsigned int get_full_dest(const struct irte_full *entry)
> +{
> +    return entry->dest_lo | (entry->dest_hi << 24);

Given your observation on my still-not-upstream-yet patch about GCC not
honouring the type of bitfields, doesn't dest_hi need an explicit cast
to unsigned int before the shift, to avoid it being performed as int?
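
I.e. presumably it wants to become:

static inline unsigned int get_full_dest(const struct irte_full *entry)
{
    /* dest_hi would otherwise promote to (signed) int before the shift. */
    return entry->dest_lo | ((unsigned int)entry->dest_hi << 24);
}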

> @@ -280,6 +392,18 @@ int __init amd_iommu_setup_ioapic_remapp
>              dest_mode = rte.dest_mode;
>              dest = rte.dest.logical.logical_dest;
>  
> +            if ( iommu->ctrl.xt_en )
> +            {
> +                /*
> +                 * In x2APIC mode we have no way of discovering the high 24
> +                 * bits of the destination of an already enabled interrupt.

Yes we do.  We read the interrupt remapping table, which is where that
information lives.

Any firmware driver which is following the spec won't have let an IRTE
get cached, then played with the table without appropriate flushing.

~Andrew
Jan Beulich June 18, 2019, 3:31 p.m. UTC | #2
>>> On 18.06.19 at 13:57, <andrew.cooper3@citrix.com> wrote:
> On 13/06/2019 14:23, Jan Beulich wrote:
>> This is in preparation of actually enabling x2APIC mode, which requires
>> this wider IRTE format to be used.
>>
>> A specific remark regarding the first hunk changing
>> amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
>> i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
>> tables when creating new one"). Other code introduced by that change has
>> meanwhile disappeared or further changed, and I wonder if - rather than
>> adding an x2apic_enabled check to the conditional - the bypass couldn't
>> be deleted altogether. For now the goal is to affect the non-x2APIC
>> paths as little as possible.
>>
>> Take the liberty and use the new "fresh" flag to suppress an unneeded
>> flush in update_intremap_entry_from_ioapic().
> 
> What is the meaning of fresh?  Wouldn't "needs_update" be a more
> descriptive name?

I don't think so, no. "Fresh" means "freshly allocated" and hence not
holding any meaningful data yet.

>> --- a/xen/drivers/passthrough/amd/iommu_intr.c
>> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
>> @@ -35,12 +35,34 @@ struct irte_basic {
>>      unsigned int :8;
>>  };
>>  
>> +struct irte_full {
>> +    unsigned int remap_en:1;
>> +    unsigned int sup_io_pf:1;
>> +    unsigned int int_type:3;
>> +    unsigned int rq_eoi:1;
>> +    unsigned int dm:1;
>> +    unsigned int guest_mode:1; /* MBZ */
> 
> /* MBZ - not implemented yet. */
> 
> Seeing as interrupt posting will be a minor tweak to this data
> structure, rather than implementing a new one.

Again I don't think so: Bits 2...6 have entirely different meaning,
so I think we'd better not try to fold both into one structure.
Separate structures will also better enforce thinking about using
the correct one in any given context, I think (hope).
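
Purely for illustration (not part of the patch, and the field names below
follow my possibly imperfect reading of the spec - only the low 64 bits
are shown):

struct irte_guest {
    unsigned int remap_en:1;
    unsigned int sup_io_pf:1;
    unsigned int ga_log_intr:1; /* bits 2-6 replace int_type/rq_eoi/dm */
    unsigned int :3;
    unsigned int is_run:1;
    unsigned int guest_mode:1;  /* MBO in this format */
    unsigned int dest_lo:24;
    unsigned int ga_tag:32;     /* replaces the reserved :32 field */
};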

>> +    unsigned int dest_lo:24;
>> +    unsigned int :32;
>> +    unsigned int vector:8;
>> +    unsigned int :24;
>> +    unsigned int :24;
>> +    unsigned int dest_hi:8;
> 
> The manual says that we should prefer aligned 64bit access, so some
> raw_{lo,hi} fields here will allow...
> 
>> @@ -136,7 +170,21 @@ static void free_intremap_entry(unsigned
>>  {
>>      union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
>>  
>> -    *entry.basic = (struct irte_basic){};
>> +    switch ( irte_mode )
>> +    {
>> +    case irte_basic:
>> +        *entry.basic = (struct irte_basic){};
>> +        break;
>> +
>> +    case irte_full:
>> +        entry.full->remap_en = 0;
>> +        wmb();
> 
> ... this to become
> 
> entry._128->raw_lo = 0;
> smp_wmb();
> entry._128->raw_hi = 0;
> 
> The interrupt mapping table is allocated in WB memory and accessed
> coherently, so an sfence instruction isn't necessary.  All that matters
> is that remap_en gets cleared first.

I've been trying to spot such a coherency statement - where did I
overlook this being said?

As to introducing "raw" fields - see my reply on the earlier patch. It's
possible, but has other downsides.

And finally on smp_wmb() - there's nothing SMP-ish here. The barrier
specifically must not vanish (in the theoretical case of us patching in
SMP alternatives) in the UP case. Either it's wmb(), or it's just barrier().

>> @@ -154,8 +202,38 @@ static void update_intremap_entry(union
>>          .dest = dest,
>>          .vector = vector,
>>      };
>> +    struct irte_full full = {
>> +        .remap_en = 1,
>> +        .sup_io_pf = 0,
>> +        .int_type = int_type,
>> +        .rq_eoi = 0,
>> +        .dm = dest_mode,
>> +        .dest_lo = dest,
>> +        .dest_hi = dest >> 24,
>> +        .vector = vector,
>> +    };
> 
> Looking at the resulting code after this patch, I think these structures
> should move into their respective case blocks, to help the compiler to
> avoid initialising both.

I admit I didn't check the code, but I'm pretty surprised you say they
can't track this. I pretty much dislike the improperly indented braces
we use to frame case blocks with their own local variables, which is
why I've decided to put the variables where they are now (assuming
that it ought to be rather easy for the compiler to move the actual
initialization into the switch()).

>> +
>> +    switch ( irte_mode )
>> +    {
>> +        __uint128_t ret;
>> +        union {
>> +            __uint128_t raw;
>> +            struct irte_full full;
>> +        } old;
>> +
>> +    case irte_basic:
>> +        *entry.basic = basic;
>> +        break;
>> +
>> +    case irte_full:
>> +        old.full = *entry.full;
>> +        ret = cmpxchg16b(entry.full, &old, &full);
>> +        ASSERT(ret == old.raw);
> 
> Similarly, this can be implemented with
> 
> entry.full->remap_en = 0;
> smp_wmb();
> entry._128->raw_hi = full.raw_hi;
> smp_wmb();
> entry._128->raw_lo = full.raw_lo;
> 
> which avoids using a locked operation.

Right, and the locked operation goes away again in the last patch.
As said there, that part of that patch can probably be moved here.

But first of all we need to settle on whether we want "raw" union
members.

>> @@ -169,6 +247,11 @@ static inline void set_rte_index(struct
>>      rte->delivery_mode = offset >> 8;
>>  }
>>  
>> +static inline unsigned int get_full_dest(const struct irte_full *entry)
>> +{
>> +    return entry->dest_lo | (entry->dest_hi << 24);
> 
> Given your observation on my still-not-upstream-yet patch about GCC not
> honouring the type of bitfields, doesn't dest_hi need an explicit cast
> to unsigned int before the shift, to avoid it being performed as int?

Hmm, interesting question. I think you're right, and a little bit of
experimenting supports your position.
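
E.g. a quick standalone test (gcc on x86; the un-cast shift is formally
undefined, so the first result is merely what one observes):

#include <stdio.h>

struct bf {
    unsigned int hi:8;
};

int main(void)
{
    struct bf b = { .hi = 0x80 };

    /* b.hi promotes to (signed) int: prints ffffffff80000000. */
    printf("%llx\n", (unsigned long long)(b.hi << 24));

    /* With the cast the arithmetic stays unsigned: prints 80000000. */
    printf("%llx\n", (unsigned long long)((unsigned int)b.hi << 24));

    return 0;
}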

>> @@ -280,6 +392,18 @@ int __init amd_iommu_setup_ioapic_remapp
>>              dest_mode = rte.dest_mode;
>>              dest = rte.dest.logical.logical_dest;
>>  
>> +            if ( iommu->ctrl.xt_en )
>> +            {
>> +                /*
>> +                 * In x2APIC mode we have no way of discovering the high 24
>> +                 * bits of the destination of an already enabled interrupt.
> 
> Yes we do.  We read the interrupt remapping table, which is where that
> information lives.
> 
> Any firmware driver which is following the spec won't have let an IRTE
> get cached, then played with the table without appropriate flushing.

Which interrupt remapping table? This is code which runs when we've
just freshly allocated one, with all zeros in it. I'm unaware of a protocol
to communicate whatever interrupt remapping table the firmware may
have used.

But - how would there legitimately be an enabled interrupt here in the
first place?

Jan

Patch

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -35,12 +35,34 @@  struct irte_basic {
     unsigned int :8;
 };
 
+struct irte_full {
+    unsigned int remap_en:1;
+    unsigned int sup_io_pf:1;
+    unsigned int int_type:3;
+    unsigned int rq_eoi:1;
+    unsigned int dm:1;
+    unsigned int guest_mode:1; /* MBZ */
+    unsigned int dest_lo:24;
+    unsigned int :32;
+    unsigned int vector:8;
+    unsigned int :24;
+    unsigned int :24;
+    unsigned int dest_hi:8;
+};
+
+static enum {
+    irte_basic,
+    irte_full,
+    irte_unset,
+} irte_mode __read_mostly = irte_unset;
+
 union irte_ptr {
     void *raw;
     struct irte_basic *basic;
+    struct irte_full *full;
 };
 
-#define INTREMAP_TABLE_ORDER    1
+#define INTREMAP_TABLE_ORDER (irte_mode == irte_basic ? 1 : 3)
 #define INTREMAP_LENGTH 0xB
 #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
 
@@ -127,7 +149,19 @@  static union irte_ptr get_intremap_entry
 
     ASSERT(table.raw && (offset < INTREMAP_ENTRIES));
 
-    table.basic += offset;
+    switch ( irte_mode )
+    {
+    case irte_basic:
+        table.basic += offset;
+        break;
+
+    case irte_full:
+        table.full += offset;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     return table;
 }
@@ -136,7 +170,21 @@  static void free_intremap_entry(unsigned
 {
     union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
 
-    *entry.basic = (struct irte_basic){};
+    switch ( irte_mode )
+    {
+    case irte_basic:
+        *entry.basic = (struct irte_basic){};
+        break;
+
+    case irte_full:
+        entry.full->remap_en = 0;
+        wmb();
+        *entry.full = (struct irte_full){};
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
@@ -154,8 +202,38 @@  static void update_intremap_entry(union
         .dest = dest,
         .vector = vector,
     };
+    struct irte_full full = {
+        .remap_en = 1,
+        .sup_io_pf = 0,
+        .int_type = int_type,
+        .rq_eoi = 0,
+        .dm = dest_mode,
+        .dest_lo = dest,
+        .dest_hi = dest >> 24,
+        .vector = vector,
+    };
+
+    switch ( irte_mode )
+    {
+        __uint128_t ret;
+        union {
+            __uint128_t raw;
+            struct irte_full full;
+        } old;
+
+    case irte_basic:
+        *entry.basic = basic;
+        break;
+
+    case irte_full:
+        old.full = *entry.full;
+        ret = cmpxchg16b(entry.full, &old, &full);
+        ASSERT(ret == old.raw);
+        break;
 
-    *entry.basic = basic;
+    default:
+        ASSERT_UNREACHABLE();
+    }
 }
 
 static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -169,6 +247,11 @@  static inline void set_rte_index(struct
     rte->delivery_mode = offset >> 8;
 }
 
+static inline unsigned int get_full_dest(const struct irte_full *entry)
+{
+    return entry->dest_lo | (entry->dest_hi << 24);
+}
+
 static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
@@ -178,10 +261,11 @@  static int update_intremap_entry_from_io
 {
     unsigned long flags;
     union irte_ptr entry;
-    u8 delivery_mode, dest, vector, dest_mode;
+    unsigned int delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
     unsigned int offset;
+    bool fresh = false;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -189,7 +273,7 @@  static int update_intremap_entry_from_io
     delivery_mode = rte->delivery_mode;
     vector = rte->vector;
     dest_mode = rte->dest_mode;
-    dest = rte->dest.logical.logical_dest;
+    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
 
     spin_lock_irqsave(lock, flags);
 
@@ -204,25 +288,40 @@  static int update_intremap_entry_from_io
             return -ENOSPC;
         }
         *index = offset;
-        lo_update = 1;
+        fresh = true;
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
-    if ( !lo_update )
+    if ( fresh )
+        /* nothing */;
+    else if ( !lo_update )
     {
         /*
          * Low half of incoming RTE is already in remapped format,
          * so need to recover vector and delivery mode from IRTE.
          */
         ASSERT(get_rte_index(rte) == offset);
-        vector = entry.basic->vector;
+        if ( irte_mode == irte_basic )
+            vector = entry.basic->vector;
+        else
+            vector = entry.full->vector;
+        /* The IntType fields match for both formats. */
         delivery_mode = entry.basic->int_type;
     }
+    else if ( x2apic_enabled )
+    {
+        /*
+         * High half of incoming RTE was read from the I/O APIC and hence may
+         * not hold the full destination, so need to recover full destination
+         * from IRTE.
+         */
+        dest = get_full_dest(entry.full);
+    }
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled )
+    if ( iommu->enabled && !fresh )
     {
         spin_lock_irqsave(&iommu->lock, flags);
         amd_iommu_flush_intremap(iommu, req_id);
@@ -246,6 +345,19 @@  int __init amd_iommu_setup_ioapic_remapp
     spinlock_t *lock;
     unsigned int offset;
 
+    for_each_amd_iommu ( iommu )
+    {
+        if ( irte_mode != irte_unset )
+        {
+            if ( iommu->ctrl.ga_en == (irte_mode == irte_basic) )
+                return -ENXIO;
+        }
+        else if ( iommu->ctrl.ga_en )
+            irte_mode = irte_full;
+        else
+            irte_mode = irte_basic;
+    }
+
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
     {
@@ -280,6 +392,18 @@  int __init amd_iommu_setup_ioapic_remapp
             dest_mode = rte.dest_mode;
             dest = rte.dest.logical.logical_dest;
 
+            if ( iommu->ctrl.xt_en )
+            {
+                /*
+                 * In x2APIC mode we have no way of discovering the high 24
+                 * bits of the destination of an already enabled interrupt.
+                 * We come here earlier than for xAPIC mode, so no interrupts
+                 * should have been set up before.
+                 */
+                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+                                IO_APIC_ID(apic), pin);
+            }
+
             spin_lock_irqsave(lock, flags);
             offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -314,7 +438,8 @@  void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf, rc;
+    int seg, bdf, rc;
+    bool saved_mask, fresh = false;
     struct amd_iommu *iommu;
     unsigned int idx;
 
@@ -356,12 +481,22 @@  void amd_iommu_ioapic_update_ire(
         *(((u32 *)&new_rte) + 1) = value;
     }
 
-    if ( new_rte.mask &&
-         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
-        __io_apic_write(apic, reg, value);
-        return;
+
+        /*
+         * There's nowhere except the IRTE to store a full 32-bit destination,
+         * so we may not bypass entry allocation and updating of the low RTE
+         * half in the (usual) case of the high RTE half getting written first.
+         */
+        if ( new_rte.mask && !x2apic_enabled )
+        {
+            __io_apic_write(apic, reg, value);
+            return;
+        }
+
+        fresh = true;
     }
 
     /* mask the interrupt while we change the intremap table */
@@ -390,8 +525,12 @@  void amd_iommu_ioapic_update_ire(
     if ( reg == rte_lo )
         return;
 
-    /* unmask the interrupt after we have updated the intremap table */
-    if ( !saved_mask )
+    /*
+     * Unmask the interrupt after we have updated the intremap table. Also
+     * write the low half if a fresh entry was allocated for a high half
+     * update in x2APIC mode.
+     */
+    if ( !saved_mask || (x2apic_enabled && fresh) )
     {
         old_rte.mask = saved_mask;
         __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -405,25 +544,35 @@  unsigned int amd_iommu_read_ioapic_from_
     unsigned int offset;
     unsigned int val = __io_apic_read(apic, reg);
     unsigned int pin = (reg - 0x10) / 2;
+    uint16_t seg, req_id;
+    union irte_ptr entry;
 
     idx = ioapic_id_to_index(IO_APIC_ID(apic));
     if ( idx == MAX_IO_APICS )
         return -EINVAL;
 
     offset = ioapic_sbdf[idx].pin_2_idx[pin];
+    if ( offset >= INTREMAP_ENTRIES )
+        return val;
 
-    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+    seg = ioapic_sbdf[idx].seg;
+    req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+    entry = get_intremap_entry(seg, req_id, offset);
+
+    if ( !(reg & 1) )
     {
-        u16 bdf = ioapic_sbdf[idx].bdf;
-        u16 seg = ioapic_sbdf[idx].seg;
-        u16 req_id = get_intremap_requestor_id(seg, bdf);
-        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
 
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
         val &= ~(INTREMAP_ENTRIES - 1);
+        /* The IntType fields match for both formats. */
         val |= MASK_INSR(entry.basic->int_type, IO_APIC_REDIR_DELIV_MODE_MASK);
-        val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+        if ( irte_mode == irte_basic )
+            val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+        else
+            val |= MASK_INSR(entry.full->vector, IO_APIC_REDIR_VECTOR_MASK);
     }
+    else if ( x2apic_enabled )
+        val = get_full_dest(entry.full);
 
     return val;
 }
@@ -435,9 +584,9 @@  static int update_intremap_entry_from_ms
     unsigned long flags;
     union irte_ptr entry;
     u16 req_id, alias_id;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset, i;
+    unsigned int dest, offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -458,7 +607,12 @@  static int update_intremap_entry_from_ms
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
-    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+    if ( x2apic_enabled )
+        dest = msg->dest32;
+    else
+        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
@@ -603,8 +757,18 @@  void amd_iommu_read_msi_from_ire(
     }
 
     msg->data &= ~(INTREMAP_ENTRIES - 1);
+    /* The IntType fields match for both formats. */
     msg->data |= MASK_INSR(entry.basic->int_type, MSI_DATA_DELIVERY_MODE_MASK);
-    msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+    if ( irte_mode == irte_basic )
+    {
+        msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+        msg->dest32 = entry.basic->dest;
+    }
+    else
+    {
+        msg->data |= MASK_INSR(entry.full->vector, MSI_DATA_VECTOR_MASK);
+        msg->dest32 = get_full_dest(entry.full);
+    }
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -667,18 +831,33 @@  int __init amd_setup_hpet_msi(struct msi
     return rc;
 }
 
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const void *table)
 {
-    u32 count;
+    unsigned int count;
+    union {
+        const void *raw;
+        const uint32_t *basic;
+        const uint64_t (*full)[2];
+    } tbl = { .raw = table };
 
-    if ( !table )
+    if ( !table || irte_mode == irte_unset )
         return;
 
     for ( count = 0; count < INTREMAP_ENTRIES; count++ )
     {
-        if ( !table[count] )
-            continue;
-        printk("    IRTE[%03x] %08x\n", count, table[count]);
+        if ( irte_mode == irte_basic )
+        {
+            if ( !tbl.basic[count] )
+                continue;
+            printk("    IRTE[%03x] %08x\n", count, tbl.basic[count]);
+        }
+        else
+        {
+            if ( !tbl.full[count][0] && !tbl.full[count][1] )
+                continue;
+            printk("    IRTE[%03x] %016lx_%016lx\n",
+                   count, tbl.full[count][1], tbl.full[count][0]);
+        }
     }
 }