[RFC,9/9] AMD/IOMMU: correct IRTE updating

Message ID 5D024F7C0200007800237E7F@prv1-mh.provo.novell.com (mailing list archive)
State Superseded
Series x86: AMD x2APIC support

Commit Message

Jan Beulich June 13, 2019, 1:28 p.m. UTC
While for 32-bit IRTEs I think we can safely continue to assume that the
writes will translate to a single MOV, the use of CMPXCHG16B is more
heavy-handed than necessary for the 128-bit form, and the flushing
didn't get done along the lines of what the specification says. Mark
entries to be updated as not remapped (which will result in interrupt
requests getting target aborted, but the interrupts should be masked
anyway at that point in time), issue the flush, and only then write the
new entry. In the 128-bit IRTE case set RemapEn separately last, so that
the ordering of the writes of the two 64-bit halves won't matter.

In update_intremap_entry_from_msi_msg() also fold the duplicate initial
lock determination and acquire into just a single instance.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: Putting the flush invocations in loops isn't overly nice, but I
     don't think this can really be abused, since callers up the stack
     hold further locks. Nevertheless I'd like to ask for better
     suggestions.
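
For reference, a condensed sketch of the locking pattern referred to
above, in plain C: the pthread mutexes stand in for Xen's intremap and
per-IOMMU spinlocks, and the hypothetical flush_intremap() stands in
for amd_iommu_flush_intremap(). This mirrors the hunks further down and
is not the actual hypervisor code.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t intremap_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t iommu_lock    = PTHREAD_MUTEX_INITIALIZER;

struct irte { volatile bool remap_en; /* other fields omitted */ };

static void flush_intremap(unsigned int req_id)
{
    (void)req_id;  /* would issue the IOMMU's interrupt table invalidation */
}

/* Called with intremap_lock held; returns with it held again. */
static void disable_and_flush(struct irte *entry, unsigned int req_id)
{
    /* Loop: the entry may have been re-enabled while the lock was dropped. */
    while ( entry->remap_en )
    {
        entry->remap_en = false;  /* further requests get target aborted */

        /* Drop the table lock around the flush, as in the hunks below. */
        pthread_mutex_unlock(&intremap_lock);

        pthread_mutex_lock(&iommu_lock);
        flush_intremap(req_id);
        pthread_mutex_unlock(&iommu_lock);

        pthread_mutex_lock(&intremap_lock);  /* re-check under the lock */
    }
}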

Comments

Andrew Cooper June 18, 2019, 1:28 p.m. UTC | #1
On 13/06/2019 14:28, Jan Beulich wrote:
> While for 32-bit IRTEs I think we can safely continue to assume that the
> writes will translate to a single MOV, the use of CMPXCHG16B is more
> heavy-handed than necessary for the 128-bit form, and the flushing
> didn't get done along the lines of what the specification says. Mark
> entries to be updated as not remapped (which will result in interrupt
> requests getting target aborted, but the interrupts should be masked
> anyway at that point in time), issue the flush, and only then write the
> new entry. In the 128-bit IRTE case set RemapEn separately last, so that
> the ordering of the writes of the two 64-bit halves won't matter.
>
> In update_intremap_entry_from_msi_msg() also fold the duplicate initial
> lock determination and acquire into just a single instance.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Looking at this patch, I think quite a bit of it should be folded into
patch 4.  However, my review suggestions on that patch take precedence
over the net result here.

> ---
> RFC: Putting the flush invocations in loops isn't overly nice, but I
>      don't think this can really be abused, since callers up the stack
>      hold further locks. Nevertheless I'd like to ask for better
>      suggestions.

Let's focus on getting it functioning first, and fast second.  However, I
think we can do better than the loop.  Let me dig some notes out.

~Andrew
Jan Beulich June 18, 2019, 2:58 p.m. UTC | #2
>>> On 18.06.19 at 15:28, <andrew.cooper3@citrix.com> wrote:
> On 13/06/2019 14:28, Jan Beulich wrote:
>> While for 32-bit IRTEs I think we can safely continue to assume that the
>> writes will translate to a single MOV, the use of CMPXCHG16B is more
>> heavy-handed than necessary for the 128-bit form, and the flushing
>> didn't get done along the lines of what the specification says. Mark
>> entries to be updated as not remapped (which will result in interrupt
>> requests getting target aborted, but the interrupts should be masked
>> anyway at that point in time), issue the flush, and only then write the
>> new entry. In the 128-bit IRTE case set RemapEn separately last, so that
>> the ordering of the writes of the two 64-bit halves won't matter.
>>
>> In update_intremap_entry_from_msi_msg() also fold the duplicate initial
>> lock determination and acquire into just a single instance.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Looking at this patch, I think quite a bit of it should be folded into
> patch 4.

Not that much - the changes to update_intremap_entry()
could go there, and then of course the one change to
iov_supports_xt(). But that's about it - the rest isn't specific to
handling of 128-bit IRTEs, and hence wouldn't belong there.

What could be discussed is moving the change here towards the
start of the series, ahead of the ones playing with how IRTEs
get updated. I've put it last for now because (a) I've added it
last, after the CMPXCHG16B approach was already tested, and
(b) of its RFC status (I don't want it to block the rest of
the series).

>  However, my review suggestions on that patch take precedence
> over the net result here.

Sure - let's settle on the least bad variant of that one first. I have
already replied there.

>> ---
>> RFC: Putting the flush invocations in loops isn't overly nice, but I
>>      don't think this can really be abused, since callers up the stack
>>      hold further locks. Nevertheless I'd like to ask for better
>>      suggestions.
> 
> Let's focus on getting it functioning first, and fast second.

My remark wasn't about this being slow, but about the theoretical
risk of this allowing a DoS attack.

>  However, I
> think we can do better than the loop.  Let me dig some notes out.

Thanks much,
Jan

Patch

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -203,7 +203,7 @@  static void update_intremap_entry(union
         .vector = vector,
     };
     struct irte_full full = {
-        .remap_en = 1,
+        .remap_en = 0, /* Will be set explicitly below. */
         .sup_io_pf = 0,
         .int_type = int_type,
         .rq_eoi = 0,
@@ -215,20 +215,15 @@  static void update_intremap_entry(union
 
     switch ( irte_mode )
     {
-        __uint128_t ret;
-        union {
-            __uint128_t raw;
-            struct irte_full full;
-        } old;
-
     case irte_basic:
         *entry.basic = basic;
         break;
 
     case irte_full:
-        old.full = *entry.full;
-        ret = cmpxchg16b(entry.full, &old, &full);
-        ASSERT(ret == old.raw);
+        *entry.full = full;
+        wmb();
+        /* Enable the entry /after/ having written all other fields. */
+        entry.full->remap_en = 1;
         break;
 
     default:
@@ -292,6 +287,20 @@  static int update_intremap_entry_from_io
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.basic->remap_en )
+    {
+        entry.basic->remap_en = 0;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
     if ( fresh )
         /* nothing */;
     else if ( !lo_update )
@@ -321,13 +330,6 @@  static int update_intremap_entry_from_io
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled && !fresh )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
     set_rte_index(rte, offset);
 
     return 0;
@@ -591,19 +593,27 @@  static int update_intremap_entry_from_ms
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
 
+    lock = get_intremap_lock(iommu->seg, req_id);
+    spin_lock_irqsave(lock, flags);
+
     if ( msg == NULL )
     {
-        lock = get_intremap_lock(iommu->seg, req_id);
-        spin_lock_irqsave(lock, flags);
         for ( i = 0; i < nr; ++i )
             free_intremap_entry(iommu->seg, req_id, *remap_index + i);
         spin_unlock_irqrestore(lock, flags);
-        goto done;
-    }
 
-    lock = get_intremap_lock(iommu->seg, req_id);
+        if ( iommu->enabled )
+        {
+            spin_lock_irqsave(&iommu->lock, flags);
+            amd_iommu_flush_intremap(iommu, req_id);
+            if ( alias_id != req_id )
+                amd_iommu_flush_intremap(iommu, alias_id);
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+
+        return 0;
+    }
 
-    spin_lock_irqsave(lock, flags);
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
@@ -627,6 +637,22 @@  static int update_intremap_entry_from_ms
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.basic->remap_en )
+    {
+        entry.basic->remap_en = 0;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        if ( alias_id != req_id )
+            amd_iommu_flush_intremap(iommu, alias_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
@@ -646,16 +672,6 @@  static int update_intremap_entry_from_ms
                get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
-done:
-    if ( iommu->enabled )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        if ( alias_id != req_id )
-            amd_iommu_flush_intremap(iommu, alias_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
     return 0;
 }
 
@@ -801,7 +817,7 @@  bool __init iov_supports_xt(void)
     unsigned int apic;
     struct amd_iommu *iommu;
 
-    if ( !iommu_enable || !iommu_intremap || !cpu_has_cx16 )
+    if ( !iommu_enable || !iommu_intremap )
         return false;
 
     if ( amd_iommu_prepare() )