[RFC,v2,10/10] AMD/IOMMU: correct IRTE updating

Message ID 5D14DF81020000780023B9DF@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Series x86: AMD x2APIC support

Commit Message

Jan Beulich June 27, 2019, 3:23 p.m. UTC
While for 32-bit IRTEs I think we can safely continue to assume that the
writes will translate to a single MOV, the use of CMPXCHG16B is more
heavy handed than necessary for the 128-bit form, and the flushing
didn't get done along the lines of what the specification says. Mark
entries to be updated as not remapped (which will result in interrupt
requests getting target aborted, but the interrupts should be masked
anyway at that point in time), issue the flush, and only then write the
new entry. In the 128-bit IRTE case set RemapEn separately last, so that
the ordering of the writes of the two 64-bit halves won't matter.

In update_intremap_entry_from_msi_msg() also fold the duplicate initial
lock determination and acquire into just a single instance.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: Putting the flush invocations in loops isn't overly nice, but I
     don't think this can really be abused, since callers up the stack
     hold further locks. Nevertheless I'd like to ask for better
     suggestions.
---
v2: Parts morphed into earlier patch.
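
For illustration, a minimal sketch of the update sequence described in the
commit message, using simplified stand-ins for the types and helpers in
xen/drivers/passthrough/amd/iommu_intr.c (the real code splits the steps
between update_intremap_entry() and its callers, and issues the flush under
iommu->lock):

    #include <stdint.h>

    #define barrier() asm volatile ( "" ::: "memory" )

    union irte128 {
        uint64_t raw[2];
        struct {
            uint64_t remap_en:1;  /* RemapEn is bit 0 of raw[0] */
            uint64_t lo:63;
            uint64_t hi;
        } full;
    };

    /* Stand-in for amd_iommu_flush_intremap() plus its completion wait. */
    static void flush_intremap(unsigned int req_id)
    {
        (void)req_id;
    }

    static void update_irte128(volatile union irte128 *entry,
                               const union irte128 *src,
                               unsigned int req_id)
    {
        /*
         * 1. Clear RemapEn: interrupt requests now get target aborted,
         *    but the interrupt should be masked at this point anyway.
         */
        entry->full.remap_en = 0;

        /* 2. Flush the stale entry from the IOMMU's cache. */
        flush_intremap(req_id);

        /* 3. With remapping still disabled, write the high half ... */
        entry->raw[1] = src->raw[1];
        barrier();

        /*
         * 4. ... and only then the low half, whose single (volatile,
         *    i.e. ACCESS_ONCE()-style) store also sets RemapEn, so the
         *    IOMMU can never observe a half-updated, enabled entry.
         */
        entry->raw[0] = src->raw[0];
    }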

Comments

Andrew Cooper July 2, 2019, 3:08 p.m. UTC | #1
On 27/06/2019 16:23, Jan Beulich wrote:
> While for 32-bit IRTEs I think we can safely continue to assume that the
> writes will translate to a single MOV, the use of CMPXCHG16B is more

The CMPXCHG16B here is stale.

> heavy handed than necessary for the 128-bit form, and the flushing
> didn't get done along the lines of what the specification says. Mark
> entries to be updated as not remapped (which will result in interrupt
> requests getting target aborted, but the interrupts should be masked
> anyway at that point in time), issue the flush, and only then write the
> new entry. In the 128-bit IRTE case set RemapEn separately last, so that
> the ordering of the writes of the two 64-bit halves won't matter.
>
> In update_intremap_entry_from_msi_msg() also fold the duplicate initial
> lock determination and acquire into just a single instance.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC: Putting the flush invocations in loops isn't overly nice, but I
>      don't think this can really be abused, since callers up the stack
>      hold further locks. Nevertheless I'd like to ask for better
>      suggestions.
> ---
> v2: Parts morphed into earlier patch.
>
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -238,8 +238,7 @@ static void update_intremap_entry(union
>          break;
>  
>      case irte128:
> -        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
> -        barrier();
> +        ASSERT(!entry.ptr128->full.remap_en);
>          entry.ptr128->raw[1] =
>              container_of(&full, union irte128, full)->raw[1];
>          barrier();
> @@ -308,6 +307,20 @@ static int update_intremap_entry_from_io
>      }
>  
>      entry = get_intremap_entry(iommu->seg, req_id, offset);
> +
> +    /* The RemapEn fields match for all formats. */
> +    while ( iommu->enabled && entry.ptr32->basic.remap_en )

Why while?  (and by this, what I mean is that this definitely needs a
comment, because the code looks like it ought to be an if.)

~Andrew
Jan Beulich July 3, 2019, 8:55 a.m. UTC | #2
On 02.07.2019 17:08, Andrew Cooper wrote:
> On 27/06/2019 16:23, Jan Beulich wrote:
>> While for 32-bit IRTEs I think we can safely continue to assume that the
>> writes will translate to a single MOV, the use of CMPXCHG16B is more
> 
> The CMPXCHG16B here is stale.

Indeed, as is the 32-bit IRTE part of the sentence (now that I
use ACCESS_ONCE() already before this patch).
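
(For the 32-bit form that amounts to the sketch below, assuming
ACCESS_ONCE() is the usual volatile-cast idiom: a naturally aligned
32-bit store compiles to a single MOV, so the IOMMU observes either the
whole old entry or the whole new one, and no CMPXCHG is needed.)

    #include <stdint.h>

    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    static void write_irte32(uint32_t *slot, uint32_t val)
    {
        /* One aligned 32-bit store; old or new, never a mix. */
        ACCESS_ONCE(*slot) = val;
    }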

>> heavy handed than necessary for the 128-bit form, and the flushing
>> didn't get done along the lines of what the specification says. Mark
>> entries to be updated as not remapped (which will result in interrupt
>> requests getting target aborted, but the interrupts should be masked
>> anyway at that point in time), issue the flush, and only then write the
>> new entry. In the 128-bit IRTE case set RemapEn separately last, so that
>> the ordering of the writes of the two 64-bit halves won't matter.

This last sentence is stale too, and hence I've now removed it.

>> --- a/xen/drivers/passthrough/amd/iommu_intr.c
>> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
>> @@ -238,8 +238,7 @@ static void update_intremap_entry(union
>>           break;
>>   
>>       case irte128:
>> -        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
>> -        barrier();
>> +        ASSERT(!entry.ptr128->full.remap_en);
>>           entry.ptr128->raw[1] =
>>               container_of(&full, union irte128, full)->raw[1];
>>           barrier();
>> @@ -308,6 +307,20 @@ static int update_intremap_entry_from_io
>>       }
>>   
>>       entry = get_intremap_entry(iommu->seg, req_id, offset);
>> +
>> +    /* The RemapEn fields match for all formats. */
>> +    while ( iommu->enabled && entry.ptr32->basic.remap_en )
> 
> Why while?  (and by this, what I mean is that this definitely needs a
> comment, because the code looks like it ought to be an if.)

Well - see the RFC remark after the description. I'd be happy to
change to if(), but only on solid grounds. Without clear
guarantees that no races between IRTE updates can occur, we need
to continue flushing as long as we find RemapEn set again after a
flush. Note how the lock guarding against such races is dropped
and re-acquired in the loop bodies.

Jan
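
For reference, the loop in question from the patch below, annotated (the
comments are explanatory and not part of the change itself):

    /* The RemapEn fields match for all formats. */
    while ( iommu->enabled && entry.ptr32->basic.remap_en )
    {
        entry.ptr32->basic.remap_en = 0; /* target abort further requests */
        spin_unlock(lock);               /* the flush needs iommu->lock */

        spin_lock(&iommu->lock);
        amd_iommu_flush_intremap(iommu, req_id);
        spin_unlock(&iommu->lock);

        /*
         * While the per-table lock was dropped, a racing update may have
         * set RemapEn again; re-taking the lock and re-evaluating the
         * loop condition is what makes this a while rather than an if.
         */
        spin_lock(lock);
    }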

Patch

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -238,8 +238,7 @@  static void update_intremap_entry(union
         break;
 
     case irte128:
-        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
-        barrier();
+        ASSERT(!entry.ptr128->full.remap_en);
         entry.ptr128->raw[1] =
             container_of(&full, union irte128, full)->raw[1];
         barrier();
@@ -308,6 +307,20 @@  static int update_intremap_entry_from_io
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.ptr32->basic.remap_en )
+    {
+        entry.ptr32->basic.remap_en = 0;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
     if ( fresh )
         /* nothing */;
     else if ( !lo_update )
@@ -337,13 +350,6 @@  static int update_intremap_entry_from_io
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled && !fresh )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
     set_rte_index(rte, offset);
 
     return 0;
@@ -608,19 +614,27 @@  static int update_intremap_entry_from_ms
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
 
+    lock = get_intremap_lock(iommu->seg, req_id);
+    spin_lock_irqsave(lock, flags);
+
     if ( msg == NULL )
     {
-        lock = get_intremap_lock(iommu->seg, req_id);
-        spin_lock_irqsave(lock, flags);
         for ( i = 0; i < nr; ++i )
             free_intremap_entry(iommu->seg, req_id, *remap_index + i);
         spin_unlock_irqrestore(lock, flags);
-        goto done;
-    }
 
-    lock = get_intremap_lock(iommu->seg, req_id);
+        if ( iommu->enabled )
+        {
+            spin_lock_irqsave(&iommu->lock, flags);
+            amd_iommu_flush_intremap(iommu, req_id);
+            if ( alias_id != req_id )
+                amd_iommu_flush_intremap(iommu, alias_id);
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+
+        return 0;
+    }
 
-    spin_lock_irqsave(lock, flags);
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
@@ -644,6 +658,22 @@  static int update_intremap_entry_from_ms
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.ptr32->basic.remap_en )
+    {
+        entry.ptr32->basic.remap_en = 0;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        if ( alias_id != req_id )
+            amd_iommu_flush_intremap(iommu, alias_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
@@ -663,16 +693,6 @@  static int update_intremap_entry_from_ms
                get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
-done:
-    if ( iommu->enabled )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        if ( alias_id != req_id )
-            amd_iommu_flush_intremap(iommu, alias_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
     return 0;
 }