diff mbox series

[for,8.0,v2] memory: Prevent recursive memory access

Message ID 20230316122430.10529-1-akihiko.odaki@daynix.com (mailing list archive)
State New, archived
Headers show
Series [for,8.0,v2] memory: Prevent recursive memory access | expand

Commit Message

Akihiko Odaki March 16, 2023, 12:24 p.m. UTC
A guest may ask a memory-mapped device to perform DMA. If the
address specified for DMA is the device performing DMA, it will create
recursion. It is very unlikely that device implementations are prepared
for such an abnormal access, which can result in unpredictable behavior.

In particular, such a recursion breaks e1000e, a network device. If
the device is configured to write the received packet to the register
to trigger receiving, it triggers re-entry to the Rx logic of e1000e.
This causes use-after-free since the Rx logic is not re-entrant.

As there should be no valid reason to perform recursive memory access,
check for recursion before accessing memory-mapped device.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1543
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 softmmu/memory.c | 79 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 17 deletions(-)

Comments

Paolo Bonzini March 16, 2023, 12:55 p.m. UTC | #1
On 3/16/23 13:24, Akihiko Odaki wrote:
> +static const Object **accessed_region_owners;
> +static size_t accessed_region_owners_capacity;
> +static size_t accessed_region_owners_num;
> +

These should be thread-local variables at least, because the memory 
access API is thread-safe.

Paolo
Alexander Bulekov March 16, 2023, 4:15 p.m. UTC | #2
On 230316 2124, Akihiko Odaki wrote:
> A guest may ask a memory-mapped device to perform DMA. If the
> address specified for DMA is the device performing DMA, it will create
> recursion. It is very unlikely that device implementations are prepared
> for such an abnormal access, which can result in unpredictable behavior.
> 
> In particular, such a recursion breaks e1000e, a network device. If
> the device is configured to write the received packet to the register
> to trigger receiving, it triggers re-entry to the Rx logic of e1000e.
> This causes use-after-free since the Rx logic is not re-entrant.
> 
> As there should be no valid reason to perform recursive memory access,
> check for recursion before accessing memory-mapped device.
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1543
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>

Hi Akihiko,
I think the spirit of this is similar to the fix I proposed here:
https://lore.kernel.org/qemu-devel/20230313082417.827484-1-alxndr@bu.edu/

My version also addresses the following case, which we have found
instances of:
Device Foo Bottom Half -> DMA write to Device Foo Memory Region

That said, the patch is held up on some corner cases and it seems it
will not make it into 8.0. I guess we can add #1543 to the list of
issues in https://gitlab.com/qemu-project/qemu/-/issues/556

Thanks
-Alex

> ---
>  softmmu/memory.c | 79 +++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 62 insertions(+), 17 deletions(-)
> 
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 4699ba55ec..19c60ee1f0 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -50,6 +50,10 @@ static QTAILQ_HEAD(, AddressSpace) address_spaces
>  
>  static GHashTable *flat_views;
>  
> +static const Object **accessed_region_owners;
> +static size_t accessed_region_owners_capacity;
> +static size_t accessed_region_owners_num;
> +
>  typedef struct AddrRange AddrRange;
>  
>  /*
> @@ -1394,6 +1398,16 @@ bool memory_region_access_valid(MemoryRegion *mr,
>          return false;
>      }
>  
> +    for (size_t i = 0; i < accessed_region_owners_num; i++) {
> +        if (accessed_region_owners[i] == mr->owner) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
> +                          ", size %u, region '%s', reason: recursive access\n",
> +                          is_write ? "write" : "read",
> +                          addr, size, memory_region_name(mr));
> +            return false;
> +        }
> +    }
> +
>      /* Treat zero as compatibility all valid */
>      if (!mr->ops->valid.max_access_size) {
>          return true;
> @@ -1413,6 +1427,34 @@ bool memory_region_access_valid(MemoryRegion *mr,
>      return true;
>  }
>  
> +static bool memory_region_access_start(MemoryRegion *mr,
> +                                       hwaddr addr,
> +                                       unsigned size,
> +                                       bool is_write,
> +                                       MemTxAttrs attrs)
> +{
> +    if (!memory_region_access_valid(mr, addr, size, is_write, attrs)) {
> +        return false;
> +    }
> +
> +    accessed_region_owners_num++;
> +    if (accessed_region_owners_num > accessed_region_owners_capacity) {
> +        accessed_region_owners_capacity = accessed_region_owners_num;
> +        accessed_region_owners = g_realloc_n(accessed_region_owners,
> +                                             accessed_region_owners_capacity,
> +                                             sizeof(*accessed_region_owners));
> +    }
> +
> +    accessed_region_owners[accessed_region_owners_num - 1] = mr->owner;
> +
> +    return true;
> +}
> +
> +static void memory_region_access_end(void)
> +{
> +    accessed_region_owners_num--;
> +}
> +
>  static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
>                                                  hwaddr addr,
>                                                  uint64_t *pval,
> @@ -1450,12 +1492,13 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
>                                             mr->alias_offset + addr,
>                                             pval, op, attrs);
>      }
> -    if (!memory_region_access_valid(mr, addr, size, false, attrs)) {
> +    if (!memory_region_access_start(mr, addr, size, false, attrs)) {
>          *pval = unassigned_mem_read(mr, addr, size);
>          return MEMTX_DECODE_ERROR;
>      }
>  
>      r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
> +    memory_region_access_end();
>      adjust_endianness(mr, pval, op);
>      return r;
>  }
> @@ -1493,13 +1536,14 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>                                           MemTxAttrs attrs)
>  {
>      unsigned size = memop_size(op);
> +    MemTxResult result;
>  
>      if (mr->alias) {
>          return memory_region_dispatch_write(mr->alias,
>                                              mr->alias_offset + addr,
>                                              data, op, attrs);
>      }
> -    if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
> +    if (!memory_region_access_start(mr, addr, size, true, attrs)) {
>          unassigned_mem_write(mr, addr, data, size);
>          return MEMTX_DECODE_ERROR;
>      }
> @@ -1508,23 +1552,24 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>  
>      if ((!kvm_eventfds_enabled()) &&
>          memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
> -        return MEMTX_OK;
> -    }
> -
> -    if (mr->ops->write) {
> -        return access_with_adjusted_size(addr, &data, size,
> -                                         mr->ops->impl.min_access_size,
> -                                         mr->ops->impl.max_access_size,
> -                                         memory_region_write_accessor, mr,
> -                                         attrs);
> +        result = MEMTX_OK;
> +    } else if (mr->ops->write) {
> +        result = access_with_adjusted_size(addr, &data, size,
> +                                           mr->ops->impl.min_access_size,
> +                                           mr->ops->impl.max_access_size,
> +                                           memory_region_write_accessor, mr,
> +                                           attrs);
>      } else {
> -        return
> -            access_with_adjusted_size(addr, &data, size,
> -                                      mr->ops->impl.min_access_size,
> -                                      mr->ops->impl.max_access_size,
> -                                      memory_region_write_with_attrs_accessor,
> -                                      mr, attrs);
> +        result = access_with_adjusted_size(addr, &data, size,
> +                                           mr->ops->impl.min_access_size,
> +                                           mr->ops->impl.max_access_size,
> +                                           memory_region_write_with_attrs_accessor,
> +                                           mr, attrs);
>      }
> +
> +    memory_region_access_end();
> +
> +    return result;
>  }
>  
>  void memory_region_init_io(MemoryRegion *mr,
> -- 
> 2.39.2
>
Akihiko Odaki March 16, 2023, 4:32 p.m. UTC | #3
On 2023/03/17 1:15, Alexander Bulekov wrote:
> On 230316 2124, Akihiko Odaki wrote:
>> A guest may ask a memory-mapped device to perform DMA. If the
>> address specified for DMA is the device performing DMA, it will create
>> recursion. It is very unlikely that device implementations are prepared
>> for such an abnormal access, which can result in unpredictable behavior.
>>
>> In particular, such a recursion breaks e1000e, a network device. If
>> the device is configured to write the received packet to the register
>> to trigger receiving, it triggers re-entry to the Rx logic of e1000e.
>> This causes use-after-free since the Rx logic is not re-entrant.
>>
>> As there should be no valid reason to perform recursive memory access,
>> check for recursion before accessing memory-mapped device.
>>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1543
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> 
> Hi Akihiko,
> I think the spirit of this is similar to the fix I proposed here:
> https://lore.kernel.org/qemu-devel/20230313082417.827484-1-alxndr@bu.edu/
> 
> My version also addresses the following case, which we have found
> instances of:
> Device Foo Bottom Half -> DMA write to Device Foo Memory Region
> 
> That said, the patch is held up on some corner cases and it seems it
> will not make it into 8.0. I guess we can add #1543 to the list of
> issues in https://gitlab.com/qemu-project/qemu/-/issues/556

The e1000e bug is certainly covered by your fix. It is nice that it also 
covers the case of DMA from bottom half. I hope it will land soon in the 
next version.

Regards,
Akihiko Odaki

> 
> Thanks
> -Alex
> 
>> ---
>>   softmmu/memory.c | 79 +++++++++++++++++++++++++++++++++++++-----------
>>   1 file changed, 62 insertions(+), 17 deletions(-)
>>
>> diff --git a/softmmu/memory.c b/softmmu/memory.c
>> index 4699ba55ec..19c60ee1f0 100644
>> --- a/softmmu/memory.c
>> +++ b/softmmu/memory.c
>> @@ -50,6 +50,10 @@ static QTAILQ_HEAD(, AddressSpace) address_spaces
>>   
>>   static GHashTable *flat_views;
>>   
>> +static const Object **accessed_region_owners;
>> +static size_t accessed_region_owners_capacity;
>> +static size_t accessed_region_owners_num;
>> +
>>   typedef struct AddrRange AddrRange;
>>   
>>   /*
>> @@ -1394,6 +1398,16 @@ bool memory_region_access_valid(MemoryRegion *mr,
>>           return false;
>>       }
>>   
>> +    for (size_t i = 0; i < accessed_region_owners_num; i++) {
>> +        if (accessed_region_owners[i] == mr->owner) {
>> +            qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
>> +                          ", size %u, region '%s', reason: recursive access\n",
>> +                          is_write ? "write" : "read",
>> +                          addr, size, memory_region_name(mr));
>> +            return false;
>> +        }
>> +    }
>> +
>>       /* Treat zero as compatibility all valid */
>>       if (!mr->ops->valid.max_access_size) {
>>           return true;
>> @@ -1413,6 +1427,34 @@ bool memory_region_access_valid(MemoryRegion *mr,
>>       return true;
>>   }
>>   
>> +static bool memory_region_access_start(MemoryRegion *mr,
>> +                                       hwaddr addr,
>> +                                       unsigned size,
>> +                                       bool is_write,
>> +                                       MemTxAttrs attrs)
>> +{
>> +    if (!memory_region_access_valid(mr, addr, size, is_write, attrs)) {
>> +        return false;
>> +    }
>> +
>> +    accessed_region_owners_num++;
>> +    if (accessed_region_owners_num > accessed_region_owners_capacity) {
>> +        accessed_region_owners_capacity = accessed_region_owners_num;
>> +        accessed_region_owners = g_realloc_n(accessed_region_owners,
>> +                                             accessed_region_owners_capacity,
>> +                                             sizeof(*accessed_region_owners));
>> +    }
>> +
>> +    accessed_region_owners[accessed_region_owners_num - 1] = mr->owner;
>> +
>> +    return true;
>> +}
>> +
>> +static void memory_region_access_end(void)
>> +{
>> +    accessed_region_owners_num--;
>> +}
>> +
>>   static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
>>                                                   hwaddr addr,
>>                                                   uint64_t *pval,
>> @@ -1450,12 +1492,13 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
>>                                              mr->alias_offset + addr,
>>                                              pval, op, attrs);
>>       }
>> -    if (!memory_region_access_valid(mr, addr, size, false, attrs)) {
>> +    if (!memory_region_access_start(mr, addr, size, false, attrs)) {
>>           *pval = unassigned_mem_read(mr, addr, size);
>>           return MEMTX_DECODE_ERROR;
>>       }
>>   
>>       r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
>> +    memory_region_access_end();
>>       adjust_endianness(mr, pval, op);
>>       return r;
>>   }
>> @@ -1493,13 +1536,14 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>>                                            MemTxAttrs attrs)
>>   {
>>       unsigned size = memop_size(op);
>> +    MemTxResult result;
>>   
>>       if (mr->alias) {
>>           return memory_region_dispatch_write(mr->alias,
>>                                               mr->alias_offset + addr,
>>                                               data, op, attrs);
>>       }
>> -    if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
>> +    if (!memory_region_access_start(mr, addr, size, true, attrs)) {
>>           unassigned_mem_write(mr, addr, data, size);
>>           return MEMTX_DECODE_ERROR;
>>       }
>> @@ -1508,23 +1552,24 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>>   
>>       if ((!kvm_eventfds_enabled()) &&
>>           memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
>> -        return MEMTX_OK;
>> -    }
>> -
>> -    if (mr->ops->write) {
>> -        return access_with_adjusted_size(addr, &data, size,
>> -                                         mr->ops->impl.min_access_size,
>> -                                         mr->ops->impl.max_access_size,
>> -                                         memory_region_write_accessor, mr,
>> -                                         attrs);
>> +        result = MEMTX_OK;
>> +    } else if (mr->ops->write) {
>> +        result = access_with_adjusted_size(addr, &data, size,
>> +                                           mr->ops->impl.min_access_size,
>> +                                           mr->ops->impl.max_access_size,
>> +                                           memory_region_write_accessor, mr,
>> +                                           attrs);
>>       } else {
>> -        return
>> -            access_with_adjusted_size(addr, &data, size,
>> -                                      mr->ops->impl.min_access_size,
>> -                                      mr->ops->impl.max_access_size,
>> -                                      memory_region_write_with_attrs_accessor,
>> -                                      mr, attrs);
>> +        result = access_with_adjusted_size(addr, &data, size,
>> +                                           mr->ops->impl.min_access_size,
>> +                                           mr->ops->impl.max_access_size,
>> +                                           memory_region_write_with_attrs_accessor,
>> +                                           mr, attrs);
>>       }
>> +
>> +    memory_region_access_end();
>> +
>> +    return result;
>>   }
>>   
>>   void memory_region_init_io(MemoryRegion *mr,
>> -- 
>> 2.39.2
>>
diff mbox series

Patch

diff --git a/softmmu/memory.c b/softmmu/memory.c
index 4699ba55ec..19c60ee1f0 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -50,6 +50,10 @@  static QTAILQ_HEAD(, AddressSpace) address_spaces
 
 static GHashTable *flat_views;
 
+static const Object **accessed_region_owners;
+static size_t accessed_region_owners_capacity;
+static size_t accessed_region_owners_num;
+
 typedef struct AddrRange AddrRange;
 
 /*
@@ -1394,6 +1398,16 @@  bool memory_region_access_valid(MemoryRegion *mr,
         return false;
     }
 
+    for (size_t i = 0; i < accessed_region_owners_num; i++) {
+        if (accessed_region_owners[i] == mr->owner) {
+            qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+                          ", size %u, region '%s', reason: recursive access\n",
+                          is_write ? "write" : "read",
+                          addr, size, memory_region_name(mr));
+            return false;
+        }
+    }
+
     /* Treat zero as compatibility all valid */
     if (!mr->ops->valid.max_access_size) {
         return true;
@@ -1413,6 +1427,34 @@  bool memory_region_access_valid(MemoryRegion *mr,
     return true;
 }
 
+static bool memory_region_access_start(MemoryRegion *mr,
+                                       hwaddr addr,
+                                       unsigned size,
+                                       bool is_write,
+                                       MemTxAttrs attrs)
+{
+    if (!memory_region_access_valid(mr, addr, size, is_write, attrs)) {
+        return false;
+    }
+
+    accessed_region_owners_num++;
+    if (accessed_region_owners_num > accessed_region_owners_capacity) {
+        accessed_region_owners_capacity = accessed_region_owners_num;
+        accessed_region_owners = g_realloc_n(accessed_region_owners,
+                                             accessed_region_owners_capacity,
+                                             sizeof(*accessed_region_owners));
+    }
+
+    accessed_region_owners[accessed_region_owners_num - 1] = mr->owner;
+
+    return true;
+}
+
+static void memory_region_access_end(void)
+{
+    accessed_region_owners_num--;
+}
+
 static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *pval,
@@ -1450,12 +1492,13 @@  MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
                                            mr->alias_offset + addr,
                                            pval, op, attrs);
     }
-    if (!memory_region_access_valid(mr, addr, size, false, attrs)) {
+    if (!memory_region_access_start(mr, addr, size, false, attrs)) {
         *pval = unassigned_mem_read(mr, addr, size);
         return MEMTX_DECODE_ERROR;
     }
 
     r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
+    memory_region_access_end();
     adjust_endianness(mr, pval, op);
     return r;
 }
@@ -1493,13 +1536,14 @@  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
                                          MemTxAttrs attrs)
 {
     unsigned size = memop_size(op);
+    MemTxResult result;
 
     if (mr->alias) {
         return memory_region_dispatch_write(mr->alias,
                                             mr->alias_offset + addr,
                                             data, op, attrs);
     }
-    if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
+    if (!memory_region_access_start(mr, addr, size, true, attrs)) {
         unassigned_mem_write(mr, addr, data, size);
         return MEMTX_DECODE_ERROR;
     }
@@ -1508,23 +1552,24 @@  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
 
     if ((!kvm_eventfds_enabled()) &&
         memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
-        return MEMTX_OK;
-    }
-
-    if (mr->ops->write) {
-        return access_with_adjusted_size(addr, &data, size,
-                                         mr->ops->impl.min_access_size,
-                                         mr->ops->impl.max_access_size,
-                                         memory_region_write_accessor, mr,
-                                         attrs);
+        result = MEMTX_OK;
+    } else if (mr->ops->write) {
+        result = access_with_adjusted_size(addr, &data, size,
+                                           mr->ops->impl.min_access_size,
+                                           mr->ops->impl.max_access_size,
+                                           memory_region_write_accessor, mr,
+                                           attrs);
     } else {
-        return
-            access_with_adjusted_size(addr, &data, size,
-                                      mr->ops->impl.min_access_size,
-                                      mr->ops->impl.max_access_size,
-                                      memory_region_write_with_attrs_accessor,
-                                      mr, attrs);
+        result = access_with_adjusted_size(addr, &data, size,
+                                           mr->ops->impl.min_access_size,
+                                           mr->ops->impl.max_access_size,
+                                           memory_region_write_with_attrs_accessor,
+                                           mr, attrs);
     }
+
+    memory_region_access_end();
+
+    return result;
 }
 
 void memory_region_init_io(MemoryRegion *mr,