diff mbox series

[v4,1/3] memory: prevent dma-reentrancy issues

Message ID 20230119070004.321358-2-alxndr@bu.edu (mailing list archive)
State New, archived
Headers show
Series memory: prevent dma-reentrancy issues | expand

Commit Message

Alexander Bulekov Jan. 19, 2023, 7 a.m. UTC
Add a flag to the DeviceState that indicates when a device is engaged in
PIO/MMIO/DMA. This flag is checked and then set prior to calling a device's
MemoryRegion handlers, and is also set when device code initiates DMA.  The
purpose of this flag is to prevent two types of DMA-based reentrancy issues:

1.) mmio -> dma -> mmio case
2.) bh -> dma write -> mmio case

These issues have led to problems such as stack-exhaustion and
use-after-frees.

Summary of the problem from Peter Maydell:
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
---
 include/hw/qdev-core.h |  7 +++++++
 softmmu/memory.c       | 15 +++++++++++++++
 softmmu/trace-events   |  1 +
 3 files changed, 23 insertions(+)

Comments

Darren Kenny Jan. 20, 2023, 2:41 p.m. UTC | #1
Hi Alex,

Generally, this looks good, but I do have a comment below...

On Thursday, 2023-01-19 at 02:00:02 -05, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> This flag is set/checked prior to calling a device's MemoryRegion
> handlers, and set when device code initiates DMA.  The purpose of this
> flag is to prevent two types of DMA-based reentrancy issues:
>
> 1.) mmio -> dma -> mmio case
> 2.) bh -> dma write -> mmio case
>
> These issues have led to problems such as stack-exhaustion and
> use-after-frees.
>
> Summary of the problem from Peter Maydell:
> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
>  include/hw/qdev-core.h |  7 +++++++
>  softmmu/memory.c       | 15 +++++++++++++++
>  softmmu/trace-events   |  1 +
>  3 files changed, 23 insertions(+)
>
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 35fddb19a6..8858195262 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -162,6 +162,10 @@ struct NamedClockList {
>      QLIST_ENTRY(NamedClockList) node;
>  };
>  
> +typedef struct {
> +    bool engaged_in_io;
> +} MemReentrancyGuard;
> +
>  /**
>   * DeviceState:
>   * @realized: Indicates whether the device has been fully constructed.
> @@ -194,6 +198,9 @@ struct DeviceState {
>      int alias_required_for_version;
>      ResettableState reset;
>      GSList *unplug_blockers;
> +
> +    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
> +    MemReentrancyGuard mem_reentrancy_guard;
>  };
>  
>  struct DeviceListener {
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index e05332d07f..90ffaaa4f5 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>      uint64_t access_mask;
>      unsigned access_size;
>      unsigned i;
> +    DeviceState *dev = NULL;
>      MemTxResult r = MEMTX_OK;
>  
>      if (!access_size_min) {
> @@ -542,6 +543,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          access_size_max = 4;
>      }
>  
> +    /* Do not allow more than one simultanous access to a device's IO Regions */
> +    if (mr->owner &&
> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> +        dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);

I don't know how likely this is to happen, but according to:

- https://qemu-project.gitlab.io/qemu/devel/qom.html#c.object_dynamic_cast

it is possible for the object_dynamic_cast() function to return NULL,
so it might make sense to wrap the subsequent calls in a test of dev !=
NULL.

Thanks,

Darren.

> +        if (dev->mem_reentrancy_guard.engaged_in_io) {
> +            trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
> +            return MEMTX_ERROR;
> +        }
> +        dev->mem_reentrancy_guard.engaged_in_io = true;
> +    }
> +
>      /* FIXME: support unaligned access? */
>      access_size = MAX(MIN(size, access_size_max), access_size_min);
>      access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> @@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>                          access_mask, attrs);
>          }
>      }
> +    if (dev) {
> +        dev->mem_reentrancy_guard.engaged_in_io = false;
> +    }
>      return r;
>  }
>  
> diff --git a/softmmu/trace-events b/softmmu/trace-events
> index 22606dc27b..62d04ea9a7 100644
> --- a/softmmu/trace-events
> +++ b/softmmu/trace-events
> @@ -13,6 +13,7 @@ memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
>  memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
>  memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
>  memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
> +memory_region_reentrant_io(int cpu_index, void *mr, uint64_t offset, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" size %u"
>  memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
>  memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
>  memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"
> -- 
> 2.39.0
Peter Maydell Jan. 20, 2023, 2:47 p.m. UTC | #2
On Fri, 20 Jan 2023 at 14:42, Darren Kenny <darren.kenny@oracle.com> wrote:
> Generally, this looks good, but I do have a comment below...
>
> On Thursday, 2023-01-19 at 02:00:02 -05, Alexander Bulekov wrote:
> > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> > This flag is set/checked prior to calling a device's MemoryRegion
> > handlers, and set when device code initiates DMA.  The purpose of this
> > flag is to prevent two types of DMA-based reentrancy issues:

> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index e05332d07f..90ffaaa4f5 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> >      uint64_t access_mask;
> >      unsigned access_size;
> >      unsigned i;
> > +    DeviceState *dev = NULL;
> >      MemTxResult r = MEMTX_OK;
> >
> >      if (!access_size_min) {
> > @@ -542,6 +543,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> >          access_size_max = 4;
> >      }
> >
> > +    /* Do not allow more than one simultanous access to a device's IO Regions */
> > +    if (mr->owner &&
> > +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> > +        dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
>
> I don't know how likely this is to happen, but according to:
>
> - https://qemu-project.gitlab.io/qemu/devel/qom.html#c.object_dynamic_cast
>
> it is possible for the object_dynamic_cast() function to return NULL,
> so it might make sense to wrap the subsequent calls in a test of dev !=
> NULL.

Yes. This came up in a previous version of this:
https://lore.kernel.org/qemu-devel/CAFEAcA8E4nDoAWcj-v-dED-0hDtXGjJNSp3A=kdGF8UOCw0DrQ@mail.gmail.com/

It's generally a bug to call object_dynamic_cast() and then not check
the return value.

thanks
-- PMM
Alexander Bulekov Jan. 26, 2023, 5:19 a.m. UTC | #3
On 230120 1447, Peter Maydell wrote:
> On Fri, 20 Jan 2023 at 14:42, Darren Kenny <darren.kenny@oracle.com> wrote:
> > Generally, this looks good, but I do have a comment below...
> >
> > On Thursday, 2023-01-19 at 02:00:02 -05, Alexander Bulekov wrote:
> > > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> > > This flag is set/checked prior to calling a device's MemoryRegion
> > > handlers, and set when device code initiates DMA.  The purpose of this
> > > flag is to prevent two types of DMA-based reentrancy issues:
> 
> > > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > > index e05332d07f..90ffaaa4f5 100644
> > > --- a/softmmu/memory.c
> > > +++ b/softmmu/memory.c
> > > @@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> > >      uint64_t access_mask;
> > >      unsigned access_size;
> > >      unsigned i;
> > > +    DeviceState *dev = NULL;
> > >      MemTxResult r = MEMTX_OK;
> > >
> > >      if (!access_size_min) {
> > > @@ -542,6 +543,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> > >          access_size_max = 4;
> > >      }
> > >
> > > +    /* Do not allow more than one simultanous access to a device's IO Regions */
> > > +    if (mr->owner &&
> > > +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> > > +        dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
> >
> > I don't know how likely this is to happen, but according to:
> >
> > - https://qemu-project.gitlab.io/qemu/devel/qom.html#c.object_dynamic_cast
> >
> > it is possible for the object_dynamic_cast() function to return NULL,
> > so it might make sense to wrap the subsequent calls in a test of dev !=
> > NULL.
> 
> Yes. This came up in a previous version of this:
> https://lore.kernel.org/qemu-devel/CAFEAcA8E4nDoAWcj-v-dED-0hDtXGjJNSp3A=kdGF8UOCw0DrQ@mail.gmail.com/
> 
> It's generally a bug to call object_dynamic_cast() and then not check
> the return value.
> 

Sorry I missed that - Will be fixed in V5.
-Alex

> thanks
> -- PMM
diff mbox series

Patch

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 35fddb19a6..8858195262 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -162,6 +162,10 @@  struct NamedClockList {
     QLIST_ENTRY(NamedClockList) node;
 };
 
+typedef struct {
+    bool engaged_in_io;
+} MemReentrancyGuard;
+
 /**
  * DeviceState:
  * @realized: Indicates whether the device has been fully constructed.
@@ -194,6 +198,9 @@  struct DeviceState {
     int alias_required_for_version;
     ResettableState reset;
     GSList *unplug_blockers;
+
+    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
+    MemReentrancyGuard mem_reentrancy_guard;
 };
 
 struct DeviceListener {
diff --git a/softmmu/memory.c b/softmmu/memory.c
index e05332d07f..90ffaaa4f5 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -533,6 +533,7 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
     uint64_t access_mask;
     unsigned access_size;
     unsigned i;
+    DeviceState *dev = NULL;
     MemTxResult r = MEMTX_OK;
 
     if (!access_size_min) {
@@ -542,6 +543,17 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
         access_size_max = 4;
     }
 
+    /* Disallow more than one simultaneous access to a device's IO Regions */
+    if (mr->owner &&
+        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
+        dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
+        if (dev->mem_reentrancy_guard.engaged_in_io) {
+            trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
+            return MEMTX_ERROR;
+        }
+        dev->mem_reentrancy_guard.engaged_in_io = true;
+    }
+
     /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -556,6 +568,9 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
                         access_mask, attrs);
         }
     }
+    if (dev) {
+        dev->mem_reentrancy_guard.engaged_in_io = false;
+    }
     return r;
 }
 
diff --git a/softmmu/trace-events b/softmmu/trace-events
index 22606dc27b..62d04ea9a7 100644
--- a/softmmu/trace-events
+++ b/softmmu/trace-events
@@ -13,6 +13,7 @@  memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
 memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
 memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_reentrant_io(int cpu_index, void *mr, uint64_t offset, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" size %u"
 memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"