diff mbox series

[v2] memory: prevent dma-reentracy issues

Message ID 20220609135851.42193-1-alxndr@bu.edu (mailing list archive)
State New, archived
Headers show
Series [v2] memory: prevent dma-reentracy issues | expand

Commit Message

Alexander Bulekov June 9, 2022, 1:58 p.m. UTC
Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
This flag is set/checked prior to calling a device's MemoryRegion
handlers, and set when device code initiates DMA.  The purpose of this
flag is to prevent two types of DMA-based reentrancy issues:

1.) mmio -> dma -> mmio case
2.) bh -> dma write -> mmio case

These issues have led to problems such as stack-exhaustion and
use-after-frees.

Summary of the problem from Peter Maydell:
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
---
 include/hw/pci/pci.h   | 13 +++++++++++--
 include/hw/qdev-core.h |  3 +++
 softmmu/dma-helpers.c  | 12 ++++++++++++
 softmmu/memory.c       | 15 +++++++++++++++
 softmmu/trace-events   |  1 +
 5 files changed, 42 insertions(+), 2 deletions(-)

Comments

Alexander Bulekov June 20, 2022, 2:06 p.m. UTC | #1
On 220609 0958, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> This flag is set/checked prior to calling a device's MemoryRegion
> handlers, and set when device code initiates DMA.  The purpose of this
> flag is to prevent two types of DMA-based reentrancy issues:
> 
> 1.) mmio -> dma -> mmio case
> 2.) bh -> dma write -> mmio case
> 
> These issues have led to problems such as stack-exhaustion and
> use-after-frees.
> 
> Summary of the problem from Peter Maydell:
> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
>  include/hw/pci/pci.h   | 13 +++++++++++--
>  include/hw/qdev-core.h |  3 +++
>  softmmu/dma-helpers.c  | 12 ++++++++++++
>  softmmu/memory.c       | 15 +++++++++++++++
>  softmmu/trace-events   |  1 +
>  5 files changed, 42 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 44dacfa224..ab1ad0f7a8 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
>                                       void *buf, dma_addr_t len,
>                                       DMADirection dir, MemTxAttrs attrs)
>  {
> -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> -                         dir, attrs);
> +    bool prior_engaged_state;
> +    MemTxResult result;
> +
> +    prior_engaged_state = dev->qdev.engaged_in_io;
> +
> +    dev->qdev.engaged_in_io = true;
> +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> +                           dir, attrs);
> +    dev->qdev.engaged_in_io = prior_engaged_state;
> +
> +    return result;
>  }
>  
>  /**
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 92c3d65208..6474dc51fa 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -193,6 +193,9 @@ struct DeviceState {
>      int instance_id_alias;
>      int alias_required_for_version;
>      ResettableState reset;
> +
> +    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
> +    int engaged_in_io;
>  };
>  
>  struct DeviceListener {
> diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
> index 7820fec54c..7a4f1fb9b3 100644
> --- a/softmmu/dma-helpers.c
> +++ b/softmmu/dma-helpers.c
> @@ -288,8 +288,16 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
>      uint8_t *ptr = buf;
>      dma_addr_t xresidual;
>      int sg_cur_index;
> +    DeviceState *dev;
> +    bool prior_engaged_state;
>      MemTxResult res = MEMTX_OK;
>  
> +    dev = sg->dev;
> +    if (dev) {
> +        prior_engaged_state = dev->engaged_in_io;
> +        dev->engaged_in_io = true;
> +    }
> +
>      xresidual = sg->size;
>      sg_cur_index = 0;
>      len = MIN(len, xresidual);
> @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
>          xresidual -= xfer;
>      }
>  
> +    if (dev) {
> +        dev->engaged_in_io = prior_engaged_state;
> +    }
> +
>      if (residual) {
>          *residual = xresidual;
>      }
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 7ba2048836..44a14bb4f5 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -532,6 +532,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>      uint64_t access_mask;
>      unsigned access_size;
>      unsigned i;
> +    DeviceState *dev = NULL;
>      MemTxResult r = MEMTX_OK;
>  
>      if (!access_size_min) {
> @@ -541,6 +542,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          access_size_max = 4;
>      }
>  
> +    /* Do not allow more than one simultanous access to a device's IO Regions */
> +    if (mr->owner &&
> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> +        dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
> +        if (dev->engaged_in_io) {
> +            trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
> +            return MEMTX_ERROR;
> +        }
> +        dev->engaged_in_io = true;
> +    }
> +
>      /* FIXME: support unaligned access? */
>      access_size = MAX(MIN(size, access_size_max), access_size_min);
>      access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> @@ -555,6 +567,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>                          access_mask, attrs);
>          }
>      }
> +    if (dev) {
> +        dev->engaged_in_io = false;
> +    }
>      return r;
>  }
>  
> diff --git a/softmmu/trace-events b/softmmu/trace-events
> index 9c88887b3c..d7228316db 100644
> --- a/softmmu/trace-events
> +++ b/softmmu/trace-events
> @@ -13,6 +13,7 @@ memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
>  memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
>  memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
>  memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
> +memory_region_reentrant_io(int cpu_index, void *mr, uint64_t offset, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" size %u"
>  memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
>  memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
>  memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"
> -- 
> 2.33.0
> 

ping
David Hildenbrand June 21, 2022, 8:34 a.m. UTC | #2
On 09.06.22 15:58, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> This flag is set/checked prior to calling a device's MemoryRegion
> handlers, and set when device code initiates DMA.  The purpose of this
> flag is to prevent two types of DMA-based reentrancy issues:
> 
> 1.) mmio -> dma -> mmio case
> 2.) bh -> dma write -> mmio case
> 
> These issues have led to problems such as stack-exhaustion and
> use-after-frees.
> 
> Summary of the problem from Peter Maydell:
> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
>  include/hw/pci/pci.h   | 13 +++++++++++--
>  include/hw/qdev-core.h |  3 +++
>  softmmu/dma-helpers.c  | 12 ++++++++++++
>  softmmu/memory.c       | 15 +++++++++++++++
>  softmmu/trace-events   |  1 +
>  5 files changed, 42 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 44dacfa224..ab1ad0f7a8 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
>                                       void *buf, dma_addr_t len,
>                                       DMADirection dir, MemTxAttrs attrs)
>  {
> -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> -                         dir, attrs);
> +    bool prior_engaged_state;
> +    MemTxResult result;
> +
> +    prior_engaged_state = dev->qdev.engaged_in_io;
> +
> +    dev->qdev.engaged_in_io = true;
> +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> +                           dir, attrs);
> +    dev->qdev.engaged_in_io = prior_engaged_state;
> +
> +    return result;
>  }
>  
>  /**
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 92c3d65208..6474dc51fa 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -193,6 +193,9 @@ struct DeviceState {
>      int instance_id_alias;
>      int alias_required_for_version;
>      ResettableState reset;
> +
> +    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
> +    int engaged_in_io;
>  };
>  
>  struct DeviceListener {
> diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
> index 7820fec54c..7a4f1fb9b3 100644
> --- a/softmmu/dma-helpers.c
> +++ b/softmmu/dma-helpers.c
> @@ -288,8 +288,16 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
>      uint8_t *ptr = buf;
>      dma_addr_t xresidual;
>      int sg_cur_index;
> +    DeviceState *dev;
> +    bool prior_engaged_state;
>      MemTxResult res = MEMTX_OK;
>  
> +    dev = sg->dev;
> +    if (dev) {
> +        prior_engaged_state = dev->engaged_in_io;
> +        dev->engaged_in_io = true;
> +    }
> +
>      xresidual = sg->size;
>      sg_cur_index = 0;
>      len = MIN(len, xresidual);
> @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
>          xresidual -= xfer;
>      }
>  
> +    if (dev) {
> +        dev->engaged_in_io = prior_engaged_state;
> +    }
> +
>      if (residual) {
>          *residual = xresidual;
>      }
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 7ba2048836..44a14bb4f5 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -532,6 +532,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>      uint64_t access_mask;
>      unsigned access_size;
>      unsigned i;
> +    DeviceState *dev = NULL;
>      MemTxResult r = MEMTX_OK;
>  
>      if (!access_size_min) {
> @@ -541,6 +542,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          access_size_max = 4;
>      }
>  
> +    /* Do not allow more than one simultanous access to a device's IO Regions */
> +    if (mr->owner &&
> +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {

Would it make sense to define some helper function like
memory_region_is_XXX (I assume XXX -> DEVICE_IO), to make that code
easier to be consumed by humans?

Unfortunately I cannot really comment on the sanity of the approach,
because the underlying problem isn't completely clear to me (I think
other people on CC were involved in the discussions around DMA reentry
and failed attempts in the past). Having that said, that approach
doesn't look wrong to me.
Alexander Bulekov June 21, 2022, 3:11 p.m. UTC | #3
On 220621 1034, David Hildenbrand wrote:
> On 09.06.22 15:58, Alexander Bulekov wrote:
> > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> > This flag is set/checked prior to calling a device's MemoryRegion
> > handlers, and set when device code initiates DMA.  The purpose of this
> > flag is to prevent two types of DMA-based reentrancy issues:
> > 
> > 1.) mmio -> dma -> mmio case
> > 2.) bh -> dma write -> mmio case
> > 
> > These issues have led to problems such as stack-exhaustion and
> > use-after-frees.
> > 
> > Summary of the problem from Peter Maydell:
> > https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
> > 
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> > Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> > ---
> >  include/hw/pci/pci.h   | 13 +++++++++++--
> >  include/hw/qdev-core.h |  3 +++
> >  softmmu/dma-helpers.c  | 12 ++++++++++++
> >  softmmu/memory.c       | 15 +++++++++++++++
> >  softmmu/trace-events   |  1 +
> >  5 files changed, 42 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 44dacfa224..ab1ad0f7a8 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> >                                       void *buf, dma_addr_t len,
> >                                       DMADirection dir, MemTxAttrs attrs)
> >  {
> > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > -                         dir, attrs);
> > +    bool prior_engaged_state;
> > +    MemTxResult result;
> > +
> > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > +
> > +    dev->qdev.engaged_in_io = true;
> > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > +                           dir, attrs);
> > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > +
> > +    return result;
> >  }
> >  
> >  /**
> > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> > index 92c3d65208..6474dc51fa 100644
> > --- a/include/hw/qdev-core.h
> > +++ b/include/hw/qdev-core.h
> > @@ -193,6 +193,9 @@ struct DeviceState {
> >      int instance_id_alias;
> >      int alias_required_for_version;
> >      ResettableState reset;
> > +
> > +    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
> > +    int engaged_in_io;
> >  };
> >  
> >  struct DeviceListener {
> > diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
> > index 7820fec54c..7a4f1fb9b3 100644
> > --- a/softmmu/dma-helpers.c
> > +++ b/softmmu/dma-helpers.c
> > @@ -288,8 +288,16 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> >      uint8_t *ptr = buf;
> >      dma_addr_t xresidual;
> >      int sg_cur_index;
> > +    DeviceState *dev;
> > +    bool prior_engaged_state;
> >      MemTxResult res = MEMTX_OK;
> >  
> > +    dev = sg->dev;
> > +    if (dev) {
> > +        prior_engaged_state = dev->engaged_in_io;
> > +        dev->engaged_in_io = true;
> > +    }
> > +
> >      xresidual = sg->size;
> >      sg_cur_index = 0;
> >      len = MIN(len, xresidual);
> > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> >          xresidual -= xfer;
> >      }
> >  
> > +    if (dev) {
> > +        dev->engaged_in_io = prior_engaged_state;
> > +    }
> > +
> >      if (residual) {
> >          *residual = xresidual;
> >      }
> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index 7ba2048836..44a14bb4f5 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -532,6 +532,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> >      uint64_t access_mask;
> >      unsigned access_size;
> >      unsigned i;
> > +    DeviceState *dev = NULL;
> >      MemTxResult r = MEMTX_OK;
> >  
> >      if (!access_size_min) {
> > @@ -541,6 +542,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> >          access_size_max = 4;
> >      }
> >  
> > +    /* Do not allow more than one simultanous access to a device's IO Regions */
> > +    if (mr->owner &&
> > +        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> 
> Would it make sense to define some helper function like
> memory_region_is_XXX (I assume XXX -> DEVICE_IO), to make that code
> easier to be consumed by humans?

Yes - There are a few other places that have similar checks. It probably
makes sense to consolidate them.
Thanks

> 
> Unfortunately I cannot really comment on the sanity of the approach,
> because the underlying problem isn't completely clear to me (I think
> other people on CC were involved in the discussions around DMA reentry
> and failed attempts in the past). Having that said, that approach
> doesn't look wrong to me.
> 
> -- 
> Thanks,
> 
> David / dhildenb
>
Peter Maydell June 21, 2022, 3:30 p.m. UTC | #4
On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
>
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> This flag is set/checked prior to calling a device's MemoryRegion
> handlers, and set when device code initiates DMA.  The purpose of this
> flag is to prevent two types of DMA-based reentrancy issues:
>
> 1.) mmio -> dma -> mmio case
> 2.) bh -> dma write -> mmio case
>
> These issues have led to problems such as stack-exhaustion and
> use-after-frees.
>
> Summary of the problem from Peter Maydell:
> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
>  include/hw/pci/pci.h   | 13 +++++++++++--
>  include/hw/qdev-core.h |  3 +++
>  softmmu/dma-helpers.c  | 12 ++++++++++++
>  softmmu/memory.c       | 15 +++++++++++++++
>  softmmu/trace-events   |  1 +
>  5 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 44dacfa224..ab1ad0f7a8 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
>                                       void *buf, dma_addr_t len,
>                                       DMADirection dir, MemTxAttrs attrs)
>  {
> -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> -                         dir, attrs);
> +    bool prior_engaged_state;
> +    MemTxResult result;
> +
> +    prior_engaged_state = dev->qdev.engaged_in_io;
> +
> +    dev->qdev.engaged_in_io = true;
> +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> +                           dir, attrs);
> +    dev->qdev.engaged_in_io = prior_engaged_state;
> +
> +    return result;

Why do we need to do something in this pci-specific function ?
I was expecting this to only need changes at the generic-to-all-devices
level.


> diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
> index 7820fec54c..7a4f1fb9b3 100644
> --- a/softmmu/dma-helpers.c
> +++ b/softmmu/dma-helpers.c
> @@ -288,8 +288,16 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
>      uint8_t *ptr = buf;
>      dma_addr_t xresidual;
>      int sg_cur_index;
> +    DeviceState *dev;
> +    bool prior_engaged_state;
>      MemTxResult res = MEMTX_OK;
>
> +    dev = sg->dev;
> +    if (dev) {
> +        prior_engaged_state = dev->engaged_in_io;
> +        dev->engaged_in_io = true;
> +    }
> +
>      xresidual = sg->size;
>      sg_cur_index = 0;
>      len = MIN(len, xresidual);
> @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
>          xresidual -= xfer;
>      }
>
> +    if (dev) {
> +        dev->engaged_in_io = prior_engaged_state;
> +    }

Not all DMA goes through dma_buf_rw() -- why does it need changes?

> +
>      if (residual) {
>          *residual = xresidual;
>      }

thanks
-- PMM
Alexander Bulekov June 21, 2022, 3:53 p.m. UTC | #5
On 220621 1630, Peter Maydell wrote:
> On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
> >
> > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> > This flag is set/checked prior to calling a device's MemoryRegion
> > handlers, and set when device code initiates DMA.  The purpose of this
> > flag is to prevent two types of DMA-based reentrancy issues:
> >
> > 1.) mmio -> dma -> mmio case
> > 2.) bh -> dma write -> mmio case
> >
> > These issues have led to problems such as stack-exhaustion and
> > use-after-frees.
> >
> > Summary of the problem from Peter Maydell:
> > https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> > Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> > ---
> >  include/hw/pci/pci.h   | 13 +++++++++++--
> >  include/hw/qdev-core.h |  3 +++
> >  softmmu/dma-helpers.c  | 12 ++++++++++++
> >  softmmu/memory.c       | 15 +++++++++++++++
> >  softmmu/trace-events   |  1 +
> >  5 files changed, 42 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 44dacfa224..ab1ad0f7a8 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> >                                       void *buf, dma_addr_t len,
> >                                       DMADirection dir, MemTxAttrs attrs)
> >  {
> > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > -                         dir, attrs);
> > +    bool prior_engaged_state;
> > +    MemTxResult result;
> > +
> > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > +
> > +    dev->qdev.engaged_in_io = true;
> > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > +                           dir, attrs);
> > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > +
> > +    return result;
> 
> Why do we need to do something in this pci-specific function ?
> I was expecting this to only need changes at the generic-to-all-devices
> level.

Both of these handle the BH->DMA->MMIO case. Unlike MMIO, I don't think
there is any neat way to set the engaged_in_io flag as we enter a BH. So
instead, we try to set it when a device initiates DMA.

The pci function lets us do that since we get a glimpse of the dev/qdev
(unlike the dma_memory_...  functions).

> 
> 
> > diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
> > index 7820fec54c..7a4f1fb9b3 100644
> > --- a/softmmu/dma-helpers.c
> > +++ b/softmmu/dma-helpers.c
> > @@ -288,8 +288,16 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> >      uint8_t *ptr = buf;
> >      dma_addr_t xresidual;
> >      int sg_cur_index;
> > +    DeviceState *dev;
> > +    bool prior_engaged_state;
> >      MemTxResult res = MEMTX_OK;
> >
> > +    dev = sg->dev;
> > +    if (dev) {
> > +        prior_engaged_state = dev->engaged_in_io;
> > +        dev->engaged_in_io = true;
> > +    }
> > +
> >      xresidual = sg->size;
> >      sg_cur_index = 0;
> >      len = MIN(len, xresidual);
> > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> >          xresidual -= xfer;
> >      }
> >
> > +    if (dev) {
> > +        dev->engaged_in_io = prior_engaged_state;
> > +    }
> 
> Not all DMA goes through dma_buf_rw() -- why does it need changes?

This one has the same goal, but accesses the qdev through sg, instead of
PCI.
-Alex


> 
> > +
> >      if (residual) {
> >          *residual = xresidual;
> >      }
> 
> thanks
> -- PMM
Stefan Hajnoczi July 12, 2022, 9:34 a.m. UTC | #6
On Tue, Jun 21, 2022 at 11:53:06AM -0400, Alexander Bulekov wrote:
> On 220621 1630, Peter Maydell wrote:
> > On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
> > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > index 44dacfa224..ab1ad0f7a8 100644
> > > --- a/include/hw/pci/pci.h
> > > +++ b/include/hw/pci/pci.h
> > > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> > >                                       void *buf, dma_addr_t len,
> > >                                       DMADirection dir, MemTxAttrs attrs)
> > >  {
> > > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > -                         dir, attrs);
> > > +    bool prior_engaged_state;
> > > +    MemTxResult result;
> > > +
> > > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > > +
> > > +    dev->qdev.engaged_in_io = true;
> > > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > +                           dir, attrs);
> > > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > > +
> > > +    return result;
> > 
> > Why do we need to do something in this pci-specific function ?
> > I was expecting this to only need changes at the generic-to-all-devices
> > level.
> 
> Both of these handle the BH->DMA->MMIO case. Unlike MMIO, I don't think
> there is any neat way to set the engaged_in_io flag as we enter a BH. So
> instead, we try to set it when a device initiates DMA.
> 
> The pci function lets us do that since we get a glimpse of the dev/qdev
> (unlike the dma_memory_...  functions).
...
> > > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> > >          xresidual -= xfer;
> > >      }
> > >
> > > +    if (dev) {
> > > +        dev->engaged_in_io = prior_engaged_state;
> > > +    }
> > 
> > Not all DMA goes through dma_buf_rw() -- why does it need changes?
> 
> This one has the same goal, but accesses the qdev through sg, instead of
> PCI.

Should dma_*() APIs take a reentrancy guard argument so that all DMA
accesses are systematically covered?

  /* Define this in the memory API */
  typedef struct {
      bool engaged_in_io;
  } MemReentrancyGuard;

  /* Embed MemReentrancyGuard in DeviceState */
  ...

  /* Require it in dma_*() APIs */
  static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
                                          void *buf, dma_addr_t len,
                                          DMADirection dir, MemTxAttrs attrs,
					  MemReentrancyGuard *guard);

  /* Call dma_*() APIs like this... */
  static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
                                       void *buf, dma_addr_t len,
                                       DMADirection dir, MemTxAttrs attrs)
  {
      return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
                           dir, attrs, &dev->qdev.reentrancy_guard);
  }

Stefan
Alexander Bulekov July 13, 2022, 3:51 p.m. UTC | #7
On 220712 1034, Stefan Hajnoczi wrote:
> On Tue, Jun 21, 2022 at 11:53:06AM -0400, Alexander Bulekov wrote:
> > On 220621 1630, Peter Maydell wrote:
> > > On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
> > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > index 44dacfa224..ab1ad0f7a8 100644
> > > > --- a/include/hw/pci/pci.h
> > > > +++ b/include/hw/pci/pci.h
> > > > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> > > >                                       void *buf, dma_addr_t len,
> > > >                                       DMADirection dir, MemTxAttrs attrs)
> > > >  {
> > > > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > -                         dir, attrs);
> > > > +    bool prior_engaged_state;
> > > > +    MemTxResult result;
> > > > +
> > > > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > > > +
> > > > +    dev->qdev.engaged_in_io = true;
> > > > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > +                           dir, attrs);
> > > > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > > > +
> > > > +    return result;
> > > 
> > > Why do we need to do something in this pci-specific function ?
> > > I was expecting this to only need changes at the generic-to-all-devices
> > > level.
> > 
> > Both of these handle the BH->DMA->MMIO case. Unlike MMIO, I don't think
> > there is any neat way to set the engaged_in_io flag as we enter a BH. So
> > instead, we try to set it when a device initiates DMA.
> > 
> > The pci function lets us do that since we get a glimpse of the dev/qdev
> > (unlike the dma_memory_...  functions).
> ...
> > > > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> > > >          xresidual -= xfer;
> > > >      }
> > > >
> > > > +    if (dev) {
> > > > +        dev->engaged_in_io = prior_engaged_state;
> > > > +    }
> > > 
> > > Not all DMA goes through dma_buf_rw() -- why does it need changes?
> > 
> > This one has the same goal, but accesses the qdev through sg, instead of
> > PCI.
> 
> Should dma_*() APIs take a reentrancy guard argument so that all DMA
> accesses are systematically covered?

That seems like it would be the best option, though it carries the cost
of needing to tweak a lot of code in hw/. Maybe some refactoring tool
could help with this.
-Alex

> 
>   /* Define this in the memory API */
>   typedef struct {
>       bool engaged_in_io;
>   } MemReentrancyGuard;
> 
>   /* Embed MemReentrancyGuard in DeviceState */
>   ...
> 
>   /* Require it in dma_*() APIs */
>   static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
>                                           void *buf, dma_addr_t len,
>                                           DMADirection dir, MemTxAttrs attrs,
> 					  MemReentrancyGuard *guard);
> 
>   /* Call dma_*() APIs like this... */
>   static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
>                                        void *buf, dma_addr_t len,
>                                        DMADirection dir, MemTxAttrs attrs)
>   {
>       return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
>                            dir, attrs, &dev->qdev.reentrancy_guard);
>   }
> 
> Stefan
Stefan Hajnoczi July 13, 2022, 5:31 p.m. UTC | #8
On Wed, 13 Jul 2022 at 16:51, Alexander Bulekov <alxndr@bu.edu> wrote:
>
> On 220712 1034, Stefan Hajnoczi wrote:
> > On Tue, Jun 21, 2022 at 11:53:06AM -0400, Alexander Bulekov wrote:
> > > On 220621 1630, Peter Maydell wrote:
> > > > On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
> > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > index 44dacfa224..ab1ad0f7a8 100644
> > > > > --- a/include/hw/pci/pci.h
> > > > > +++ b/include/hw/pci/pci.h
> > > > > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> > > > >                                       void *buf, dma_addr_t len,
> > > > >                                       DMADirection dir, MemTxAttrs attrs)
> > > > >  {
> > > > > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > > -                         dir, attrs);
> > > > > +    bool prior_engaged_state;
> > > > > +    MemTxResult result;
> > > > > +
> > > > > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > > > > +
> > > > > +    dev->qdev.engaged_in_io = true;
> > > > > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > > +                           dir, attrs);
> > > > > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > > > > +
> > > > > +    return result;
> > > >
> > > > Why do we need to do something in this pci-specific function ?
> > > > I was expecting this to only need changes at the generic-to-all-devices
> > > > level.
> > >
> > > Both of these handle the BH->DMA->MMIO case. Unlike MMIO, I don't think
> > > there is any neat way to set the engaged_in_io flag as we enter a BH. So
> > > instead, we try to set it when a device initiates DMA.
> > >
> > > The pci function lets us do that since we get a glimpse of the dev/qdev
> > > (unlike the dma_memory_...  functions).
> > ...
> > > > > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> > > > >          xresidual -= xfer;
> > > > >      }
> > > > >
> > > > > +    if (dev) {
> > > > > +        dev->engaged_in_io = prior_engaged_state;
> > > > > +    }
> > > >
> > > > Not all DMA goes through dma_buf_rw() -- why does it need changes?
> > >
> > > This one has the same goal, but accesses the qdev through sg, instead of
> > > PCI.
> >
> > Should dma_*() APIs take a reentrancy guard argument so that all DMA
> > accesses are systematically covered?
>
> That seems like it would be the best option, though it carries the cost
> of needing to tweak a lot of code in hw/. Maybe some refactoring tool
> could help with this.

Coccinelle is good at adding/removing arguments, but it doesn't know
how to get the DeviceState from, say, a PCIDevice. It may be possible
to cover the majority of cases automatically by writing a cocci
pattern that applies when the containing function takes a PCIDevice*
argument, for example. Some manual work would still be necessary.

Stefan
Alexander Bulekov Oct. 20, 2022, 10:11 p.m. UTC | #9
On 220712 1034, Stefan Hajnoczi wrote:
> On Tue, Jun 21, 2022 at 11:53:06AM -0400, Alexander Bulekov wrote:
> > On 220621 1630, Peter Maydell wrote:
> > > On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
> > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > index 44dacfa224..ab1ad0f7a8 100644
> > > > --- a/include/hw/pci/pci.h
> > > > +++ b/include/hw/pci/pci.h
> > > > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> > > >                                       void *buf, dma_addr_t len,
> > > >                                       DMADirection dir, MemTxAttrs attrs)
> > > >  {
> > > > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > -                         dir, attrs);
> > > > +    bool prior_engaged_state;
> > > > +    MemTxResult result;
> > > > +
> > > > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > > > +
> > > > +    dev->qdev.engaged_in_io = true;
> > > > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > +                           dir, attrs);
> > > > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > > > +
> > > > +    return result;
> > > 
> > > Why do we need to do something in this pci-specific function ?
> > > I was expecting this to only need changes at the generic-to-all-devices
> > > level.
> > 
> > Both of these handle the BH->DMA->MMIO case. Unlike MMIO, I don't think
> > there is any neat way to set the engaged_in_io flag as we enter a BH. So
> > instead, we try to set it when a device initiates DMA.
> > 
> > The pci function lets us do that since we get a glimpse of the dev/qdev
> > (unlike the dma_memory_...  functions).
> ...
> > > > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> > > >          xresidual -= xfer;
> > > >      }
> > > >
> > > > +    if (dev) {
> > > > +        dev->engaged_in_io = prior_engaged_state;
> > > > +    }
> > > 
> > > Not all DMA goes through dma_buf_rw() -- why does it need changes?
> > 
> > This one has the same goal, but accesses the qdev through sg, instead of
> > PCI.
> 
> Should dma_*() APIs take a reentrancy guard argument so that all DMA
> accesses are systematically covered?
> 
>   /* Define this in the memory API */
>   typedef struct {
>       bool engaged_in_io;
>   } MemReentrancyGuard;
> 
>   /* Embed MemReentrancyGuard in DeviceState */
>   ...
> 
>   /* Require it in dma_*() APIs */
>   static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
>                                           void *buf, dma_addr_t len,
>                                           DMADirection dir, MemTxAttrs attrs,
> 					  MemReentrancyGuard *guard);
> 
>   /* Call dma_*() APIs like this... */
>   static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
>                                        void *buf, dma_addr_t len,
>                                        DMADirection dir, MemTxAttrs attrs)
>   {
>       return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
>                            dir, attrs, &dev->qdev.reentrancy_guard);
>   }
> 

Taking a stab at this. Here is the list of DMA APIs that appear to need
changes:
dma_memory_valid (1 usage)
dma_memory_rw (~5 uses)
dma_memory_read (~92 uses)
dma_memory_write (~71 uses)
dma_memory_set (~4 uses)
dma_memory_map (~18 uses)
dma_memory_unmap (~21 uses)
{ld,st}_{le,be}_{uw,l,q}_dma (~10 uses)
ldub_dma (does not appear to be used anywhere)
stb_dma (1 usage)
dma_buf_read (~18 uses)
dma_buf_write (~7 uses)

These appear to be internal to the DMA API and probably don't need to be
changed:
dma_memory_read_relaxed (does not appear to be used anywhere)
dma_memory_write_relaxed (does not appear to be used anywhere)
dma_memory_rw_relaxed 

I don't think the sglist APIs need to be changed since we can get
DeviceState from the QEMUSGList.

Does this look more-or-less right?
-Alex
Stefan Hajnoczi Oct. 24, 2022, 6:46 p.m. UTC | #10
On Thu, Oct 20, 2022 at 06:11:06PM -0400, Alexander Bulekov wrote:
> On 220712 1034, Stefan Hajnoczi wrote:
> > On Tue, Jun 21, 2022 at 11:53:06AM -0400, Alexander Bulekov wrote:
> > > On 220621 1630, Peter Maydell wrote:
> > > > On Thu, 9 Jun 2022 at 14:59, Alexander Bulekov <alxndr@bu.edu> wrote:
> > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > index 44dacfa224..ab1ad0f7a8 100644
> > > > > --- a/include/hw/pci/pci.h
> > > > > +++ b/include/hw/pci/pci.h
> > > > > @@ -834,8 +834,17 @@ static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> > > > >                                       void *buf, dma_addr_t len,
> > > > >                                       DMADirection dir, MemTxAttrs attrs)
> > > > >  {
> > > > > -    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > > -                         dir, attrs);
> > > > > +    bool prior_engaged_state;
> > > > > +    MemTxResult result;
> > > > > +
> > > > > +    prior_engaged_state = dev->qdev.engaged_in_io;
> > > > > +
> > > > > +    dev->qdev.engaged_in_io = true;
> > > > > +    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> > > > > +                           dir, attrs);
> > > > > +    dev->qdev.engaged_in_io = prior_engaged_state;
> > > > > +
> > > > > +    return result;
> > > > 
> > > > Why do we need to do something in this pci-specific function ?
> > > > I was expecting this to only need changes at the generic-to-all-devices
> > > > level.
> > > 
> > > Both of these handle the BH->DMA->MMIO case. Unlike MMIO, I don't think
> > > there is any neat way to set the engaged_in_io flag as we enter a BH. So
> > > instead, we try to set it when a device initiates DMA.
> > > 
> > > The pci function lets us do that since we get a glimpse of the dev/qdev
> > > (unlike the dma_memory_...  functions).
> > ...
> > > > > @@ -302,6 +310,10 @@ static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
> > > > >          xresidual -= xfer;
> > > > >      }
> > > > >
> > > > > +    if (dev) {
> > > > > +        dev->engaged_in_io = prior_engaged_state;
> > > > > +    }
> > > > 
> > > > Not all DMA goes through dma_buf_rw() -- why does it need changes?
> > > 
> > > This one has the same goal, but accesses the qdev through sg, instead of
> > > PCI.
> > 
> > Should dma_*() APIs take a reentrancy guard argument so that all DMA
> > accesses are systematically covered?
> > 
> >   /* Define this in the memory API */
> >   typedef struct {
> >       bool engaged_in_io;
> >   } MemReentrancyGuard;
> > 
> >   /* Embed MemReentrancyGuard in DeviceState */
> >   ...
> > 
> >   /* Require it in dma_*() APIs */
> >   static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
> >                                           void *buf, dma_addr_t len,
> >                                           DMADirection dir, MemTxAttrs attrs,
> > 					  MemReentrancyGuard *guard);
> > 
> >   /* Call dma_*() APIs like this... */
> >   static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> >                                        void *buf, dma_addr_t len,
> >                                        DMADirection dir, MemTxAttrs attrs)
> >   {
> >       return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
> >                            dir, attrs, &dev->qdev.reentrancy_guard);
> >   }
> > 
> 
> Taking a stab at this. Here is the list of DMA APIs that appear to need
> changes:
> dma_memory_valid (1 usage)
> dma_memory_rw (~5 uses)
> dma_memory_read (~92 uses)
> dma_memory_write (~71 uses)
> dma_memory_set (~4 uses)
> dma_memory_map (~18 uses)
> dma_memory_unmap (~21 uses)
> {ld,st}_{le,be}_{uw,l,q}_dma (~10 uses)
> ldub_dma (does not appear to be used anywhere)
> stb_dma (1 usage)
> dma_buf_read (~18 uses)
> dma_buf_write (~7 uses)
> 
> These appear to be internal to the DMA API and probably don't need to be
> changed:
> dma_memory_read_relaxed (does not appear to be used anywhere)
> dma_memory_write_relaxed (does not appear to be used anywhere)
> dma_memory_rw_relaxed 
> 
> I don't think the sglist APIs need to be changed since we can get
> DeviceState from the QEMUSGList.
> 
> Does this look more-or-less right?

That's along the lines of what I would expect. Interesting that
map/unmap is also on the list; it makes sense when considering bounce
buffers.

Stefan
Peter Maydell Oct. 25, 2022, 10:11 a.m. UTC | #11
On Mon, 24 Oct 2022 at 19:46, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Thu, Oct 20, 2022 at 06:11:06PM -0400, Alexander Bulekov wrote:
> > Taking a stab at this. Here is the list of DMA APIs that appear to need
> > changes:
> > dma_memory_valid (1 usage)
> > dma_memory_rw (~5 uses)
> > dma_memory_read (~92 uses)
> > dma_memory_write (~71 uses)
> > dma_memory_set (~4 uses)
> > dma_memory_map (~18 uses)
> > dma_memory_unmap (~21 uses)
> > {ld,st}_{le,be}_{uw,l,q}_dma (~10 uses)
> > ldub_dma (does not appear to be used anywhere)
> > stb_dma (1 usage)
> > dma_buf_read (~18 uses)
> > dma_buf_write (~7 uses)
> >
> > These appear to be internal to the DMA API and probably don't need to be
> > changed:
> > dma_memory_read_relaxed (does not appear to be used anywhere)
> > dma_memory_write_relaxed (does not appear to be used anywhere)
> > dma_memory_rw_relaxed
> >
> > I don't think the sglist APIs need to be changed since we can get
> > DeviceState from the QEMUSGList.
> >
> > Does this look more-or-less right?
>
> That's along the lines of what I would expect. Interesting that
> map/unmap is also on the list; it makes sense when considering bounce
> buffers.

Not all devices that DMA do it via the dma_memory_* wrappers, of course:
some just use address_space_* functions directly. I guess maybe
we can just make the devices where we care about this problem
be more consistent about what function family they use.

-- PMM
diff mbox series

Patch

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 44dacfa224..ab1ad0f7a8 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -834,8 +834,17 @@  static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
                                      void *buf, dma_addr_t len,
                                      DMADirection dir, MemTxAttrs attrs)
 {
-    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
-                         dir, attrs);
+    bool prior_engaged_state;
+    MemTxResult result;
+
+    prior_engaged_state = dev->qdev.engaged_in_io;
+
+    dev->qdev.engaged_in_io = true;
+    result = dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
+                           dir, attrs);
+    dev->qdev.engaged_in_io = prior_engaged_state;
+
+    return result;
 }
 
 /**
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 92c3d65208..6474dc51fa 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -193,6 +193,9 @@  struct DeviceState {
     int instance_id_alias;
     int alias_required_for_version;
     ResettableState reset;
+
+    /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
+    int engaged_in_io;
 };
 
 struct DeviceListener {
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 7820fec54c..7a4f1fb9b3 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -288,8 +288,16 @@  static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
     uint8_t *ptr = buf;
     dma_addr_t xresidual;
     int sg_cur_index;
+    DeviceState *dev;
+    bool prior_engaged_state;
     MemTxResult res = MEMTX_OK;
 
+    dev = sg->dev;
+    if (dev) {
+        prior_engaged_state = dev->engaged_in_io;
+        dev->engaged_in_io = true;
+    }
+
     xresidual = sg->size;
     sg_cur_index = 0;
     len = MIN(len, xresidual);
@@ -302,6 +310,10 @@  static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
         xresidual -= xfer;
     }
 
+    if (dev) {
+        dev->engaged_in_io = prior_engaged_state;
+    }
+
     if (residual) {
         *residual = xresidual;
     }
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7ba2048836..44a14bb4f5 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -532,6 +532,7 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
     uint64_t access_mask;
     unsigned access_size;
     unsigned i;
+    DeviceState *dev = NULL;
     MemTxResult r = MEMTX_OK;
 
     if (!access_size_min) {
@@ -541,6 +542,17 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
         access_size_max = 4;
     }
 
+    /* Do not allow more than one simultanous access to a device's IO Regions */
+    if (mr->owner &&
+        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
+        dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
+        if (dev->engaged_in_io) {
+            trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
+            return MEMTX_ERROR;
+        }
+        dev->engaged_in_io = true;
+    }
+
     /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -555,6 +567,9 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
                         access_mask, attrs);
         }
     }
+    if (dev) {
+        dev->engaged_in_io = false;
+    }
     return r;
 }
 
diff --git a/softmmu/trace-events b/softmmu/trace-events
index 9c88887b3c..d7228316db 100644
--- a/softmmu/trace-events
+++ b/softmmu/trace-events
@@ -13,6 +13,7 @@  memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
 memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
 memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_reentrant_io(int cpu_index, void *mr, uint64_t offset, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" size %u"
 memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"