Message ID | 20230427211013.2994127-2-alxndr@bu.edu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | memory: prevent dma-reentrancy issues | expand |
On 27/04/2023 23.10, Alexander Bulekov wrote: > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. > This flag is set/checked prior to calling a device's MemoryRegion > handlers, and set when device code initiates DMA. The purpose of this > flag is to prevent two types of DMA-based reentrancy issues: > > 1.) mmio -> dma -> mmio case > 2.) bh -> dma write -> mmio case > > These issues have led to problems such as stack-exhaustion and > use-after-frees. > > Summary of the problem from Peter Maydell: > https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 > Resolves: CVE-2023-0330 > > Signed-off-by: Alexander Bulekov <alxndr@bu.edu> > Reviewed-by: Thomas Huth <thuth@redhat.com> > --- > include/exec/memory.h | 5 +++++ > include/hw/qdev-core.h | 7 +++++++ > softmmu/memory.c | 16 ++++++++++++++++ > 3 files changed, 28 insertions(+) > > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 15ade918ba..e45ce6061f 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -767,6 +767,8 @@ struct MemoryRegion { > bool is_iommu; > RAMBlock *ram_block; > Object *owner; > + /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ > + DeviceState *dev; > > const MemoryRegionOps *ops; > void *opaque; > @@ -791,6 +793,9 @@ struct MemoryRegion { > unsigned ioeventfd_nb; > MemoryRegionIoeventfd *ioeventfds; > RamDiscardManager *rdm; /* Only for RAM */ > + > + /* For devices designed to perform re-entrant IO into their own IO MRs */ > + bool disable_reentrancy_guard; > }; > > struct IOMMUMemoryRegion { > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h > index bd50ad5ee1..7623703943 100644 > --- a/include/hw/qdev-core.h > +++ b/include/hw/qdev-core.h > @@ -162,6 +162,10 @@ struct NamedClockList { > QLIST_ENTRY(NamedClockList) node; > }; > > +typedef struct { > + bool engaged_in_io; > +} MemReentrancyGuard; > + > /** > * DeviceState: > * @realized: Indicates whether the device has been fully constructed. > @@ -194,6 +198,9 @@ struct DeviceState { > int alias_required_for_version; > ResettableState reset; > GSList *unplug_blockers; > + > + /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ > + MemReentrancyGuard mem_reentrancy_guard; > }; > > struct DeviceListener { > diff --git a/softmmu/memory.c b/softmmu/memory.c > index b1a6cae6f5..fe23f0e5ce 100644 > --- a/softmmu/memory.c > +++ b/softmmu/memory.c > @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, > access_size_max = 4; > } > > + /* Do not allow more than one simultaneous access to a device's IO Regions */ > + if (mr->dev && !mr->disable_reentrancy_guard && > + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { > + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { > + warn_report("Blocked re-entrant IO on " > + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, > + memory_region_name(mr), addr); Ack, a warn_report make sense here, at least initially, to make sure that people get aware of related problems! Thomas
On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. > This flag is set/checked prior to calling a device's MemoryRegion > handlers, and set when device code initiates DMA. The purpose of this > flag is to prevent two types of DMA-based reentrancy issues: > > 1.) mmio -> dma -> mmio case > 2.) bh -> dma write -> mmio case > > These issues have led to problems such as stack-exhaustion and > use-after-frees. > > Summary of the problem from Peter Maydell: > https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 > Resolves: CVE-2023-0330 > > Signed-off-by: Alexander Bulekov <alxndr@bu.edu> > Reviewed-by: Thomas Huth <thuth@redhat.com> > --- > include/exec/memory.h | 5 +++++ > include/hw/qdev-core.h | 7 +++++++ > softmmu/memory.c | 16 ++++++++++++++++ > 3 files changed, 28 insertions(+) > > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 15ade918ba..e45ce6061f 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -767,6 +767,8 @@ struct MemoryRegion { > bool is_iommu; > RAMBlock *ram_block; > Object *owner; > + /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ > + DeviceState *dev; > > const MemoryRegionOps *ops; > void *opaque; > @@ -791,6 +793,9 @@ struct MemoryRegion { > unsigned ioeventfd_nb; > MemoryRegionIoeventfd *ioeventfds; > RamDiscardManager *rdm; /* Only for RAM */ > + > + /* For devices designed to perform re-entrant IO into their own IO MRs */ > + bool disable_reentrancy_guard; > }; > > struct IOMMUMemoryRegion { > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h > index bd50ad5ee1..7623703943 100644 > --- a/include/hw/qdev-core.h > +++ b/include/hw/qdev-core.h > @@ -162,6 +162,10 @@ struct NamedClockList { > QLIST_ENTRY(NamedClockList) node; > }; > > +typedef struct { > + bool engaged_in_io; > +} MemReentrancyGuard; > + > /** > * DeviceState: > * @realized: Indicates whether the device has been fully constructed. > @@ -194,6 +198,9 @@ struct DeviceState { > int alias_required_for_version; > ResettableState reset; > GSList *unplug_blockers; > + > + /* Is the device currently in mmio/pio/dma? 
Used to prevent re-entrancy */ > + MemReentrancyGuard mem_reentrancy_guard; > }; > > struct DeviceListener { > diff --git a/softmmu/memory.c b/softmmu/memory.c > index b1a6cae6f5..fe23f0e5ce 100644 > --- a/softmmu/memory.c > +++ b/softmmu/memory.c > @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, > access_size_max = 4; > } > > + /* Do not allow more than one simultaneous access to a device's IO Regions */ > + if (mr->dev && !mr->disable_reentrancy_guard && > + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { > + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { > + warn_report("Blocked re-entrant IO on " > + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, > + memory_region_name(mr), addr); > + return MEMTX_ACCESS_ERROR; If we issue this warn_report on every invalid memory access, is this going to become a denial of service by flooding logs, or is the return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed *once* in the lifetime of the QEMU process ? With regards, Daniel
On 28/04/2023 10.12, Daniel P. Berrangé wrote: > On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. >> This flag is set/checked prior to calling a device's MemoryRegion >> handlers, and set when device code initiates DMA. The purpose of this >> flag is to prevent two types of DMA-based reentrancy issues: >> >> 1.) mmio -> dma -> mmio case >> 2.) bh -> dma write -> mmio case >> >> These issues have led to problems such as stack-exhaustion and >> use-after-frees. >> >> Summary of the problem from Peter Maydell: >> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com >> >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 >> Resolves: CVE-2023-0330 >> >> Signed-off-by: Alexander Bulekov <alxndr@bu.edu> >> Reviewed-by: Thomas Huth <thuth@redhat.com> >> --- >> include/exec/memory.h | 5 +++++ >> include/hw/qdev-core.h | 7 +++++++ >> softmmu/memory.c | 16 ++++++++++++++++ >> 3 files changed, 28 insertions(+) >> >> diff --git a/include/exec/memory.h b/include/exec/memory.h >> index 15ade918ba..e45ce6061f 100644 >> --- a/include/exec/memory.h >> +++ b/include/exec/memory.h >> @@ -767,6 +767,8 @@ struct MemoryRegion { >> bool is_iommu; >> RAMBlock *ram_block; >> Object *owner; >> + /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ >> + DeviceState *dev; >> >> const MemoryRegionOps *ops; >> void *opaque; >> @@ -791,6 +793,9 @@ struct MemoryRegion { >> unsigned ioeventfd_nb; >> MemoryRegionIoeventfd *ioeventfds; >> RamDiscardManager *rdm; /* Only for RAM */ >> + >> + /* For devices designed to perform re-entrant IO into their own IO MRs */ >> + bool disable_reentrancy_guard; >> }; >> >> struct IOMMUMemoryRegion { >> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h >> index bd50ad5ee1..7623703943 100644 >> --- a/include/hw/qdev-core.h >> +++ b/include/hw/qdev-core.h >> @@ -162,6 +162,10 @@ struct NamedClockList { >> QLIST_ENTRY(NamedClockList) node; >> }; >> >> +typedef struct { >> + bool engaged_in_io; >> +} MemReentrancyGuard; >> + >> /** >> * DeviceState: >> * @realized: Indicates whether the device has been fully constructed. >> @@ -194,6 +198,9 @@ struct DeviceState { >> int alias_required_for_version; >> ResettableState reset; >> GSList *unplug_blockers; >> + >> + /* Is the device currently in mmio/pio/dma? 
Used to prevent re-entrancy */ >> + MemReentrancyGuard mem_reentrancy_guard; >> }; >> >> struct DeviceListener { >> diff --git a/softmmu/memory.c b/softmmu/memory.c >> index b1a6cae6f5..fe23f0e5ce 100644 >> --- a/softmmu/memory.c >> +++ b/softmmu/memory.c >> @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, >> access_size_max = 4; >> } >> >> + /* Do not allow more than one simultaneous access to a device's IO Regions */ >> + if (mr->dev && !mr->disable_reentrancy_guard && >> + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { >> + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { >> + warn_report("Blocked re-entrant IO on " >> + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, >> + memory_region_name(mr), addr); >> + return MEMTX_ACCESS_ERROR; > > If we issue this warn_report on every invalid memory access, is this > going to become a denial of service by flooding logs, or is the > return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed > *once* in the lifetime of the QEMU process ? Maybe it's better to use warn_report_once() here instead? Thomas
On 230428 1015, Thomas Huth wrote: > On 28/04/2023 10.12, Daniel P. Berrangé wrote: > > On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: > > > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. > > > This flag is set/checked prior to calling a device's MemoryRegion > > > handlers, and set when device code initiates DMA. The purpose of this > > > flag is to prevent two types of DMA-based reentrancy issues: > > > > > > 1.) mmio -> dma -> mmio case > > > 2.) bh -> dma write -> mmio case > > > > > > These issues have led to problems such as stack-exhaustion and > > > use-after-frees. > > > > > > Summary of the problem from Peter Maydell: > > > https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com > > > > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 > > > Resolves: CVE-2023-0330 > > > > > > Signed-off-by: Alexander Bulekov <alxndr@bu.edu> > > > Reviewed-by: Thomas Huth <thuth@redhat.com> > > > --- > > > include/exec/memory.h | 5 +++++ > > > include/hw/qdev-core.h | 7 +++++++ > > > softmmu/memory.c | 16 ++++++++++++++++ > > > 3 files changed, 28 insertions(+) > > > > > > diff --git a/include/exec/memory.h b/include/exec/memory.h > > > index 15ade918ba..e45ce6061f 100644 > > > --- a/include/exec/memory.h > > > +++ b/include/exec/memory.h > > > @@ -767,6 +767,8 @@ struct MemoryRegion { > > > bool is_iommu; > > > RAMBlock *ram_block; > > > Object *owner; > > > + /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ > > > + DeviceState *dev; > > > const MemoryRegionOps *ops; > > > void *opaque; > > > @@ -791,6 +793,9 @@ struct MemoryRegion { > > > unsigned ioeventfd_nb; > > > MemoryRegionIoeventfd *ioeventfds; > > > RamDiscardManager *rdm; /* Only for RAM */ > > > + > > > + /* For devices designed to perform re-entrant IO into their own IO MRs */ > > > + bool disable_reentrancy_guard; > > > }; > > > struct IOMMUMemoryRegion { > > > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h > > > index bd50ad5ee1..7623703943 100644 > > > --- a/include/hw/qdev-core.h > > > +++ b/include/hw/qdev-core.h > > > @@ -162,6 +162,10 @@ struct NamedClockList { > > > QLIST_ENTRY(NamedClockList) node; > > > }; > > > +typedef struct { > > > + bool engaged_in_io; > > > +} MemReentrancyGuard; > > > + > > > /** > > > * DeviceState: > > > * @realized: Indicates whether the device has been fully constructed. > > > @@ -194,6 +198,9 @@ struct DeviceState { > > > int alias_required_for_version; > > > ResettableState reset; > > > GSList *unplug_blockers; > > > + > > > + /* Is the device currently in mmio/pio/dma? 
Used to prevent re-entrancy */ > > > + MemReentrancyGuard mem_reentrancy_guard; > > > }; > > > struct DeviceListener { > > > diff --git a/softmmu/memory.c b/softmmu/memory.c > > > index b1a6cae6f5..fe23f0e5ce 100644 > > > --- a/softmmu/memory.c > > > +++ b/softmmu/memory.c > > > @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, > > > access_size_max = 4; > > > } > > > + /* Do not allow more than one simultaneous access to a device's IO Regions */ > > > + if (mr->dev && !mr->disable_reentrancy_guard && > > > + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { > > > + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { > > > + warn_report("Blocked re-entrant IO on " > > > + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, > > > + memory_region_name(mr), addr); > > > + return MEMTX_ACCESS_ERROR; > > > > If we issue this warn_report on every invalid memory access, is this > > going to become a denial of service by flooding logs, or is the > > return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed > > *once* in the lifetime of the QEMU process ? > > Maybe it's better to use warn_report_once() here instead? Sounds good - should I respin the series to change this? -Alex
On 28/04/2023 11.11, Alexander Bulekov wrote: > On 230428 1015, Thomas Huth wrote: >> On 28/04/2023 10.12, Daniel P. Berrangé wrote: >>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >>>> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. >>>> This flag is set/checked prior to calling a device's MemoryRegion >>>> handlers, and set when device code initiates DMA. The purpose of this >>>> flag is to prevent two types of DMA-based reentrancy issues: >>>> >>>> 1.) mmio -> dma -> mmio case >>>> 2.) bh -> dma write -> mmio case >>>> >>>> These issues have led to problems such as stack-exhaustion and >>>> use-after-frees. >>>> >>>> Summary of the problem from Peter Maydell: >>>> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com >>>> >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 >>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 >>>> Resolves: CVE-2023-0330 >>>> >>>> Signed-off-by: Alexander Bulekov <alxndr@bu.edu> >>>> Reviewed-by: Thomas Huth <thuth@redhat.com> >>>> --- >>>> include/exec/memory.h | 5 +++++ >>>> include/hw/qdev-core.h | 7 +++++++ >>>> softmmu/memory.c | 16 ++++++++++++++++ >>>> 3 files changed, 28 insertions(+) >>>> >>>> diff --git a/include/exec/memory.h b/include/exec/memory.h >>>> index 15ade918ba..e45ce6061f 100644 >>>> --- a/include/exec/memory.h >>>> +++ b/include/exec/memory.h >>>> @@ -767,6 +767,8 @@ struct MemoryRegion { >>>> bool is_iommu; >>>> RAMBlock *ram_block; >>>> Object *owner; >>>> + /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ >>>> + DeviceState *dev; >>>> const MemoryRegionOps *ops; >>>> void *opaque; >>>> @@ -791,6 +793,9 @@ struct MemoryRegion { >>>> unsigned ioeventfd_nb; >>>> MemoryRegionIoeventfd *ioeventfds; >>>> RamDiscardManager *rdm; /* Only for RAM */ >>>> + >>>> + /* For devices designed to perform re-entrant IO into their own IO MRs */ >>>> + bool disable_reentrancy_guard; >>>> }; >>>> struct IOMMUMemoryRegion { >>>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h >>>> index bd50ad5ee1..7623703943 100644 >>>> --- a/include/hw/qdev-core.h >>>> +++ b/include/hw/qdev-core.h >>>> @@ -162,6 +162,10 @@ struct NamedClockList { >>>> QLIST_ENTRY(NamedClockList) node; >>>> }; >>>> +typedef struct { >>>> + bool engaged_in_io; >>>> +} MemReentrancyGuard; >>>> + >>>> /** >>>> * DeviceState: >>>> * @realized: Indicates whether the device has been fully constructed. >>>> @@ -194,6 +198,9 @@ struct DeviceState { >>>> int alias_required_for_version; >>>> ResettableState reset; >>>> GSList *unplug_blockers; >>>> + >>>> + /* Is the device currently in mmio/pio/dma? 
Used to prevent re-entrancy */ >>>> + MemReentrancyGuard mem_reentrancy_guard; >>>> }; >>>> struct DeviceListener { >>>> diff --git a/softmmu/memory.c b/softmmu/memory.c >>>> index b1a6cae6f5..fe23f0e5ce 100644 >>>> --- a/softmmu/memory.c >>>> +++ b/softmmu/memory.c >>>> @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, >>>> access_size_max = 4; >>>> } >>>> + /* Do not allow more than one simultaneous access to a device's IO Regions */ >>>> + if (mr->dev && !mr->disable_reentrancy_guard && >>>> + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { >>>> + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { >>>> + warn_report("Blocked re-entrant IO on " >>>> + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, >>>> + memory_region_name(mr), addr); >>>> + return MEMTX_ACCESS_ERROR; >>> >>> If we issue this warn_report on every invalid memory access, is this >>> going to become a denial of service by flooding logs, or is the >>> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed >>> *once* in the lifetime of the QEMU process ? >> >> Maybe it's better to use warn_report_once() here instead? > > Sounds good - should I respin the series to change this? Not necessary, I've got v10 already queued, I'll fix it up there Thomas
Hi Alexander 在 2023/4/28 下午5:14, Thomas Huth 写道: > On 28/04/2023 11.11, Alexander Bulekov wrote: >> On 230428 1015, Thomas Huth wrote: >>> On 28/04/2023 10.12, Daniel P. Berrangé wrote: >>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >>>>> Add a flag to the DeviceState, when a device is engaged in >>>>> PIO/MMIO/DMA. >>>>> This flag is set/checked prior to calling a device's MemoryRegion >>>>> handlers, and set when device code initiates DMA. The purpose of >>>>> this >>>>> flag is to prevent two types of DMA-based reentrancy issues: >>>>> >>>>> 1.) mmio -> dma -> mmio case >>>>> 2.) bh -> dma write -> mmio case >>>>> >>>>> These issues have led to problems such as stack-exhaustion and >>>>> use-after-frees. >>>>> >>>>> Summary of the problem from Peter Maydell: >>>>> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com >>>>> >>>>> >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 >>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 >>>>> Resolves: CVE-2023-0330 >>>>> >>>>> Signed-off-by: Alexander Bulekov <alxndr@bu.edu> >>>>> Reviewed-by: Thomas Huth <thuth@redhat.com> >>>>> --- >>>>> include/exec/memory.h | 5 +++++ >>>>> include/hw/qdev-core.h | 7 +++++++ >>>>> softmmu/memory.c | 16 ++++++++++++++++ >>>>> 3 files changed, 28 insertions(+) >>>>> >>>>> diff --git a/include/exec/memory.h b/include/exec/memory.h >>>>> index 15ade918ba..e45ce6061f 100644 >>>>> --- a/include/exec/memory.h >>>>> +++ b/include/exec/memory.h >>>>> @@ -767,6 +767,8 @@ struct MemoryRegion { >>>>> bool is_iommu; >>>>> RAMBlock *ram_block; >>>>> Object *owner; >>>>> + 
/* owner as TYPE_DEVICE. Used for re-entrancy checks in MR >>>>> access hotpath */ >>>>> + DeviceState *dev; >>>>> const MemoryRegionOps *ops; >>>>> void *opaque; >>>>> @@ -791,6 +793,9 @@ struct MemoryRegion { >>>>> unsigned ioeventfd_nb; >>>>> MemoryRegionIoeventfd *ioeventfds; >>>>> RamDiscardManager *rdm; /* Only for RAM */ >>>>> + >>>>> + /* For devices designed to perform re-entrant IO into their >>>>> own IO MRs */ >>>>> + bool disable_reentrancy_guard; >>>>> }; >>>>> struct IOMMUMemoryRegion { >>>>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h >>>>> index bd50ad5ee1..7623703943 100644 >>>>> --- a/include/hw/qdev-core.h >>>>> +++ b/include/hw/qdev-core.h >>>>> @@ -162,6 +162,10 @@ struct NamedClockList { >>>>> QLIST_ENTRY(NamedClockList) node; >>>>> }; >>>>> +typedef struct { >>>>> + bool engaged_in_io; >>>>> +} MemReentrancyGuard; >>>>> + >>>>> /** >>>>> * DeviceState: >>>>> * @realized: Indicates whether the device has been fully >>>>> constructed. >>>>> @@ -194,6 +198,9 @@ struct DeviceState { >>>>> int alias_required_for_version; >>>>> ResettableState reset; >>>>> GSList *unplug_blockers; >>>>> + >>>>> + /* Is the device currently in mmio/pio/dma? 
Used to prevent >>>>> re-entrancy */ >>>>> + MemReentrancyGuard mem_reentrancy_guard; >>>>> }; >>>>> struct DeviceListener { >>>>> diff --git a/softmmu/memory.c b/softmmu/memory.c >>>>> index b1a6cae6f5..fe23f0e5ce 100644 >>>>> --- a/softmmu/memory.c >>>>> +++ b/softmmu/memory.c >>>>> @@ -542,6 +542,18 @@ static MemTxResult >>>>> access_with_adjusted_size(hwaddr addr, >>>>> access_size_max = 4; >>>>> } >>>>> + /* Do not allow more than one simultaneous access to a >>>>> device's IO Regions */ >>>>> + if (mr->dev && !mr->disable_reentrancy_guard && >>>>> + !mr->ram_device && !mr->ram && !mr->rom_device && >>>>> !mr->readonly) { >>>>> + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { >>>>> + warn_report("Blocked re-entrant IO on " >>>>> + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, >>>>> + memory_region_name(mr), addr); >>>>> + return MEMTX_ACCESS_ERROR; >>>> >>>> If we issue this warn_report on every invalid memory access, is this >>>> going to become a denial of service by flooding logs, or is the >>>> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed >>>> *once* in the lifetime of the QEMU process ? >>> >>> Maybe it's better to use warn_report_once() here instead? >> >> Sounds good - should I respin the series to change this? > > Not necessary, I've got v10 already queued, I'll fix it up there > > Thomas > This patch causes the loongarch virtual machine to fail to start the slave cpu. ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \ -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd ramdisk \ -serial stdio -monitor telnet:localhost:4495,server,nowait \ -append "root=/dev/ram rdinit=/sbin/init console=ttyS0,115200" --nographic .... qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: loongarch_ipi_iocsr at addr: 0x24 .... [ 0.059284] smp: Bringing up secondary CPUs ... [ 0.062540] Booting CPU#1... [ 5.204340] CPU1: failed to start [ 5.211435] Booting CPU#2... 
[ 10.465762] CPU2: failed to start [ 10.467757] Booting CPU#3... [ 15.805430] CPU3: failed to start [ 15.805980] smp: Brought up 1 node, 1 CPU [ 15.818832] devtmpfs: initialized [ 15.830287] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns [ 15.830429] futex hash table entries: 1024 (order: 2, 65536 bytes, linear) [ 15.840470] NET: Registered PF_NETLINK/PF_ROUTE protocol family [ 15.845424] audit: initializing netlink subsys (disabled) ... gdb (gdb) n qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: loongarch_ipi_iocsr at addr: 0x24 552 return MEMTX_ACCESS_ERROR; (gdb) bt #0 0x0000555555ae93aa in access_with_adjusted_size (addr=36, value=0x7fffd299f988, size=4, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn= 0x555555ae98f0 <memory_region_write_accessor>, mr=0x555556e3a740, attrs=...) at ../softmmu/memory.c:552 #1 0x0000555555ae962c in memory_region_dispatch_write (mr=0x555556e3a740, addr=36, data=<optimized out>, op=<optimized out>, attrs=...) at ../softmmu/memory.c:1531 #2 0x0000555555af5739 in address_space_stl_internal (endian=DEVICE_NATIVE_ENDIAN, result=0x0, attrs=..., val=0, addr=<optimized out>, as=0x5555567b4bb8) at /home3/gaosong/code/vec/github/send/v2/qemu/include/exec/memory.h:2997 #3 0x0000555555af5739 in address_space_stl (as=0x5555567b4bb8, addr=<optimized out>, val=0, attrs=..., result=0x0) at /home3/gaosong/code/vec/github/send/v2/qemu/memory_ldst.c.inc:350 #4 0x0000555555ae99e8 in memory_region_write_accessor (mr=0x555556e3ab80, addr=0, value=<optimized out>, size=8, shift=<optimized out>, mask=<optimized out>, attrs=...) at ../softmmu/memory.c:493 #5 0x0000555555ae92ae in access_with_adjusted_size (addr=0, value=0x7fffd299fb88, size=8, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn= 0x555555ae98f0 <memory_region_write_accessor>, mr=0x555556e3ab80, attrs=...) 
at ../softmmu/memory.c:567 #6 0x0000555555ae962c in memory_region_dispatch_write (mr=0x555556e3ab80, addr=0, data=<optimized out>, op=<optimized out>, attrs=...) at ../softmmu/memory.c:1531 #7 0x0000555555af2591 in address_space_stq_internal (as=<optimized out>, addr=<optimized out>, val=2147483652, attrs=..., result=0x0, endian=DEVICE_NATIVE_ENDIAN) at /home3/gaosong/code/vec/github/send/v2/qemu/include/exec/memory.h:2997 #8 0x00007fff8d922923 in code_gen_buffer () #9 0x0000555555b4b9e0 in cpu_tb_exec (tb_exit=<synthetic pointer>, itb=<optimized out>, cpu=0x5555567a5d00) at ../accel/tcg/cpu-exec.c:460 #10 0x0000555555b4b9e0 in cpu_loop_exec_tb (tb_exit=<synthetic pointer>, last_tb=<synthetic pointer>, pc=<optimized out>, tb=<optimized out>, cpu=0x5555567a5d00) at ../accel/tcg/cpu-exec.c:893 #11 0x0000555555b4b9e0 in cpu_exec_loop (cpu=0x5555567a5d00, sc=<optimized out>) at ../accel/tcg/cpu-exec.c:1013 #12 0x0000555555b4c67d in cpu_exec_setjmp (cpu=0x5555567a5d00, sc=0x7fffd29a0220) at ../accel/tcg/cpu-exec.c:1043 #13 0x0000555555b4c746 in cpu_exec (cpu=0x5555567a5d00) at ../accel/tcg/cpu-exec.c:1069 #14 0x0000555555b634bf in tcg_cpus_exec (cpu=0x5555567a5d00) at ../accel/tcg/tcg-accel-ops.c:81 #15 0x0000555555b63613 in mttcg_cpu_thread_fn (arg=0x5555567a5d00) at ../accel/tcg/tcg-accel-ops-mttcg.c:95 #16 0x0000555555cdefba in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:541 #17 0x00007fffefcb717a in start_thread () at /lib64/libpthread.so.0 #18 0x00007fffef9e6dc3 in clone () at /lib64/libc.so.6 About LoongArch: - docs/system/loongarch/virt.rst Thanks. Song Gao
On 06/05/2023 11.25, Song Gao wrote: > Hi Alexander > > 在 2023/4/28 下午5:14, Thomas Huth 写道: >> On 28/04/2023 11.11, Alexander Bulekov wrote: >>> On 230428 1015, Thomas Huth wrote: >>>> On 28/04/2023 10.12, Daniel P. Berrangé wrote: >>>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >>>>>> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. >>>>>> This flag is set/checked prior to calling a device's MemoryRegion >>>>>> handlers, and set when device code initiates DMA. The purpose of this >>>>>> flag is to prevent two types of DMA-based reentrancy issues: >>>>>> >>>>>> 1.) mmio -> dma -> mmio case >>>>>> 2.) bh -> dma write -> mmio case >>>>>> >>>>>> These issues have led to problems such as stack-exhaustion and >>>>>> use-after-frees. >>>>>> >>>>>> Summary of the problem from Peter Maydell: >>>>>> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com >>>>>> >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 >>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 >>>>>> Resolves: CVE-2023-0330 >>>>>> >>>>>> Signed-off-by: Alexander Bulekov <alxndr@bu.edu> >>>>>> Reviewed-by: Thomas Huth <thuth@redhat.com> >>>>>> --- >>>>>> include/exec/memory.h | 5 +++++ >>>>>> include/hw/qdev-core.h | 7 +++++++ >>>>>> softmmu/memory.c | 16 ++++++++++++++++ >>>>>> 3 files changed, 28 insertions(+) >>>>>> >>>>>> diff --git a/include/exec/memory.h b/include/exec/memory.h >>>>>> index 15ade918ba..e45ce6061f 100644 >>>>>> --- a/include/exec/memory.h >>>>>> +++ b/include/exec/memory.h >>>>>> @@ -767,6 +767,8 @@ struct MemoryRegion { >>>>>> bool 
is_iommu; >>>>>> RAMBlock *ram_block; >>>>>> Object *owner; >>>>>> + /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access >>>>>> hotpath */ >>>>>> + DeviceState *dev; >>>>>> const MemoryRegionOps *ops; >>>>>> void *opaque; >>>>>> @@ -791,6 +793,9 @@ struct MemoryRegion { >>>>>> unsigned ioeventfd_nb; >>>>>> MemoryRegionIoeventfd *ioeventfds; >>>>>> RamDiscardManager *rdm; /* Only for RAM */ >>>>>> + >>>>>> + /* For devices designed to perform re-entrant IO into their own >>>>>> IO MRs */ >>>>>> + bool disable_reentrancy_guard; >>>>>> }; >>>>>> struct IOMMUMemoryRegion { >>>>>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h >>>>>> index bd50ad5ee1..7623703943 100644 >>>>>> --- a/include/hw/qdev-core.h >>>>>> +++ b/include/hw/qdev-core.h >>>>>> @@ -162,6 +162,10 @@ struct NamedClockList { >>>>>> QLIST_ENTRY(NamedClockList) node; >>>>>> }; >>>>>> +typedef struct { >>>>>> + bool engaged_in_io; >>>>>> +} MemReentrancyGuard; >>>>>> + >>>>>> /** >>>>>> * DeviceState: >>>>>> * @realized: Indicates whether the device has been fully constructed. >>>>>> @@ -194,6 +198,9 @@ struct DeviceState { >>>>>> int alias_required_for_version; >>>>>> ResettableState reset; >>>>>> GSList *unplug_blockers; >>>>>> + >>>>>> + /* Is the device currently in mmio/pio/dma? 
Used to prevent >>>>>> re-entrancy */ >>>>>> + MemReentrancyGuard mem_reentrancy_guard; >>>>>> }; >>>>>> struct DeviceListener { >>>>>> diff --git a/softmmu/memory.c b/softmmu/memory.c >>>>>> index b1a6cae6f5..fe23f0e5ce 100644 >>>>>> --- a/softmmu/memory.c >>>>>> +++ b/softmmu/memory.c >>>>>> @@ -542,6 +542,18 @@ static MemTxResult >>>>>> access_with_adjusted_size(hwaddr addr, >>>>>> access_size_max = 4; >>>>>> } >>>>>> + /* Do not allow more than one simultaneous access to a device's >>>>>> IO Regions */ >>>>>> + if (mr->dev && !mr->disable_reentrancy_guard && >>>>>> + !mr->ram_device && !mr->ram && !mr->rom_device && >>>>>> !mr->readonly) { >>>>>> + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { >>>>>> + warn_report("Blocked re-entrant IO on " >>>>>> + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, >>>>>> + memory_region_name(mr), addr); >>>>>> + return MEMTX_ACCESS_ERROR; >>>>> >>>>> If we issue this warn_report on every invalid memory access, is this >>>>> going to become a denial of service by flooding logs, or is the >>>>> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed >>>>> *once* in the lifetime of the QEMU process ? >>>> >>>> Maybe it's better to use warn_report_once() here instead? >>> >>> Sounds good - should I respin the series to change this? >> >> Not necessary, I've got v10 already queued, I'll fix it up there >> >> Thomas >> > This patch causes the loongarch virtual machine to fail to start the slave cpu. > > ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \ > -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd ramdisk \ > -serial stdio -monitor telnet:localhost:4495,server,nowait \ > -append "root=/dev/ram rdinit=/sbin/init > console=ttyS0,115200" --nographic > > > .... > qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: > loongarch_ipi_iocsr at addr: 0x24 Oh, another spot that needs special handling ... 
I see Alexander already sent a patch (thanks!), but anyway, this is a good indication that we're missing some test coverage in the CI. Are there any loongarch kernel images available for public download somewhere? If so, we really should add an avocado regression test for this - since as far as I can see, we don't have any tests for loongarch in tests/avocado yet? Thomas
Hi, Thomas 在 2023/5/8 下午5:33, Thomas Huth 写道: > On 06/05/2023 11.25, Song Gao wrote: >> Hi Alexander >> >> 在 2023/4/28 下午5:14, Thomas Huth 写道: >>> On 28/04/2023 11.11, Alexander Bulekov wrote: >>>> On 230428 1015, Thomas Huth wrote: >>>>> On 28/04/2023 10.12, Daniel P. Berrangé wrote: >>>>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >>>>>>> Add a flag to the DeviceState, when a device is engaged in >>>>>>> PIO/MMIO/DMA. >>>>>>> This flag is set/checked prior to calling a device's MemoryRegion >>>>>>> handlers, and set when device code initiates DMA. The purpose >>>>>>> of this >>>>>>> flag is to prevent two types of DMA-based reentrancy issues: >>>>>>> >>>>>>> 1.) mmio -> dma -> mmio case >>>>>>> 2.) bh -> dma write -> mmio case >>>>>>> >>>>>>> These issues have led to problems such as stack-exhaustion and >>>>>>> use-after-frees. >>>>>>> >>>>>>> Summary of the problem from Peter Maydell: >>>>>>> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com >>>>>>> >>>>>>> >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 >>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 >>>>>>> Resolves: CVE-2023-0330 >>>>>>> >>>>>>> Signed-off-by: Alexander Bulekov <alxndr@bu.edu> >>>>>>> Reviewed-by: Thomas Huth <thuth@redhat.com> >>>>>>> --- >>>>>>> include/exec/memory.h | 5 +++++ >>>>>>> include/hw/qdev-core.h | 7 +++++++ >>>>>>> softmmu/memory.c | 16 ++++++++++++++++ >>>>>>> 3 files changed, 28 insertions(+) >>>>>>> >>>>>>> diff --git a/include/exec/memory.h b/include/exec/memory.h >>>>>>> index 15ade918ba..e45ce6061f 100644 >>>>>>> --- 
a/include/exec/memory.h >>>>>>> +++ b/include/exec/memory.h >>>>>>> @@ -767,6 +767,8 @@ struct MemoryRegion { >>>>>>> bool is_iommu; >>>>>>> RAMBlock *ram_block; >>>>>>> Object *owner; >>>>>>> + /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR >>>>>>> access hotpath */ >>>>>>> + DeviceState *dev; >>>>>>> const MemoryRegionOps *ops; >>>>>>> void *opaque; >>>>>>> @@ -791,6 +793,9 @@ struct MemoryRegion { >>>>>>> unsigned ioeventfd_nb; >>>>>>> MemoryRegionIoeventfd *ioeventfds; >>>>>>> RamDiscardManager *rdm; /* Only for RAM */ >>>>>>> + >>>>>>> + /* For devices designed to perform re-entrant IO into their >>>>>>> own IO MRs */ >>>>>>> + bool disable_reentrancy_guard; >>>>>>> }; >>>>>>> struct IOMMUMemoryRegion { >>>>>>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h >>>>>>> index bd50ad5ee1..7623703943 100644 >>>>>>> --- a/include/hw/qdev-core.h >>>>>>> +++ b/include/hw/qdev-core.h >>>>>>> @@ -162,6 +162,10 @@ struct NamedClockList { >>>>>>> QLIST_ENTRY(NamedClockList) node; >>>>>>> }; >>>>>>> +typedef struct { >>>>>>> + bool engaged_in_io; >>>>>>> +} MemReentrancyGuard; >>>>>>> + >>>>>>> /** >>>>>>> * DeviceState: >>>>>>> * @realized: Indicates whether the device has been fully >>>>>>> constructed. >>>>>>> @@ -194,6 +198,9 @@ struct DeviceState { >>>>>>> int alias_required_for_version; >>>>>>> ResettableState reset; >>>>>>> GSList *unplug_blockers; >>>>>>> + >>>>>>> + /* Is the device currently in mmio/pio/dma? 
Used to prevent >>>>>>> re-entrancy */ >>>>>>> + MemReentrancyGuard mem_reentrancy_guard; >>>>>>> }; >>>>>>> struct DeviceListener { >>>>>>> diff --git a/softmmu/memory.c b/softmmu/memory.c >>>>>>> index b1a6cae6f5..fe23f0e5ce 100644 >>>>>>> --- a/softmmu/memory.c >>>>>>> +++ b/softmmu/memory.c >>>>>>> @@ -542,6 +542,18 @@ static MemTxResult >>>>>>> access_with_adjusted_size(hwaddr addr, >>>>>>> access_size_max = 4; >>>>>>> } >>>>>>> + /* Do not allow more than one simultaneous access to a >>>>>>> device's IO Regions */ >>>>>>> + if (mr->dev && !mr->disable_reentrancy_guard && >>>>>>> + !mr->ram_device && !mr->ram && !mr->rom_device && >>>>>>> !mr->readonly) { >>>>>>> + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { >>>>>>> + warn_report("Blocked re-entrant IO on " >>>>>>> + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, >>>>>>> + memory_region_name(mr), addr); >>>>>>> + return MEMTX_ACCESS_ERROR; >>>>>> >>>>>> If we issue this warn_report on every invalid memory access, is this >>>>>> going to become a denial of service by flooding logs, or is the >>>>>> return MEMTX_ACCESS_ERROR, sufficient to ensure this is only printed >>>>>> *once* in the lifetime of the QEMU process ? >>>>> >>>>> Maybe it's better to use warn_report_once() here instead? >>>> >>>> Sounds good - should I respin the series to change this? >>> >>> Not necessary, I've got v10 already queued, I'll fix it up there >>> >>> Thomas >>> >> This patch causes the loongarch virtual machine to fail to start the >> slave cpu. >> >> ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \ >> -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd >> ramdisk \ >> -serial stdio -monitor >> telnet:localhost:4495,server,nowait \ >> -append "root=/dev/ram rdinit=/sbin/init >> console=ttyS0,115200" --nographic >> >> >> .... >> qemu-system-loongarch64: warning: Blocked re-entrant IO on >> MemoryRegion: loongarch_ipi_iocsr at addr: 0x24 > > Oh, another spot that needs special handling ... 
I see Alexander > already sent a patch (thanks!), but anyway, this is a good indication > that we're missing some test coverage in the CI. > > Are there any loongarch kernel images available for public download > somewhere? If so, we really should add an avocado regression test for > this - since as far as I can see, we don't have any tests for > loongarch in tests/avocado yet? > we can get some binaries at: https://github.com/yangxiaojuan-loongson/qemu-binary I'm not sure that avocado testing can be done using just the kernel. Is a full loongarch system required? If yes, unfortunately, there is not yet a well-developed loongarch system. And we are moving forward with loongarch support for debian systems. There are also systems that already support loongarch, e.g. archlinux. https://mirrors.wsyu.edu.cn/loongarch/archlinux/images/ But I briefly tested that it doesn't work in the latest qemu. I've already pushed the maintainer to update the system. Thanks. Song Gao
On 08/05/2023 15.03, Song Gao wrote: > Hi, Thomas > > 在 2023/5/8 下午5:33, Thomas Huth 写道: >> On 06/05/2023 11.25, Song Gao wrote: >>> Hi Alexander >>> >>> 在 2023/4/28 下午5:14, Thomas Huth 写道: >>>> On 28/04/2023 11.11, Alexander Bulekov wrote: >>>>> On 230428 1015, Thomas Huth wrote: >>>>>> On 28/04/2023 10.12, Daniel P. Berrangé wrote: >>>>>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >>>>>>>> Add a flag to the DeviceState, when a device is engaged in >>>>>>>> PIO/MMIO/DMA. ... >>> This patch causes the loongarch virtual machine to fail to start the >>> slave cpu. >>> >>> ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \ >>> -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd >>> ramdisk \ >>> -serial stdio -monitor >>> telnet:localhost:4495,server,nowait \ >>> -append "root=/dev/ram rdinit=/sbin/init >>> console=ttyS0,115200" --nographic >>> >>> >>> .... >>> qemu-system-loongarch64: warning: Blocked re-entrant IO on MemoryRegion: >>> loongarch_ipi_iocsr at addr: 0x24 >> >> Oh, another spot that needs special handling ... I see Alexander already >> sent a patch (thanks!), but anyway, this is a good indication that we're >> missing some test coverage in the CI. >> >> Are there any loongarch kernel images available for public download >> somewhere? If so, we really should add an avocado regression test for this >> - since as far as I can see, we don't have any tests for loongarch in >> tests/avocado yet? >> > we can get some binarys at: > https://github.com/yangxiaojuan-loongson/qemu-binary > > I'm not sure that avacodo testing can be done using just the kernel. > > Is a full loongarch system required? No, you don't need a full distro installation, just a kernel with ramdisk (which is also available there) is good enough for a basic test, e.g. just check whether the kernel boots to a certain point is good enough to provide a basic sanity test. 
If you then can also get even into a shell (of the ramdisk), you can check some additional stuff in the sysfs or "dmesg" output, see for example tests/avocado/machine_s390_ccw_virtio.py which does such checks with a kernel and initrd on s390x. Thomas
在 2023/5/8 下午9:12, Thomas Huth 写道: > On 08/05/2023 15.03, Song Gao wrote: >> Hi, Thomas >> >> 在 2023/5/8 下午5:33, Thomas Huth 写道: >>> On 06/05/2023 11.25, Song Gao wrote: >>>> Hi Alexander >>>> >>>> 在 2023/4/28 下午5:14, Thomas Huth 写道: >>>>> On 28/04/2023 11.11, Alexander Bulekov wrote: >>>>>> On 230428 1015, Thomas Huth wrote: >>>>>>> On 28/04/2023 10.12, Daniel P. Berrangé wrote: >>>>>>>> On Thu, Apr 27, 2023 at 05:10:06PM -0400, Alexander Bulekov wrote: >>>>>>>>> Add a flag to the DeviceState, when a device is engaged in >>>>>>>>> PIO/MMIO/DMA. > ... >>>> This patch causes the loongarch virtual machine to fail to start >>>> the slave cpu. >>>> >>>> ./build/qemu-system-loongarch64 -machine virt -m 8G -cpu la464 \ >>>> -smp 4 -bios QEMU_EFI.fd -kernel vmlinuz.efi -initrd >>>> ramdisk \ >>>> -serial stdio -monitor >>>> telnet:localhost:4495,server,nowait \ >>>> -append "root=/dev/ram rdinit=/sbin/init >>>> console=ttyS0,115200" --nographic >>>> >>>> >>>> .... >>>> qemu-system-loongarch64: warning: Blocked re-entrant IO on >>>> MemoryRegion: loongarch_ipi_iocsr at addr: 0x24 >>> >>> Oh, another spot that needs special handling ... I see Alexander >>> already sent a patch (thanks!), but anyway, this is a good >>> indication that we're missing some test coverage in the CI. >>> >>> Are there any loongarch kernel images available for public download >>> somewhere? If so, we really should add an avocado regression test >>> for this - since as far as I can see, we don't have any tests for >>> loongarch in tests/avocado yet? >>> >> we can get some binarys at: >> https://github.com/yangxiaojuan-loongson/qemu-binary > > >> I'm not sure that avacodo testing can be done using just the kernel. >> >> Is a full loongarch system required? > > No, you don't need a full distro installation, just a kernel with > ramdisk (which is also available there) is good enough for a basic > test, e.g. 
just check whether the kernel boots to a certain point is > good enough to provide a basic sanity test. If you then can also get > even into a shell (of the ramdisk), you can check some additional > stuff in the sysfs or "dmesg" output, see for example > tests/avocado/machine_s390_ccw_virtio.py which does such checks with a > kernel and initrd on s390x. > Thanks for your suggestion. We will add a loongarch basic test in tests/avocado. Thanks. Song Gao
Hi, Thomas 在 2023/5/8 下午9:12, Thomas Huth 写道: > >>> Oh, another spot that needs special handling ... I see Alexander >>> already sent a patch (thanks!), but anyway, this is a good >>> indication that we're missing some test coverage in the CI. >>> >>> Are there any loongarch kernel images available for public download >>> somewhere? If so, we really should add an avocado regression test >>> for this - since as far as I can see, we don't have any tests for >>> loongarch in tests/avocado yet? >>> >> we can get some binarys at: >> https://github.com/yangxiaojuan-loongson/qemu-binary > > >> I'm not sure that avacodo testing can be done using just the kernel. >> >> Is a full loongarch system required? > > No, you don't need a full distro installation, just a kernel with > ramdisk (which is also available there) is good enough for a basic > test, e.g. just check whether the kernel boots to a certain point is > good enough to provide a basic sanity test. If you then can also get > even into a shell (of the ramdisk), you can check some additional > stuff in the sysfs or "dmesg" output, see for example > tests/avocado/machine_s390_ccw_virtio.py which does such checks with a > kernel and initrd on s390x. > > I have a few questions. I run ' make check-avocado AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1' root@loongson-KVM:~/work/qemu#make check-avocado AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1 changing dir to build for make "check-avocado"... 
make[1]: Entering directory '/root/work/qemu/build' (GIT="git" "/root/work/qemu/scripts/git-submodule.sh" update ui/keycodemapdb meson tests/fp/berkeley-testfloat-3 tests/fp/berkeley-softfloat-3 dtc) /root/work/qemu/build/tests/venv/bin/python3 -m avocado vmimage get --distro=fedora --distro-version=31 --arch=s390x The image was downloaded: Provider Version Architecture File fedora 31 s390x /root/avocado/data/cache/by_location/8ee06cba5485a58b2203c2c000d6d2ff6da0f040/Fedora-Cloud-Base-31-1.9.s390x.qcow2 /root/work/qemu/build/tests/venv/bin/python3 -m avocado --show=app run --job-results-dir=/root/work/qemu/build/tests/results --filter-by-tags-include-empty --filter-by-tags-include-empty-key --max-parallel-tasks 1 -t arch:loongarch64 -t arch:s390x --failfast ./tests/avocado/machine_s390_ccw_virtio.py ... This test downloaded 'Fedora-Cloud-Base-31-1.9.s390x.qcow2' image. but we don't have a 'Fedora-Cloud-Base-31-1.9.loongarch.qcow2' image. Am I missing something? One more question, How to get the 'kernel_hash' and 'initrd_hash'? Thanks. Song Gao
On 10/05/2023 11.02, Song Gao wrote: > Hi, Thomas > > 在 2023/5/8 下午9:12, Thomas Huth 写道: >> >>>> Oh, another spot that needs special handling ... I see Alexander already >>>> sent a patch (thanks!), but anyway, this is a good indication that we're >>>> missing some test coverage in the CI. >>>> >>>> Are there any loongarch kernel images available for public download >>>> somewhere? If so, we really should add an avocado regression test for >>>> this - since as far as I can see, we don't have any tests for loongarch >>>> in tests/avocado yet? >>>> >>> we can get some binarys at: >>> https://github.com/yangxiaojuan-loongson/qemu-binary >> > >>> I'm not sure that avacodo testing can be done using just the kernel. >>> >>> Is a full loongarch system required? >> >> No, you don't need a full distro installation, just a kernel with ramdisk >> (which is also available there) is good enough for a basic test, e.g. just >> check whether the kernel boots to a certain point is good enough to >> provide a basic sanity test. If you then can also get even into a shell >> (of the ramdisk), you can check some additional stuff in the sysfs or >> "dmesg" output, see for example tests/avocado/machine_s390_ccw_virtio.py >> which does such checks with a kernel and initrd on s390x. >> >> > I have a few questions. > > I run ' make check-avocado > AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1' > > root@loongson-KVM:~/work/qemu#make check-avocado > AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1 > changing dir to build for make "check-avocado"... 
> make[1]: Entering directory '/root/work/qemu/build' > (GIT="git" "/root/work/qemu/scripts/git-submodule.sh" update ui/keycodemapdb > meson tests/fp/berkeley-testfloat-3 tests/fp/berkeley-softfloat-3 dtc) > /root/work/qemu/build/tests/venv/bin/python3 -m avocado vmimage get > --distro=fedora --distro-version=31 --arch=s390x > The image was downloaded: > Provider Version Architecture File > fedora 31 s390x > /root/avocado/data/cache/by_location/8ee06cba5485a58b2203c2c000d6d2ff6da0f040/Fedora-Cloud-Base-31-1.9.s390x.qcow2 > /root/work/qemu/build/tests/venv/bin/python3 -m avocado --show=app run > --job-results-dir=/root/work/qemu/build/tests/results > --filter-by-tags-include-empty --filter-by-tags-include-empty-key > --max-parallel-tasks 1 -t arch:loongarch64 -t arch:s390x --failfast > ./tests/avocado/machine_s390_ccw_virtio.py > ... > > This test downloaded 'Fedora-Cloud-Base-31-1.9.s390x.qcow2' image. > but we don't have a 'Fedora-Cloud-Base-31-1.9.loongarch.qcow2' image. > > Am I missing something? Hmm, that image is not required for those tests... not sure why they get downloaded here... I think something in tests/avocado/avocado_qemu/__init__.py or in tests/Makefile.include tries to download the cloudinit stuff in advance for other tests, but it is certainly unrelated to the machine_s390_ccw_virtio.py test that only uses a kernel and initrd. I think you can ignore that (unless there is an error since it's trying to download the loongarch Cloud-Base image - then that's a bug). > One more question, How to get the 'kernel_hash' and 'initrd_hash'? I think it's a SHA1 hash by default, so you can for example get it with the "sha1sum" tool on the command line. But seems like it is also possible to specify different algorithms with the "algorithm=..." parameter of fetch_asset(). HTH, Thomas
在 2023/5/10 下午8:21, Thomas Huth 写道: > On 10/05/2023 11.02, Song Gao wrote: >> Hi, Thomas >> >> 在 2023/5/8 下午9:12, Thomas Huth 写道: >>> >>>>> Oh, another spot that needs special handling ... I see Alexander >>>>> already sent a patch (thanks!), but anyway, this is a good >>>>> indication that we're missing some test coverage in the CI. >>>>> >>>>> Are there any loongarch kernel images available for public >>>>> download somewhere? If so, we really should add an avocado >>>>> regression test for this - since as far as I can see, we don't >>>>> have any tests for loongarch in tests/avocado yet? >>>>> >>>> we can get some binarys at: >>>> https://github.com/yangxiaojuan-loongson/qemu-binary >>> > >>>> I'm not sure that avacodo testing can be done using just the kernel. >>>> >>>> Is a full loongarch system required? >>> >>> No, you don't need a full distro installation, just a kernel with >>> ramdisk (which is also available there) is good enough for a basic >>> test, e.g. just check whether the kernel boots to a certain point is >>> good enough to provide a basic sanity test. If you then can also get >>> even into a shell (of the ramdisk), you can check some additional >>> stuff in the sysfs or "dmesg" output, see for example >>> tests/avocado/machine_s390_ccw_virtio.py which does such checks with >>> a kernel and initrd on s390x. >>> >>> >> I have a few questions. >> >> I run ' make check-avocado >> AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1' >> >> root@loongson-KVM:~/work/qemu#make check-avocado >> AVOCADO_TESTS=./tests/avocado/machine_s390_ccw_virtio.py V=1 >> changing dir to build for make "check-avocado"... 
>> make[1]: Entering directory '/root/work/qemu/build' >> (GIT="git" "/root/work/qemu/scripts/git-submodule.sh" update >> ui/keycodemapdb meson tests/fp/berkeley-testfloat-3 >> tests/fp/berkeley-softfloat-3 dtc) >> /root/work/qemu/build/tests/venv/bin/python3 -m avocado vmimage get >> --distro=fedora --distro-version=31 --arch=s390x >> The image was downloaded: >> Provider Version Architecture File >> fedora 31 s390x >> /root/avocado/data/cache/by_location/8ee06cba5485a58b2203c2c000d6d2ff6da0f040/Fedora-Cloud-Base-31-1.9.s390x.qcow2 >> /root/work/qemu/build/tests/venv/bin/python3 -m avocado --show=app >> run --job-results-dir=/root/work/qemu/build/tests/results >> --filter-by-tags-include-empty --filter-by-tags-include-empty-key >> --max-parallel-tasks 1 -t arch:loongarch64 -t arch:s390x --failfast >> ./tests/avocado/machine_s390_ccw_virtio.py >> ... >> >> This test downloaded 'Fedora-Cloud-Base-31-1.9.s390x.qcow2' image. >> but we don't have a 'Fedora-Cloud-Base-31-1.9.loongarch.qcow2' image. >> >> Am I missing something? > > Hmm, that image is not required for those tests... not sure why they > get downloaded here... I think something in > tests/avocado/avocado_qemu/__init__.py or in tests/Makefile.include > tries to download the cloudinit stuff in advance for other tests, but > it is certainly unrelated to the machine_s390_ccw_virtio.py test that > only uses a kernel and initrd. > > I think you can ignore that (unless there is an error since it's > trying to download the loongarch Cloud-Base image - then that's a bug). > Yes, we can ignore, no error. >> One more question, How to get the 'kernel_hash' and 'initrd_hash'? > > I think it's a SHA1 hash by default, so you can for example get it > with the "sha1sum" tool on the command line. > > But seems like it is also possible to specify different algorithms > with the "algorithm=..." parameter of fetch_asset(). > Thanks for you help. And Should we need add ' @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')' ? 
I see some tests add this. Thanks. Song Gao
On 11/05/2023 10.53, Song Gao wrote: ... > And > Should we need add ' @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')' ? > > I see some tests add this. No, please don't add that unless there is a good reason. That marker is only required if the test does not work reliably on gitlab, e.g. if it sometimes fails due to race conditions or if it takes incredibly long to finish. Thomas
在 2023/5/11 下午4:58, Thomas Huth 写道: > On 11/05/2023 10.53, Song Gao wrote: > ... >> And >> Should we need add ' @skipIf(os.getenv('GITLAB_CI'), 'Running on >> GitLab')' ? >> >> I see some tests add this. > > No, please don't add that unless there is a good reason. That marker > is only required if the test does not work reliable on gitlab, e.g. if > it sometimes fails due to race conditions or if it takes incredibly > long to finish. > Ok. Thanks. Song Gao
diff --git a/include/exec/memory.h b/include/exec/memory.h index 15ade918ba..e45ce6061f 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -767,6 +767,8 @@ struct MemoryRegion { bool is_iommu; RAMBlock *ram_block; Object *owner; + /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */ + DeviceState *dev; const MemoryRegionOps *ops; void *opaque; @@ -791,6 +793,9 @@ struct MemoryRegion { unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; RamDiscardManager *rdm; /* Only for RAM */ + + /* For devices designed to perform re-entrant IO into their own IO MRs */ + bool disable_reentrancy_guard; }; struct IOMMUMemoryRegion { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bd50ad5ee1..7623703943 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -162,6 +162,10 @@ struct NamedClockList { QLIST_ENTRY(NamedClockList) node; }; +typedef struct { + bool engaged_in_io; +} MemReentrancyGuard; + /** * DeviceState: * @realized: Indicates whether the device has been fully constructed. @@ -194,6 +198,9 @@ struct DeviceState { int alias_required_for_version; ResettableState reset; GSList *unplug_blockers; + + /* Is the device currently in mmio/pio/dma? 
Used to prevent re-entrancy */ + MemReentrancyGuard mem_reentrancy_guard; }; struct DeviceListener { diff --git a/softmmu/memory.c b/softmmu/memory.c index b1a6cae6f5..fe23f0e5ce 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_size_max = 4; } + /* Do not allow more than one simultaneous access to a device's IO Regions */ + if (mr->dev && !mr->disable_reentrancy_guard && + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { + warn_report("Blocked re-entrant IO on " + "MemoryRegion: %s at addr: 0x%" HWADDR_PRIX, + memory_region_name(mr), addr); + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; + } + /* FIXME: support unaligned access? */ access_size = MAX(MIN(size, access_size_max), access_size_min); access_mask = MAKE_64BIT_MASK(0, access_size * 8); @@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_mask, attrs); } } + if (mr->dev) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } return r; } @@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, } mr->name = g_strdup(name); mr->owner = owner; + mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); mr->ram_block = NULL; if (name) {