diff mbox series

[RFC,01/16] accel/tcg: Store section pointer in CPUTLBEntryFull

Message ID 20240612081416.29704-2-jim.shu@sifive.com (mailing list archive)
State New, archived
Headers show
Series Implements RISC-V WorldGuard extension v0.4 | expand

Commit Message

Jim Shu June 12, 2024, 8:14 a.m. UTC
'CPUTLBEntryFull.xlat_section' stores the section_index in its low 12
bits so that the correct section can be found when the CPU accesses an
IO region through the IOTLB (iotlb_to_section()).

However, a section_index is only unique within a single AddressSpace. If
the address translation goes through an IOMMUMemoryRegion, it can return
a section from another AddressSpace. The 'iotlb_to_section()' API only
looks up sections in the CPU's AddressSpace, so it cannot find a section
that belongs to another AddressSpace. Thus, using 'iotlb_to_section()'
returns the wrong section and QEMU performs the wrong load/store access.

To fix this bug, store complete MemoryRegionSection pointer in
CPUTLBEntryFull instead of section_index.

This bug occurs only when both of the following hold:
(1) An IOMMUMemoryRegion is in the path of CPU access.
(2) The IOMMUMemoryRegion returns a different target_as and the section
is in the IO region.

Common IOMMU devices don't have this issue since they are only in the
path of DMA access. Currently, the bug only occurs when the ARM MPC
device (hw/misc/tz-mpc.c) returns 'blocked_io_as' to emulate blocked
access handling. The upcoming RISC-V wgChecker device is also affected
by this bug.

Signed-off-by: Jim Shu <jim.shu@sifive.com>
---
 accel/tcg/cputlb.c    | 19 +++++++++----------
 include/hw/core/cpu.h |  3 +++
 2 files changed, 12 insertions(+), 10 deletions(-)

Comments

LIU Zhiwei June 13, 2024, 6:22 a.m. UTC | #1
On 2024/6/12 16:14, Jim Shu wrote:
> 'CPUTLBEntryFull.xlat_section' stores section_index in last 12 bits to
> find the correct section when CPU access the IO region over the IOTLB
> (iotlb_to_section()).
>
> However, section_index is only unique inside single AddressSpace. If
> address space translation is over IOMMUMemoryRegion, it could return
> section from other AddressSpace. 'iotlb_to_section()' API only finds the
> sections from CPU's AddressSpace so that it couldn't find section in
> other AddressSpace. Thus, using 'iotlb_to_section()' API will find the
> wrong section and QEMU will have wrong load/store access.
>
> To fix this bug, store complete MemoryRegionSection pointer in
> CPUTLBEntryFull instead of section_index.
>
> This bug occurs only when
> (1) IOMMUMemoryRegion is in the path of CPU access.

Hi Jim,

Can you explain a little more on when IOMMUMemoryRegion is in the path 
of CPU access?

Thanks,
Zhiwei

> (2) IOMMUMemoryRegion returns different target_as and the section is in
> the IO region.
>
> Common IOMMU devices don't have this issue since they are only in the
> path of DMA access. Currently, the bug only occurs when ARM MPC device
> (hw/misc/tz-mpc.c) returns 'blocked_io_as' to emulate blocked access
> handling. Upcoming RISC-V wgChecker device is also affected by this bug.
>
> Signed-off-by: Jim Shu <jim.shu@sifive.com>
> ---
>   accel/tcg/cputlb.c    | 19 +++++++++----------
>   include/hw/core/cpu.h |  3 +++
>   2 files changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 117b516739..8cf124b760 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1169,6 +1169,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
>       desc->fulltlb[index] = *full;
>       full = &desc->fulltlb[index];
>       full->xlat_section = iotlb - addr_page;
> +    full->section = section;
>       full->phys_addr = paddr_page;
>   
>       /* Now calculate the new entry */
> @@ -1248,14 +1249,14 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
>   }
>   
>   static MemoryRegionSection *
> -io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
> +io_prepare(hwaddr *out_offset, CPUState *cpu, CPUTLBEntryFull *full,
>              MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
>   {
>       MemoryRegionSection *section;
>       hwaddr mr_offset;
>   
> -    section = iotlb_to_section(cpu, xlat, attrs);
> -    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
> +    section = full->section;
> +    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
>       cpu->mem_io_pc = retaddr;
>       if (!cpu->neg.can_do_io) {
>           cpu_io_recompile(cpu, retaddr);
> @@ -1571,9 +1572,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
>   
>       /* We must have an iotlb entry for MMIO */
>       if (tlb_addr & TLB_MMIO) {
> -        MemoryRegionSection *section =
> -            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
> -                             full->attrs);
> +        MemoryRegionSection *section = full->section;
>           data->is_io = true;
>           data->mr = section->mr;
>       } else {
> @@ -1972,7 +1971,7 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>       tcg_debug_assert(size > 0 && size <= 8);
>   
>       attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>       mr = section->mr;
>   
>       BQL_LOCK_GUARD();
> @@ -1993,7 +1992,7 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>       tcg_debug_assert(size > 8 && size <= 16);
>   
>       attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>       mr = section->mr;
>   
>       BQL_LOCK_GUARD();
> @@ -2513,7 +2512,7 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>       tcg_debug_assert(size > 0 && size <= 8);
>   
>       attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>       mr = section->mr;
>   
>       BQL_LOCK_GUARD();
> @@ -2533,7 +2532,7 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>       tcg_debug_assert(size > 8 && size <= 16);
>   
>       attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>       mr = section->mr;
>   
>       BQL_LOCK_GUARD();
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index a2c8536943..3f6c10897b 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -217,6 +217,9 @@ typedef struct CPUTLBEntryFull {
>        */
>       hwaddr xlat_section;
>   
> +    /* @section contains physical section. */
> +    MemoryRegionSection *section;
> +
>       /*
>        * @phys_addr contains the physical address in the address space
>        * given by cpu_asidx_from_attrs(cpu, @attrs).
Jim Shu June 13, 2024, 10:37 a.m. UTC | #2
Hi Zhiwei,

Common IOMMU devices will not have an IOMMUMemoryRegion in the path of
CPU access since they only affect DMA access.
In QEMU, the IOMMU MR is usually placed as the parent of
"system_memory", and the target_mr of DMA is changed from
"system_memory" to the IOMMU MR.

The wgChecker sits in front of memory or device MMIO and protects both
CPU and DMA access to that memory or device MMIO.
In QEMU, wgChecker re-uses IOMMUMemoryRegion to implement the memory
protection inside the translate() function of the IOMMU MR.
In the machine code, wgChecker replaces the MemoryRegion of protected
resources with the checker's IOMMU MR in the MemoryRegion tree of
"system_memory".
Both CPU and DMA accesses go through "system_memory". They go through
the checker's IOMMU MR when accessing the protected resources.

This mechanism was originally used by the Cortex-M MPC device
(hw/misc/tz-mpc.c). I have leveraged it and extended it a little (in
patch 2), as MPC doesn't support RO/WO permissions.
If we'd like a device to do memory protection of both CPU and DMA
access, we could implement it with this mechanism.
(P.S. Cortex-A TZASC, which is similar to the MPC or wgChecker device,
is not supported in QEMU.)

Thanks,
Jim Shu





On Thu, Jun 13, 2024 at 2:23 PM LIU Zhiwei <zhiwei_liu@linux.alibaba.com> wrote:
>
> On 2024/6/12 16:14, Jim Shu wrote:
> > 'CPUTLBEntryFull.xlat_section' stores section_index in last 12 bits to
> > find the correct section when CPU access the IO region over the IOTLB
> > (iotlb_to_section()).
> >
> > However, section_index is only unique inside single AddressSpace. If
> > address space translation is over IOMMUMemoryRegion, it could return
> > section from other AddressSpace. 'iotlb_to_section()' API only finds the
> > sections from CPU's AddressSpace so that it couldn't find section in
> > other AddressSpace. Thus, using 'iotlb_to_section()' API will find the
> > wrong section and QEMU will have wrong load/store access.
> >
> > To fix this bug, store complete MemoryRegionSection pointer in
> > CPUTLBEntryFull instead of section_index.
> >
> > This bug occurs only when
> > (1) IOMMUMemoryRegion is in the path of CPU access.
>
> Hi Jim,
>
> Can you explain a little more on when IOMMUMemoryRegion is in the path
> of CPU access?
>
> Thanks,
> Zhiwei
>
> > (2) IOMMUMemoryRegion returns different target_as and the section is in
> > the IO region.
> >
> > Common IOMMU devices don't have this issue since they are only in the
> > path of DMA access. Currently, the bug only occurs when ARM MPC device
> > (hw/misc/tz-mpc.c) returns 'blocked_io_as' to emulate blocked access
> > handling. Upcoming RISC-V wgChecker device is also affected by this bug.
> >
> > Signed-off-by: Jim Shu <jim.shu@sifive.com>
> > ---
> >   accel/tcg/cputlb.c    | 19 +++++++++----------
> >   include/hw/core/cpu.h |  3 +++
> >   2 files changed, 12 insertions(+), 10 deletions(-)
> >
> > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> > index 117b516739..8cf124b760 100644
> > --- a/accel/tcg/cputlb.c
> > +++ b/accel/tcg/cputlb.c
> > @@ -1169,6 +1169,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
> >       desc->fulltlb[index] = *full;
> >       full = &desc->fulltlb[index];
> >       full->xlat_section = iotlb - addr_page;
> > +    full->section = section;
> >       full->phys_addr = paddr_page;
> >
> >       /* Now calculate the new entry */
> > @@ -1248,14 +1249,14 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
> >   }
> >
> >   static MemoryRegionSection *
> > -io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
> > +io_prepare(hwaddr *out_offset, CPUState *cpu, CPUTLBEntryFull *full,
> >              MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
> >   {
> >       MemoryRegionSection *section;
> >       hwaddr mr_offset;
> >
> > -    section = iotlb_to_section(cpu, xlat, attrs);
> > -    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
> > +    section = full->section;
> > +    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
> >       cpu->mem_io_pc = retaddr;
> >       if (!cpu->neg.can_do_io) {
> >           cpu_io_recompile(cpu, retaddr);
> > @@ -1571,9 +1572,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
> >
> >       /* We must have an iotlb entry for MMIO */
> >       if (tlb_addr & TLB_MMIO) {
> > -        MemoryRegionSection *section =
> > -            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
> > -                             full->attrs);
> > +        MemoryRegionSection *section = full->section;
> >           data->is_io = true;
> >           data->mr = section->mr;
> >       } else {
> > @@ -1972,7 +1971,7 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
> >       tcg_debug_assert(size > 0 && size <= 8);
> >
> >       attrs = full->attrs;
> > -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> > +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
> >       mr = section->mr;
> >
> >       BQL_LOCK_GUARD();
> > @@ -1993,7 +1992,7 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
> >       tcg_debug_assert(size > 8 && size <= 16);
> >
> >       attrs = full->attrs;
> > -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> > +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
> >       mr = section->mr;
> >
> >       BQL_LOCK_GUARD();
> > @@ -2513,7 +2512,7 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
> >       tcg_debug_assert(size > 0 && size <= 8);
> >
> >       attrs = full->attrs;
> > -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> > +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
> >       mr = section->mr;
> >
> >       BQL_LOCK_GUARD();
> > @@ -2533,7 +2532,7 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
> >       tcg_debug_assert(size > 8 && size <= 16);
> >
> >       attrs = full->attrs;
> > -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> > +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
> >       mr = section->mr;
> >
> >       BQL_LOCK_GUARD();
> > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> > index a2c8536943..3f6c10897b 100644
> > --- a/include/hw/core/cpu.h
> > +++ b/include/hw/core/cpu.h
> > @@ -217,6 +217,9 @@ typedef struct CPUTLBEntryFull {
> >        */
> >       hwaddr xlat_section;
> >
> > +    /* @section contains physical section. */
> > +    MemoryRegionSection *section;
> > +
> >       /*
> >        * @phys_addr contains the physical address in the address space
> >        * given by cpu_asidx_from_attrs(cpu, @attrs).
LIU Zhiwei June 14, 2024, 1:28 p.m. UTC | #3
On 2024/6/13 18:37, Jim Shu wrote:
> Hi Zhiwei,
>
> Common IOMMU devices will not have IOMMUMemoryRegion in the path of
> CPU access since It only affects DMA access.
> In QEMU, it usually places this IOMMU MR as the parent of
> "system_memory", and changes the target_mr of DMA from "system_memory"
> to IOMMU MR.
>
> For the wgChecker, it is in front of memory or device MMIO and
> protects both CPU/DMA access to memory or device MMIO.
> In QEMU, wgChecker re-use IOMMUMemoryRegion to implement the memory
> protection inside the translate() function of IOMMU MR.
> In the machine code, wgChecker replaces the MemoryRegion of protected
> resources with the checker's IOMMU MR in the MemoryRegion tree of
> "system_memory".
> Both CPU/DMA access will go through the "system_memory". They will go
> through the checker's IOMMU MR when accessing the protected resources.

Thanks. It is clear and very helpful.

Zhiwei

>
> This mechanism is used by Cortex-M MPC devices (hw/misc/tz-mpc.c)
> originally. I have leveraged it and extended it little (in patch 2) as
> MPC doesn't support RO/WO permission.
> If we'd like to have a device to do the memory protection of both CPU
> & DMA access, we could implement it in this mechanism.
> (p.s. Cortex-A TZASC is not supported in QEMU, which is similar to MPC
> or wgChecker device.)
>
> Thanks,
> Jim Shu
>
>
>
>
>
> On Thu, Jun 13, 2024 at 2:23 PM LIU Zhiwei <zhiwei_liu@linux.alibaba.com> wrote:
>> On 2024/6/12 16:14, Jim Shu wrote:
>>> 'CPUTLBEntryFull.xlat_section' stores section_index in last 12 bits to
>>> find the correct section when CPU access the IO region over the IOTLB
>>> (iotlb_to_section()).
>>>
>>> However, section_index is only unique inside single AddressSpace. If
>>> address space translation is over IOMMUMemoryRegion, it could return
>>> section from other AddressSpace. 'iotlb_to_section()' API only finds the
>>> sections from CPU's AddressSpace so that it couldn't find section in
>>> other AddressSpace. Thus, using 'iotlb_to_section()' API will find the
>>> wrong section and QEMU will have wrong load/store access.
>>>
>>> To fix this bug, store complete MemoryRegionSection pointer in
>>> CPUTLBEntryFull instead of section_index.
>>>
>>> This bug occurs only when
>>> (1) IOMMUMemoryRegion is in the path of CPU access.
>> Hi Jim,
>>
>> Can you explain a little more on when IOMMUMemoryRegion is in the path
>> of CPU access?
>>
>> Thanks,
>> Zhiwei
>>
>>> (2) IOMMUMemoryRegion returns different target_as and the section is in
>>> the IO region.
>>>
>>> Common IOMMU devices don't have this issue since they are only in the
>>> path of DMA access. Currently, the bug only occurs when ARM MPC device
>>> (hw/misc/tz-mpc.c) returns 'blocked_io_as' to emulate blocked access
>>> handling. Upcoming RISC-V wgChecker device is also affected by this bug.
>>>
>>> Signed-off-by: Jim Shu <jim.shu@sifive.com>
>>> ---
>>>    accel/tcg/cputlb.c    | 19 +++++++++----------
>>>    include/hw/core/cpu.h |  3 +++
>>>    2 files changed, 12 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
>>> index 117b516739..8cf124b760 100644
>>> --- a/accel/tcg/cputlb.c
>>> +++ b/accel/tcg/cputlb.c
>>> @@ -1169,6 +1169,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
>>>        desc->fulltlb[index] = *full;
>>>        full = &desc->fulltlb[index];
>>>        full->xlat_section = iotlb - addr_page;
>>> +    full->section = section;
>>>        full->phys_addr = paddr_page;
>>>
>>>        /* Now calculate the new entry */
>>> @@ -1248,14 +1249,14 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
>>>    }
>>>
>>>    static MemoryRegionSection *
>>> -io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
>>> +io_prepare(hwaddr *out_offset, CPUState *cpu, CPUTLBEntryFull *full,
>>>               MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
>>>    {
>>>        MemoryRegionSection *section;
>>>        hwaddr mr_offset;
>>>
>>> -    section = iotlb_to_section(cpu, xlat, attrs);
>>> -    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
>>> +    section = full->section;
>>> +    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
>>>        cpu->mem_io_pc = retaddr;
>>>        if (!cpu->neg.can_do_io) {
>>>            cpu_io_recompile(cpu, retaddr);
>>> @@ -1571,9 +1572,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
>>>
>>>        /* We must have an iotlb entry for MMIO */
>>>        if (tlb_addr & TLB_MMIO) {
>>> -        MemoryRegionSection *section =
>>> -            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
>>> -                             full->attrs);
>>> +        MemoryRegionSection *section = full->section;
>>>            data->is_io = true;
>>>            data->mr = section->mr;
>>>        } else {
>>> @@ -1972,7 +1971,7 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>>>        tcg_debug_assert(size > 0 && size <= 8);
>>>
>>>        attrs = full->attrs;
>>> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
>>> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>>>        mr = section->mr;
>>>
>>>        BQL_LOCK_GUARD();
>>> @@ -1993,7 +1992,7 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>>>        tcg_debug_assert(size > 8 && size <= 16);
>>>
>>>        attrs = full->attrs;
>>> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
>>> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>>>        mr = section->mr;
>>>
>>>        BQL_LOCK_GUARD();
>>> @@ -2513,7 +2512,7 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>>>        tcg_debug_assert(size > 0 && size <= 8);
>>>
>>>        attrs = full->attrs;
>>> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
>>> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>>>        mr = section->mr;
>>>
>>>        BQL_LOCK_GUARD();
>>> @@ -2533,7 +2532,7 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>>>        tcg_debug_assert(size > 8 && size <= 16);
>>>
>>>        attrs = full->attrs;
>>> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
>>> +    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
>>>        mr = section->mr;
>>>
>>>        BQL_LOCK_GUARD();
>>> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
>>> index a2c8536943..3f6c10897b 100644
>>> --- a/include/hw/core/cpu.h
>>> +++ b/include/hw/core/cpu.h
>>> @@ -217,6 +217,9 @@ typedef struct CPUTLBEntryFull {
>>>         */
>>>        hwaddr xlat_section;
>>>
>>> +    /* @section contains physical section. */
>>> +    MemoryRegionSection *section;
>>> +
>>>        /*
>>>         * @phys_addr contains the physical address in the address space
>>>         * given by cpu_asidx_from_attrs(cpu, @attrs).
diff mbox series

Patch

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 117b516739..8cf124b760 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1169,6 +1169,7 @@  void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     desc->fulltlb[index] = *full;
     full = &desc->fulltlb[index];
     full->xlat_section = iotlb - addr_page;
+    full->section = section;
     full->phys_addr = paddr_page;
 
     /* Now calculate the new entry */
@@ -1248,14 +1249,14 @@  static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
 }
 
 static MemoryRegionSection *
-io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
+io_prepare(hwaddr *out_offset, CPUState *cpu, CPUTLBEntryFull *full,
            MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
 {
     MemoryRegionSection *section;
     hwaddr mr_offset;
 
-    section = iotlb_to_section(cpu, xlat, attrs);
-    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
+    section = full->section;
+    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
     if (!cpu->neg.can_do_io) {
         cpu_io_recompile(cpu, retaddr);
@@ -1571,9 +1572,7 @@  bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
 
     /* We must have an iotlb entry for MMIO */
     if (tlb_addr & TLB_MMIO) {
-        MemoryRegionSection *section =
-            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
-                             full->attrs);
+        MemoryRegionSection *section = full->section;
         data->is_io = true;
         data->mr = section->mr;
     } else {
@@ -1972,7 +1971,7 @@  static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
     tcg_debug_assert(size > 0 && size <= 8);
 
     attrs = full->attrs;
-    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
+    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
     mr = section->mr;
 
     BQL_LOCK_GUARD();
@@ -1993,7 +1992,7 @@  static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
     tcg_debug_assert(size > 8 && size <= 16);
 
     attrs = full->attrs;
-    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
+    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
     mr = section->mr;
 
     BQL_LOCK_GUARD();
@@ -2513,7 +2512,7 @@  static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
     tcg_debug_assert(size > 0 && size <= 8);
 
     attrs = full->attrs;
-    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
+    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
     mr = section->mr;
 
     BQL_LOCK_GUARD();
@@ -2533,7 +2532,7 @@  static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
     tcg_debug_assert(size > 8 && size <= 16);
 
     attrs = full->attrs;
-    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
+    section = io_prepare(&mr_offset, cpu, full, attrs, addr, ra);
     mr = section->mr;
 
     BQL_LOCK_GUARD();
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index a2c8536943..3f6c10897b 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -217,6 +217,9 @@  typedef struct CPUTLBEntryFull {
      */
     hwaddr xlat_section;
 
+    /* @section contains physical section. */
+    MemoryRegionSection *section;
+
     /*
      * @phys_addr contains the physical address in the address space
      * given by cpu_asidx_from_attrs(cpu, @attrs).