
[1/2] system/memory.c: support unaligned access

Message ID 20231211071204.30156-2-tomoyuki.hirose@igel.co.jp (mailing list archive)
State New, archived
Series support unaligned access for some xHCI registers

Commit Message

Tomoyuki HIROSE Dec. 11, 2023, 7:12 a.m. UTC
The previous code ignored 'impl.unaligned' and handled unaligned accesses
as-is. But this implementation cannot emulate specific registers of some
devices that allow unaligned access, such as the xHCI Host Controller
Capability Registers.
This commit checks 'impl.unaligned' and, if it is false, QEMU emulates an
unaligned access with multiple aligned accesses.

Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
---
 system/memory.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

Comments

Cédric Le Goater Dec. 11, 2023, 1:31 p.m. UTC | #1
Hello,

On 12/11/23 08:12, Tomoyuki HIROSE wrote:
> The previous code ignored 'impl.unaligned' and handled unaligned accesses
> as is. But this implementation cannot emulate specific registers of some
> devices that allow unaligned access such as xHCI Host Controller Capability
> Registers.
> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
> unaligned access with multiple aligned access.
> 
> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>

FWIW, there has been a previous proposal for unaligned access support [1].

Thanks,

C.

[1] https://lore.kernel.org/qemu-devel/20170630030058.28943-1-andrew@aj.id.au/


> ---
>   system/memory.c | 22 ++++++++++++++++------
>   1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/system/memory.c b/system/memory.c
> index 798b6c0a17..b0caa90fef 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -539,6 +539,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>       unsigned i;
>       MemTxResult r = MEMTX_OK;
>       bool reentrancy_guard_applied = false;
> +    hwaddr aligned_addr;
> +    unsigned corrected_size = size;
> +    signed align_diff = 0;
>   
>       if (!access_size_min) {
>           access_size_min = 1;
> @@ -560,18 +563,25 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>           reentrancy_guard_applied = true;
>       }
>   
> -    /* FIXME: support unaligned access? */
>       access_size = MAX(MIN(size, access_size_max), access_size_min);
>       access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> +    if (!mr->ops->impl.unaligned) {
> +        aligned_addr = addr & ~(access_size - 1);
> +        align_diff = addr - aligned_addr;
> +        corrected_size = size < access_size ? access_size :
> +                            size + (align_diff > 0 ? access_size : 0);
> +        addr = aligned_addr;
> +    }
>       if (memory_region_big_endian(mr)) {
> -        for (i = 0; i < size; i += access_size) {
> +        for (i = 0; i < corrected_size; i += access_size) {
>               r |= access_fn(mr, addr + i, value, access_size,
> -                        (size - access_size - i) * 8, access_mask, attrs);
> +                        (size - access_size - i + align_diff) * 8,
> +                        access_mask, attrs);
>           }
>       } else {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
> -                        access_mask, attrs);
> +        for (i = 0; i < corrected_size; i += access_size) {
> +            r |= access_fn(mr, addr + i, value, access_size,
> +                        ((signed)i - align_diff) * 8, access_mask, attrs);
>           }
>       }
>       if (mr->dev && reentrancy_guard_applied) {
Tomoyuki HIROSE Dec. 15, 2023, 12:11 a.m. UTC | #2
On Mon, Dec 11, 2023 at 10:31 PM Cédric Le Goater <clg@redhat.com> wrote:
> FWIW, there has been a previous proposal for unaligned accesses support [1].
>
> Thanks,
>
> C.
>
> [1] https://lore.kernel.org/qemu-devel/20170630030058.28943-1-andrew@aj.id.au/
>

Thanks for the information.
That patch was an RFC, and unfortunately it doesn't seem to have been merged.
This feature is useful for devices that allow unaligned access.

Regards,

Tomoyuki HIROSE

Peter Maydell Jan. 12, 2024, 3:48 p.m. UTC | #3
On Mon, 11 Dec 2023 at 07:14, Tomoyuki HIROSE
<tomoyuki.hirose@igel.co.jp> wrote:
>
> The previous code ignored 'impl.unaligned' and handled unaligned accesses
> as is. But this implementation cannot emulate specific registers of some
> devices that allow unaligned access such as xHCI Host Controller Capability
> Registers.
> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
> unaligned access with multiple aligned access.
>
> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>

Sorry it has taken me so long to get to reviewing this.

So, first of all, I think this is definitely a cleaner looking
patch than the other one that Cédric posted a link to: if we
can have access_with_adjusted_size() cope with the unaligned
case that seems nicer than having different functions for the
aligned and the unaligned cases.

My review comments below are basically about fiddly corner case
details.

> ---
>  system/memory.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/system/memory.c b/system/memory.c
> index 798b6c0a17..b0caa90fef 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -539,6 +539,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>      unsigned i;
>      MemTxResult r = MEMTX_OK;
>      bool reentrancy_guard_applied = false;
> +    hwaddr aligned_addr;
> +    unsigned corrected_size = size;
> +    signed align_diff = 0;
>
>      if (!access_size_min) {
>          access_size_min = 1;
> @@ -560,18 +563,25 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          reentrancy_guard_applied = true;
>      }
>
> -    /* FIXME: support unaligned access? */
>      access_size = MAX(MIN(size, access_size_max), access_size_min);
>      access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> +    if (!mr->ops->impl.unaligned) {
> +        aligned_addr = addr & ~(access_size - 1);
> +        align_diff = addr - aligned_addr;
> +        corrected_size = size < access_size ? access_size :
> +                            size + (align_diff > 0 ? access_size : 0);

I don't think this calculation of corrected_size is right for
the case when size < access_size. Consider:
 * size = 2, access_size = 4, addr = 3, little-endian:
memory contents from 0 are bytes AA BB CC DD EE FF ...

We calculate corrected_size of 4, and we will then do a
single 4-byte read of 0xDDCCBBAA. But we need to do two
4-byte reads, because the final result we want to return
is 0xEEDD.
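
(Purely for illustration, a minimal standalone C sketch of this case,
plugging the example's numbers into the patch's corrected_size expression;
this is test code, not QEMU code:)

    #include <stdio.h>

    int main(void)
    {
        unsigned size = 2, access_size = 4;
        unsigned long addr = 3;
        unsigned long aligned_addr = addr & ~(unsigned long)(access_size - 1); /* 0 */
        long align_diff = (long)(addr - aligned_addr);                         /* 3 */
        unsigned corrected_size = size < access_size ? access_size :
                                  size + (align_diff > 0 ? access_size : 0);   /* 4 */
        /* A single access_size chunk covers bytes [0, 4), so byte 4 (0xEE)
         * is never read and 0xEEDD cannot be assembled from it. */
        printf("corrected_size = %u, chunks read = %u\n",
               corrected_size, corrected_size / access_size);
        return 0;
    }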

I think also that we don't really get the optimal behaviour
here because we select access_size assuming the aligned case,
rather than selecting it specifically for the combination
of input size and align_diff in the unaligned case.
Consider: access_size_min = 2, access_size_max = 8, size = 4,
addr = 2. We'll compute access_size to be 4, and then do
the unaligned access with two 4-byte reads. But we could
better have done it with two 2-byte reads. This matters
especially for the write case, because two 2-byte writes
allows us to avoid the problem of "what do we write for
the parts of the 4-byte writes that we don't have data
from the caller for". (See below for more on that.)
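
(One illustrative way to pick a smaller chunk in the unaligned case;
pick_access_size is a hypothetical helper, not an existing QEMU function:)

    /* Largest power-of-two chunk such that both the start address and the
     * size are multiples of it, clamped to [min, max].  For addr = 2,
     * size = 4, min = 2, max = 8 this returns 2. */
    static unsigned pick_access_size(unsigned long addr, unsigned size,
                                     unsigned min, unsigned max)
    {
        unsigned s = max;
        while (s > min && ((addr | size) & (s - 1))) {
            s >>= 1;
        }
        return s;
    }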

> +        addr = aligned_addr;
> +    }
>      if (memory_region_big_endian(mr)) {
> -        for (i = 0; i < size; i += access_size) {
> +        for (i = 0; i < corrected_size; i += access_size) {
>              r |= access_fn(mr, addr + i, value, access_size,
> -                        (size - access_size - i) * 8, access_mask, attrs);
> +                        (size - access_size - i + align_diff) * 8,
> +                        access_mask, attrs);
>          }
>      } else {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
> -                        access_mask, attrs);
> +        for (i = 0; i < corrected_size; i += access_size) {
> +            r |= access_fn(mr, addr + i, value, access_size,
> +                        ((signed)i - align_diff) * 8, access_mask, attrs);
>          }

So, with these loops, for unaligned accesses we now load an
extra chunk and adjust the shifts so we get the right parts
of the chunks we read. However I think we also need to be
careful with the access mask for the final access (or the
first access in the big-endian case).

Consider:
 * access_size = 2, size = 4, align_diff = 1, little endian,
   addr = 1 initially (so aligned down to 0), read:
and the memory being bytes AA BB CC DD EE FF ... starting at 0.
We'll load:
 * from addr 0, 0xBBAA, which we shift right by 1 for 0xBB
 * from addr 2, 0xDDCC, shift left 1, for 0xDDCC00
 * from addr 4, 0xFFEE, shift left 3, for 0xFFEE000000
and then we OR those together for
  0xFFEEDDCCBB
so we will have written into *value an extra 0xFF byte that
we should not have done. That last access from addr 4 should
have an access_mask that says we only want part of it.
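
(Again just as an illustration, the same arithmetic as a standalone C
snippet with the chunk values hard-coded from the example; not QEMU code:)

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* memory from address 0: AA BB CC DD EE FF; access_size = 2,
         * size = 4, align_diff = 1, loop index i = 0, 2, 4 */
        uint64_t chunk[3] = { 0xBBAA, 0xDDCC, 0xFFEE };
        int shift[3] = { (0 - 1) * 8, (2 - 1) * 8, (4 - 1) * 8 };
        uint64_t value = 0;
        for (int i = 0; i < 3; i++) {
            value |= shift[i] < 0 ? chunk[i] >> -shift[i]
                                  : chunk[i] << shift[i];
        }
        /* Prints 0xffeeddccbb: the 0xff byte is spurious; masking the final
         * chunk would have given the intended 0xeeddccbb. */
        printf("0x%llx\n", (unsigned long long)value);
        return 0;
    }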

For writes, things are worse, because we'll do a 2 byte
write that writes whatever garbage might have been in the
high part of *value. If the device permits an access of
smaller size (in this case byte) we can do the end part at
that size (or even do the whole write at that size if it's
simpler). If the device doesn't permit that smaller size
write it's not clear to me what the behaviour should be.
(I was going to suggest maybe we should just rule that as
not permitted until we run into it, but then your patch 2
puts the xhci-usb device into this category, although
harmlessly because it happens to implement writes as ignored.)
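
(For completeness, a hypothetical sketch of the byte-granularity fallback
for writes, assuming the device does accept 1-byte accesses; write_unaligned
and wr are made-up names, not QEMU code:)

    #include <stdint.h>

    static void write_unaligned(uint64_t addr, const uint8_t *buf, unsigned len,
                                unsigned access_size,
                                void (*wr)(uint64_t, const uint8_t *, unsigned))
    {
        while (len && (addr & (access_size - 1))) {  /* head: bytes until aligned */
            wr(addr++, buf++, 1);
            len--;
        }
        while (len >= access_size) {                 /* body: full aligned chunks */
            wr(addr, buf, access_size);
            addr += access_size;
            buf += access_size;
            len -= access_size;
        }
        while (len) {                                /* tail: leftover bytes */
            wr(addr++, buf++, 1);
            len--;
        }
    }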

thanks
-- PMM
Tomoyuki HIROSE Jan. 18, 2024, 6:43 a.m. UTC | #4
Hello,

Thank you for reviewing my patches.
The corner-case examples you explained are very helpful.
I'm currently working on the fixes and will submit v2 as soon
as they are complete, so please wait a little longer.

thanks,
Tomoyuki HIROSE

Patch

diff --git a/system/memory.c b/system/memory.c
index 798b6c0a17..b0caa90fef 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -539,6 +539,9 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
     unsigned i;
     MemTxResult r = MEMTX_OK;
     bool reentrancy_guard_applied = false;
+    hwaddr aligned_addr;
+    unsigned corrected_size = size;
+    signed align_diff = 0;
 
     if (!access_size_min) {
         access_size_min = 1;
@@ -560,18 +563,25 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
         reentrancy_guard_applied = true;
     }
 
-    /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
+    if (!mr->ops->impl.unaligned) {
+        aligned_addr = addr & ~(access_size - 1);
+        align_diff = addr - aligned_addr;
+        corrected_size = size < access_size ? access_size :
+                            size + (align_diff > 0 ? access_size : 0);
+        addr = aligned_addr;
+    }
     if (memory_region_big_endian(mr)) {
-        for (i = 0; i < size; i += access_size) {
+        for (i = 0; i < corrected_size; i += access_size) {
             r |= access_fn(mr, addr + i, value, access_size,
-                        (size - access_size - i) * 8, access_mask, attrs);
+                        (size - access_size - i + align_diff) * 8,
+                        access_mask, attrs);
         }
     } else {
-        for (i = 0; i < size; i += access_size) {
-            r |= access_fn(mr, addr + i, value, access_size, i * 8,
-                        access_mask, attrs);
+        for (i = 0; i < corrected_size; i += access_size) {
+            r |= access_fn(mr, addr + i, value, access_size,
+                        ((signed)i - align_diff) * 8, access_mask, attrs);
         }
     }
     if (mr->dev && reentrancy_guard_applied) {