diff mbox series

[v2,1/2] system/memory.c: support unaligned access

Message ID 20240201081313.1339788-2-tomoyuki.hirose@igel.co.jp (mailing list archive)
State New, archived
Headers show
Series support unaligned access for some xHCI registers | expand

Commit Message

Tomoyuki HIROSE Feb. 1, 2024, 8:13 a.m. UTC
The previous code ignored 'impl.unaligned' and passed unaligned accesses
through as is. That behaviour cannot emulate certain registers of some
devices that allow unaligned access, such as the xHCI Host Controller
Capability Registers.
This commit checks 'impl.unaligned' and, if it is false, QEMU emulates the
unaligned access with multiple aligned accesses.

Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
---
 system/memory.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

Comments

Tomoyuki HIROSE Feb. 26, 2024, 7:28 a.m. UTC | #1
Hello,
I would be happy if you could give me some comments.

ping.

On Thu, Feb 1, 2024 at 5:14 PM Tomoyuki HIROSE
<tomoyuki.hirose@igel.co.jp> wrote:
>
> The previous code ignored 'impl.unaligned' and handled unaligned accesses
> as is. But this implementation cannot emulate specific registers of some
> devices that allow unaligned access such as xHCI Host Controller Capability
> Registers.
> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
> unaligned access with multiple aligned access.
>
> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
> ---
>  system/memory.c | 38 +++++++++++++++++++++++++-------------
>  1 file changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/system/memory.c b/system/memory.c
> index a229a79988..a7ca0c9f54 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -535,10 +535,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>                                        MemTxAttrs attrs)
>  {
>      uint64_t access_mask;
> +    unsigned access_mask_shift;
> +    unsigned access_mask_start_offset;
> +    unsigned access_mask_end_offset;
>      unsigned access_size;
> -    unsigned i;
>      MemTxResult r = MEMTX_OK;
>      bool reentrancy_guard_applied = false;
> +    bool is_big_endian = memory_region_big_endian(mr);
> +    signed start_diff;
> +    signed current_offset;
> +    signed access_shift;
> +    hwaddr current_addr;
>
>      if (!access_size_min) {
>          access_size_min = 1;
> @@ -560,19 +567,24 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          reentrancy_guard_applied = true;
>      }
>
> -    /* FIXME: support unaligned access? */
>      access_size = MAX(MIN(size, access_size_max), access_size_min);
> -    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> -    if (memory_region_big_endian(mr)) {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size,
> -                        (size - access_size - i) * 8, access_mask, attrs);
> -        }
> -    } else {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
> -                        access_mask, attrs);
> -        }
> +    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 1);
> +    current_addr = addr - start_diff;
> +    for (current_offset = -start_diff; current_offset < (signed)size;
> +         current_offset += access_size, current_addr += access_size) {
> +        access_shift = is_big_endian
> +                          ? (signed)size - (signed)access_size - current_offset
> +                          : current_offset;
> +        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
> +        access_mask_start_offset = current_offset > 0 ? current_offset : 0;
> +        access_mask_end_offset = current_offset + access_size > size
> +                                     ? size
> +                                     : current_offset + access_size;
> +        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
> +            (access_mask_end_offset - access_mask_start_offset) * 8);
> +
> +        r |= access_fn(mr, current_addr, value, access_size, access_shift * 8,
> +                       access_mask, attrs);
>      }
>      if (mr->dev && reentrancy_guard_applied) {
>          mr->dev->mem_reentrancy_guard.engaged_in_io = false;
> --
> 2.39.2
>
Tomoyuki HIROSE March 18, 2024, 4:34 a.m. UTC | #2
ping.

On Mon, Feb 26, 2024 at 4:28 PM Tomoyuki Hirose
<tomoyuki.hirose@igel.co.jp> wrote:
>
> Hello,
> I would be happy if you could give me some comments.
>
> ping.
>
> On Thu, Feb 1, 2024 at 5:14 PM Tomoyuki HIROSE
> <tomoyuki.hirose@igel.co.jp> wrote:
> >
> > The previous code ignored 'impl.unaligned' and handled unaligned accesses
> > as is. But this implementation cannot emulate specific registers of some
> > devices that allow unaligned access such as xHCI Host Controller Capability
> > Registers.
> > This commit checks 'impl.unaligned' and if it is false, QEMU emulates
> > unaligned access with multiple aligned access.
> >
> > Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
> > ---
> >  system/memory.c | 38 +++++++++++++++++++++++++-------------
> >  1 file changed, 25 insertions(+), 13 deletions(-)
> >
> > diff --git a/system/memory.c b/system/memory.c
> > index a229a79988..a7ca0c9f54 100644
> > --- a/system/memory.c
> > +++ b/system/memory.c
> > @@ -535,10 +535,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> >                                        MemTxAttrs attrs)
> >  {
> >      uint64_t access_mask;
> > +    unsigned access_mask_shift;
> > +    unsigned access_mask_start_offset;
> > +    unsigned access_mask_end_offset;
> >      unsigned access_size;
> > -    unsigned i;
> >      MemTxResult r = MEMTX_OK;
> >      bool reentrancy_guard_applied = false;
> > +    bool is_big_endian = memory_region_big_endian(mr);
> > +    signed start_diff;
> > +    signed current_offset;
> > +    signed access_shift;
> > +    hwaddr current_addr;
> >
> >      if (!access_size_min) {
> >          access_size_min = 1;
> > @@ -560,19 +567,24 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> >          reentrancy_guard_applied = true;
> >      }
> >
> > -    /* FIXME: support unaligned access? */
> >      access_size = MAX(MIN(size, access_size_max), access_size_min);
> > -    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> > -    if (memory_region_big_endian(mr)) {
> > -        for (i = 0; i < size; i += access_size) {
> > -            r |= access_fn(mr, addr + i, value, access_size,
> > -                        (size - access_size - i) * 8, access_mask, attrs);
> > -        }
> > -    } else {
> > -        for (i = 0; i < size; i += access_size) {
> > -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
> > -                        access_mask, attrs);
> > -        }
> > +    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 1);
> > +    current_addr = addr - start_diff;
> > +    for (current_offset = -start_diff; current_offset < (signed)size;
> > +         current_offset += access_size, current_addr += access_size) {
> > +        access_shift = is_big_endian
> > +                          ? (signed)size - (signed)access_size - current_offset
> > +                          : current_offset;
> > +        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
> > +        access_mask_start_offset = current_offset > 0 ? current_offset : 0;
> > +        access_mask_end_offset = current_offset + access_size > size
> > +                                     ? size
> > +                                     : current_offset + access_size;
> > +        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
> > +            (access_mask_end_offset - access_mask_start_offset) * 8);
> > +
> > +        r |= access_fn(mr, current_addr, value, access_size, access_shift * 8,
> > +                       access_mask, attrs);
> >      }
> >      if (mr->dev && reentrancy_guard_applied) {
> >          mr->dev->mem_reentrancy_guard.engaged_in_io = false;
> > --
> > 2.39.2
> >
Peter Xu March 18, 2024, 4:15 p.m. UTC | #3
Hi,

On Thu, Feb 01, 2024 at 05:13:12PM +0900, Tomoyuki HIROSE wrote:
> The previous code ignored 'impl.unaligned' and handled unaligned accesses
> as is. But this implementation cannot emulate specific registers of some
> devices that allow unaligned access such as xHCI Host Controller Capability
> Registers.
> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
> unaligned access with multiple aligned access.

This patch looks mostly good to me.  Just a few trivial comments.

Firstly, can we provide the USB example here (and ideally the bug link too)
so that people can still pick up the context of why this is useful when
they read this commit on its own?

> 
> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
> ---
>  system/memory.c | 38 +++++++++++++++++++++++++-------------
>  1 file changed, 25 insertions(+), 13 deletions(-)
> 
> diff --git a/system/memory.c b/system/memory.c
> index a229a79988..a7ca0c9f54 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -535,10 +535,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>                                        MemTxAttrs attrs)
>  {
>      uint64_t access_mask;
> +    unsigned access_mask_shift;
> +    unsigned access_mask_start_offset;
> +    unsigned access_mask_end_offset;
>      unsigned access_size;
> -    unsigned i;
>      MemTxResult r = MEMTX_OK;
>      bool reentrancy_guard_applied = false;
> +    bool is_big_endian = memory_region_big_endian(mr);
> +    signed start_diff;
> +    signed current_offset;
> +    signed access_shift;
> +    hwaddr current_addr;
>  
>      if (!access_size_min) {
>          access_size_min = 1;
> @@ -560,19 +567,24 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          reentrancy_guard_applied = true;
>      }
>  
> -    /* FIXME: support unaligned access? */
>      access_size = MAX(MIN(size, access_size_max), access_size_min);
> -    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> -    if (memory_region_big_endian(mr)) {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size,
> -                        (size - access_size - i) * 8, access_mask, attrs);
> -        }
> -    } else {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
> -                        access_mask, attrs);
> -        }
> +    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 1);
> +    current_addr = addr - start_diff;
> +    for (current_offset = -start_diff; current_offset < (signed)size;
> +         current_offset += access_size, current_addr += access_size) {
> +        access_shift = is_big_endian
> +                          ? (signed)size - (signed)access_size - current_offset
> +                          : current_offset;
> +        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
> +        access_mask_start_offset = current_offset > 0 ? current_offset : 0;
> +        access_mask_end_offset = current_offset + access_size > size
> +                                     ? size
> +                                     : current_offset + access_size;

Maybe this looks slightly easier to read?

        if (current_offset < 0) {
            access_mask_shift = -current_offset;
            access_mask_start_offset = 0;
        } else {
            access_mask_shift = 0;
            access_mask_start_offset = current_offset;
        }
        access_mask_end_offset = MIN(current_offset + access_size, size);

But I confess this can be pretty subjective..

Since PeterM commented on this previously, please remember to Cc PeterM too
in future posts, in case this gets overlooked.

Peter, do you still have any other comments or concerns?

Thanks,

> +        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
> +            (access_mask_end_offset - access_mask_start_offset) * 8);
> +
> +        r |= access_fn(mr, current_addr, value, access_size, access_shift * 8,
> +                       access_mask, attrs);
>      }
>      if (mr->dev && reentrancy_guard_applied) {
>          mr->dev->mem_reentrancy_guard.engaged_in_io = false;
> -- 
> 2.39.2
>
Philippe Mathieu-Daudé March 19, 2024, 6:43 a.m. UTC | #4
On 18/3/24 17:15, Peter Xu wrote:
> Hi,
> 
> On Thu, Feb 01, 2024 at 05:13:12PM +0900, Tomoyuki HIROSE wrote:
>> The previous code ignored 'impl.unaligned' and handled unaligned accesses
>> as is. But this implementation cannot emulate specific registers of some
>> devices that allow unaligned access such as xHCI Host Controller Capability
>> Registers.
>> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
>> unaligned access with multiple aligned access.
> 
> This patch looks mostly good to me.  Just a few trivial comments.
> 
> Firstly, can we provide the USB example here (or also the bug link) so that
> we can still pick up the context of why this will start to be useful when
> people read about this commit separately?
> 
>>
>> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
>> ---
>>   system/memory.c | 38 +++++++++++++++++++++++++-------------
>>   1 file changed, 25 insertions(+), 13 deletions(-)
>>
>> diff --git a/system/memory.c b/system/memory.c
>> index a229a79988..a7ca0c9f54 100644
>> --- a/system/memory.c
>> +++ b/system/memory.c
>> @@ -535,10 +535,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>>                                         MemTxAttrs attrs)
>>   {
>>       uint64_t access_mask;
>> +    unsigned access_mask_shift;
>> +    unsigned access_mask_start_offset;
>> +    unsigned access_mask_end_offset;
>>       unsigned access_size;
>> -    unsigned i;
>>       MemTxResult r = MEMTX_OK;
>>       bool reentrancy_guard_applied = false;
>> +    bool is_big_endian = memory_region_big_endian(mr);
>> +    signed start_diff;
>> +    signed current_offset;
>> +    signed access_shift;
>> +    hwaddr current_addr;
>>   
>>       if (!access_size_min) {
>>           access_size_min = 1;
>> @@ -560,19 +567,24 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>>           reentrancy_guard_applied = true;
>>       }
>>   
>> -    /* FIXME: support unaligned access? */
>>       access_size = MAX(MIN(size, access_size_max), access_size_min);
>> -    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
>> -    if (memory_region_big_endian(mr)) {
>> -        for (i = 0; i < size; i += access_size) {
>> -            r |= access_fn(mr, addr + i, value, access_size,
>> -                        (size - access_size - i) * 8, access_mask, attrs);
>> -        }
>> -    } else {
>> -        for (i = 0; i < size; i += access_size) {
>> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
>> -                        access_mask, attrs);
>> -        }
>> +    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 1);
>> +    current_addr = addr - start_diff;
>> +    for (current_offset = -start_diff; current_offset < (signed)size;
>> +         current_offset += access_size, current_addr += access_size) {
>> +        access_shift = is_big_endian
>> +                          ? (signed)size - (signed)access_size - current_offset
>> +                          : current_offset;
>> +        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
>> +        access_mask_start_offset = current_offset > 0 ? current_offset : 0;
>> +        access_mask_end_offset = current_offset + access_size > size
>> +                                     ? size
>> +                                     : current_offset + access_size;
> 
> Maybe this looks slightly easier to read?
> 
>          if (current_offset < 0) {
>              access_mask_shift = -current_offset;
>              access_mask_start_offset = 0;
>          } else {
>              access_mask_shift = 0;
>              access_mask_start_offset = current_offset;
>          }
>          access_mask_end_offset = MIN(current_offset + access_size, size);
> 
> But I confess this can be pretty subjective..
> 
> Since PeterM used to comment, please remember to copy PeterM too in the
> future post in case this got overlooked.
> 
> Peter, do you still have any other comments or concerns?

See also this thread:
https://lore.kernel.org/qemu-devel/20200331144225.67dadl6crwd57qvi@sirius.home.kraxel.org/
->
https://www.mail-archive.com/qemu-devel@nongnu.org/msg461247.html

Also, I seem to remember Richard mentioning that we should unify this
code for softmmu / physmem, but I might be wrong ...

> 
> Thanks,
> 
>> +        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
>> +            (access_mask_end_offset - access_mask_start_offset) * 8);
>> +
>> +        r |= access_fn(mr, current_addr, value, access_size, access_shift * 8,
>> +                       access_mask, attrs);
>>       }
>>       if (mr->dev && reentrancy_guard_applied) {
>>           mr->dev->mem_reentrancy_guard.engaged_in_io = false;
>> -- 
>> 2.39.2
>>
>
Philippe Mathieu-Daudé March 19, 2024, 6:50 a.m. UTC | #5
Hi Tomoyuki,

On 19/3/24 07:43, Philippe Mathieu-Daudé wrote:
> On 18/3/24 17:15, Peter Xu wrote:
>> Hi,
>>
>> On Thu, Feb 01, 2024 at 05:13:12PM +0900, Tomoyuki HIROSE wrote:
>>> The previous code ignored 'impl.unaligned' and handled unaligned 
>>> accesses
>>> as is. But this implementation cannot emulate specific registers of some
>>> devices that allow unaligned access such as xHCI Host Controller 
>>> Capability
>>> Registers.
>>> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
>>> unaligned access with multiple aligned access.
>>
>> This patch looks mostly good to me.  Just a few trivial comments.
>>
>> Firstly, can we provide the USB example here (or also the bug link) so 
>> that
>> we can still pick up the context of why this will start to be useful when
>> people read about this commit separately?
>>
>>>
>>> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
>>> ---
>>>   system/memory.c | 38 +++++++++++++++++++++++++-------------
>>>   1 file changed, 25 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/system/memory.c b/system/memory.c
>>> index a229a79988..a7ca0c9f54 100644
>>> --- a/system/memory.c
>>> +++ b/system/memory.c
>>> @@ -535,10 +535,17 @@ static MemTxResult 
>>> access_with_adjusted_size(hwaddr addr,
>>>                                         MemTxAttrs attrs)
>>>   {
>>>       uint64_t access_mask;
>>> +    unsigned access_mask_shift;
>>> +    unsigned access_mask_start_offset;
>>> +    unsigned access_mask_end_offset;
>>>       unsigned access_size;
>>> -    unsigned i;
>>>       MemTxResult r = MEMTX_OK;
>>>       bool reentrancy_guard_applied = false;
>>> +    bool is_big_endian = memory_region_big_endian(mr);
>>> +    signed start_diff;
>>> +    signed current_offset;
>>> +    signed access_shift;
>>> +    hwaddr current_addr;
>>>       if (!access_size_min) {
>>>           access_size_min = 1;
>>> @@ -560,19 +567,24 @@ static MemTxResult 
>>> access_with_adjusted_size(hwaddr addr,
>>>           reentrancy_guard_applied = true;
>>>       }
>>> -    /* FIXME: support unaligned access? */
>>>       access_size = MAX(MIN(size, access_size_max), access_size_min);
>>> -    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
>>> -    if (memory_region_big_endian(mr)) {
>>> -        for (i = 0; i < size; i += access_size) {
>>> -            r |= access_fn(mr, addr + i, value, access_size,
>>> -                        (size - access_size - i) * 8, access_mask, 
>>> attrs);
>>> -        }
>>> -    } else {
>>> -        for (i = 0; i < size; i += access_size) {
>>> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
>>> -                        access_mask, attrs);
>>> -        }
>>> +    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 
>>> 1);
>>> +    current_addr = addr - start_diff;
>>> +    for (current_offset = -start_diff; current_offset < (signed)size;
>>> +         current_offset += access_size, current_addr += access_size) {
>>> +        access_shift = is_big_endian
>>> +                          ? (signed)size - (signed)access_size - 
>>> current_offset
>>> +                          : current_offset;
>>> +        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
>>> +        access_mask_start_offset = current_offset > 0 ? 
>>> current_offset : 0;
>>> +        access_mask_end_offset = current_offset + access_size > size
>>> +                                     ? size
>>> +                                     : current_offset + access_size;
>>
>> Maybe this looks slightly easier to read?
>>
>>          if (current_offset < 0) {
>>              access_mask_shift = -current_offset;
>>              access_mask_start_offset = 0;
>>          } else {
>>              access_mask_shift = 0;
>>              access_mask_start_offset = current_offset;
>>          }
>>          access_mask_end_offset = MIN(current_offset + access_size, 
>> size);
>>
>> But I confess this can be pretty subjective..
>>
>> Since PeterM used to comment, please remember to copy PeterM too in the
>> future post in case this got overlooked.
>>
>> Peter, do you still have any other comments or concerns?
> 
> See also this thread:
> https://lore.kernel.org/qemu-devel/20200331144225.67dadl6crwd57qvi@sirius.home.kraxel.org/
> ->
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg461247.html

I have now noticed that this thread was mentioned on v1, but not all the
people Cc'ed there are Cc'ed on v2.

To gain confidence and avoid regressions, I'd like to see some qtest
harness tests that trigger this problem. See for example:
https://lore.kernel.org/qemu-devel/20200817161853.593247-8-f4bug@amsat.org/

> 
> Also I guess remembering Richard mentioning we should unify this
> code for softmmu / physmem, but I might be wrong ...
> 
>>
>> Thanks,
>>
>>> +        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
>>> +            (access_mask_end_offset - access_mask_start_offset) * 8);
>>> +
>>> +        r |= access_fn(mr, current_addr, value, access_size, 
>>> access_shift * 8,
>>> +                       access_mask, attrs);
>>>       }
>>>       if (mr->dev && reentrancy_guard_applied) {
>>>           mr->dev->mem_reentrancy_guard.engaged_in_io = false;
>>> -- 
>>> 2.39.2
>>>
>>
>
Peter Maydell March 19, 2024, 2:08 p.m. UTC | #6
On Thu, 1 Feb 2024 at 08:15, Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp> wrote:
>
> The previous code ignored 'impl.unaligned' and handled unaligned accesses
> as is. But this implementation cannot emulate specific registers of some
> devices that allow unaligned access such as xHCI Host Controller Capability
> Registers.
> This commit checks 'impl.unaligned' and if it is false, QEMU emulates
> unaligned access with multiple aligned access.
>
> Signed-off-by: Tomoyuki HIROSE <tomoyuki.hirose@igel.co.jp>
> ---
>  system/memory.c | 38 +++++++++++++++++++++++++-------------
>  1 file changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/system/memory.c b/system/memory.c
> index a229a79988..a7ca0c9f54 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -535,10 +535,17 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>                                        MemTxAttrs attrs)
>  {
>      uint64_t access_mask;
> +    unsigned access_mask_shift;
> +    unsigned access_mask_start_offset;
> +    unsigned access_mask_end_offset;
>      unsigned access_size;
> -    unsigned i;
>      MemTxResult r = MEMTX_OK;
>      bool reentrancy_guard_applied = false;
> +    bool is_big_endian = memory_region_big_endian(mr);
> +    signed start_diff;
> +    signed current_offset;
> +    signed access_shift;

"signed foo" is a weird way to specify this type, which we use almost
nowhere else in the codebase -- this is equivalent to "int foo".

> +    hwaddr current_addr;
>
>      if (!access_size_min) {
>          access_size_min = 1;
> @@ -560,19 +567,24 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
>          reentrancy_guard_applied = true;
>      }
>
> -    /* FIXME: support unaligned access? */
>      access_size = MAX(MIN(size, access_size_max), access_size_min);

This still has a problem I noted for the v1 patch:
we compute the access_size without thinking about the alignment,
so for an access like:
 * addr = 2, size = 4, access_size_min = 2, access_size_max = 8
we will calculate access_size = 4 and do two 4-byte accesses
(at addresses 0 and 4) when we should do two 2-byte accesses
(at addresses 2 and 4).

> -    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> -    if (memory_region_big_endian(mr)) {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size,
> -                        (size - access_size - i) * 8, access_mask, attrs);
> -        }
> -    } else {
> -        for (i = 0; i < size; i += access_size) {
> -            r |= access_fn(mr, addr + i, value, access_size, i * 8,
> -                        access_mask, attrs);
> -        }
> +    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 1);
> +    current_addr = addr - start_diff;
> +    for (current_offset = -start_diff; current_offset < (signed)size;
> +         current_offset += access_size, current_addr += access_size) {
> +        access_shift = is_big_endian
> +                          ? (signed)size - (signed)access_size - current_offset
> +                          : current_offset;
> +        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
> +        access_mask_start_offset = current_offset > 0 ? current_offset : 0;
> +        access_mask_end_offset = current_offset + access_size > size
> +                                     ? size
> +                                     : current_offset + access_size;
> +        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
> +            (access_mask_end_offset - access_mask_start_offset) * 8);

I don't understand here why the access_mask_shift and the
access_mask_start_offset are different. Aren't we trying to create
a mask value with 1s from start through to end ?

> +
> +        r |= access_fn(mr, current_addr, value, access_size, access_shift * 8,
> +                       access_mask, attrs);
>      }
>      if (mr->dev && reentrancy_guard_applied) {
>          mr->dev->mem_reentrancy_guard.engaged_in_io = false;

I agree with Philippe that we could be a lot more confident in
this change if we had some unit tests that tested whether
various combinations of unaligned accesses turned into the
right sequence of accesses to the underlying device.

thanks
-- PMM
diff mbox series

Patch

diff --git a/system/memory.c b/system/memory.c
index a229a79988..a7ca0c9f54 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -535,10 +535,17 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
                                       MemTxAttrs attrs)
 {
     uint64_t access_mask;
+    unsigned access_mask_shift;
+    unsigned access_mask_start_offset;
+    unsigned access_mask_end_offset;
     unsigned access_size;
-    unsigned i;
     MemTxResult r = MEMTX_OK;
     bool reentrancy_guard_applied = false;
+    bool is_big_endian = memory_region_big_endian(mr);
+    signed start_diff;
+    signed current_offset;
+    signed access_shift;
+    hwaddr current_addr;
 
     if (!access_size_min) {
         access_size_min = 1;
@@ -560,19 +567,24 @@  static MemTxResult access_with_adjusted_size(hwaddr addr,
         reentrancy_guard_applied = true;
     }
 
-    /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
-    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
-    if (memory_region_big_endian(mr)) {
-        for (i = 0; i < size; i += access_size) {
-            r |= access_fn(mr, addr + i, value, access_size,
-                        (size - access_size - i) * 8, access_mask, attrs);
-        }
-    } else {
-        for (i = 0; i < size; i += access_size) {
-            r |= access_fn(mr, addr + i, value, access_size, i * 8,
-                        access_mask, attrs);
-        }
+    start_diff = mr->ops->impl.unaligned ? 0 : addr & (access_size - 1);
+    current_addr = addr - start_diff;
+    for (current_offset = -start_diff; current_offset < (signed)size;
+         current_offset += access_size, current_addr += access_size) {
+        access_shift = is_big_endian
+                          ? (signed)size - (signed)access_size - current_offset
+                          : current_offset;
+        access_mask_shift = current_offset > 0 ? 0 : -current_offset;
+        access_mask_start_offset = current_offset > 0 ? current_offset : 0;
+        access_mask_end_offset = current_offset + access_size > size
+                                     ? size
+                                     : current_offset + access_size;
+        access_mask = MAKE_64BIT_MASK(access_mask_shift * 8,
+            (access_mask_end_offset - access_mask_start_offset) * 8);
+
+        r |= access_fn(mr, current_addr, value, access_size, access_shift * 8,
+                       access_mask, attrs);
     }
     if (mr->dev && reentrancy_guard_applied) {
         mr->dev->mem_reentrancy_guard.engaged_in_io = false;