diff mbox series

[v3,07/16] aspeed/smc: fix dma moving incorrect data length issue

Message ID 20240416091904.935283-8-jamin_lin@aspeedtech.com (mailing list archive)
State New
Headers show
Series Add AST2700 support | expand

Commit Message

Jamin Lin April 16, 2024, 9:18 a.m. UTC
DMA length is from 1 byte to 32MB for AST2600 and AST10x0
and DMA length is from 4 bytes to 32MB for AST2500.

In other words, if "R_DMA_LEN" is 0, it should move at least 1 byte
data for AST2600 and AST10x0 and 4 bytes data for AST2500.
To support all ASPEED SOCs, adds dma_start_length parameter to store
the start length, add helper routines function to compute the dma length
and update DMA_LENGTH mask to "1FFFFFF" to fix dma moving
incorrect data length issue.

Currently, only supports dma length 4 bytes aligned.

Signed-off-by: Troy Lee <troy_lee@aspeedtech.com>
Signed-off-by: Jamin Lin <jamin_lin@aspeedtech.com>
---
 hw/ssi/aspeed_smc.c         | 52 ++++++++++++++++++++++++++++++++-----
 include/hw/ssi/aspeed_smc.h |  1 +
 2 files changed, 46 insertions(+), 7 deletions(-)

Comments

Cédric Le Goater April 19, 2024, 1:41 p.m. UTC | #1
On 4/16/24 11:18, Jamin Lin wrote:
> DMA length is from 1 byte to 32MB for AST2600 and AST10x0
> and DMA length is from 4 bytes to 32MB for AST2500.
> 
> In other words, if "R_DMA_LEN" is 0, it should move at least 1 byte
> data for AST2600 and AST10x0 and 4 bytes data for AST2500.
>> To support all ASPEED SOCs, adds dma_start_length parameter to store
> the start length, add helper routines function to compute the dma length
> and update DMA_LENGTH mask to "1FFFFFF" to fix dma moving
> incorrect data length issue.

OK. There are two problems to address, the "zero" length transfer and
the DMA length unit, which is missing today. Newer SoC use a 1 bit / byte
and older ones, AST2400 and AST2500, use 1 bit / 4 bytes.

We can introduce a AspeedSMCClass::dma_len_unit and rework the loop to :

     do {

       ....

        if (s->regs[R_DMA_LEN]) {
             s->regs[R_DMA_LEN] -= 4 / asc->dma_len_unit;
         }
     } while (s->regs[R_DMA_LEN]);

It should fix the current implementation.

I don't think this is necessary to add a Fixes tag because the problem
has been there for ages and no one reported it. Probably because the
only place DMA transfers are used is in U-Boot and transfers have a
non-zero length.

> Currently, only supports dma length 4 bytes aligned.

this looks like a third topic. So the minimum value R_DMA_LEN should
have on the AST2600 SoC and above is '3'. I would opt to replace the
DMA_LENGTH macro with a dma_length_sanitize() helper to fix the software
input of R_DMA_LEN.


Thanks,

C.


  
> Signed-off-by: Troy Lee <troy_lee@aspeedtech.com>
> Signed-off-by: Jamin Lin <jamin_lin@aspeedtech.com>
> ---
>   hw/ssi/aspeed_smc.c         | 52 ++++++++++++++++++++++++++++++++-----
>   include/hw/ssi/aspeed_smc.h |  1 +
>   2 files changed, 46 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
> index 8a8d77b480..71abc7a2d8 100644
> --- a/hw/ssi/aspeed_smc.c
> +++ b/hw/ssi/aspeed_smc.c
> @@ -178,13 +178,17 @@
>    * DMA flash addresses should be 4 bytes aligned and the valid address
>    * range is 0x20000000 - 0x2FFFFFFF.
>    *
> - * DMA length is from 4 bytes to 32MB
> + * DMA length is from 4 bytes to 32MB (AST2500)
>    *   0: 4 bytes
>    *   0x7FFFFF: 32M bytes
> + *
> + * DMA length is from 1 byte to 32MB (AST2600, AST10x0)
> + *   0: 1 byte
> + *   0x1FFFFFF: 32M bytes
>    */
>   #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
>   #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
> -#define DMA_LENGTH(val)         ((val) & 0x01FFFFFC)
> +#define DMA_LENGTH(val)         ((val) & 0x01FFFFFF)
>   
>   /* Flash opcodes. */
>   #define SPI_OP_READ       0x03    /* Read data bytes (low frequency) */
> @@ -843,6 +847,24 @@ static bool aspeed_smc_inject_read_failure(AspeedSMCState *s)
>       }
>   }
>   
> +static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
> +{
> +    AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
> +    uint32_t dma_len;
> +    uint32_t extra;
> +
> +    dma_len = s->regs[R_DMA_LEN] + asc->dma_start_length;
> +
> +    /* dma length 4 bytes aligned */
> +    extra = dma_len % 4;
> +
> +    if (extra != 0) {
> +        dma_len += 4 - extra;
> +    }
> +
> +    return dma_len;
> +}
> +
>   /*
>    * Accumulate the result of the reads to provide a checksum that will
>    * be used to validate the read timing settings.
> @@ -850,6 +872,7 @@ static bool aspeed_smc_inject_read_failure(AspeedSMCState *s)
>   static void aspeed_smc_dma_checksum(AspeedSMCState *s)
>   {
>       MemTxResult result;
> +    uint32_t dma_len;
>       uint32_t data;
>   
>       if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
> @@ -861,7 +884,9 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
>           aspeed_smc_dma_calibration(s);
>       }
>   
> -    while (s->regs[R_DMA_LEN]) {
> +    dma_len = aspeed_smc_dma_len(s);
> +
> +    while (dma_len) {
>           data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR],
>                                       MEMTXATTRS_UNSPECIFIED, &result);
>           if (result != MEMTX_OK) {
> @@ -877,7 +902,8 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
>            */
>           s->regs[R_DMA_CHECKSUM] += data;
>           s->regs[R_DMA_FLASH_ADDR] += 4;
> -        s->regs[R_DMA_LEN] -= 4;
> +        dma_len -= 4;
> +        s->regs[R_DMA_LEN] = dma_len;
>       }
>   
>       if (s->inject_failure && aspeed_smc_inject_read_failure(s)) {
> @@ -889,14 +915,17 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
>   static void aspeed_smc_dma_rw(AspeedSMCState *s)
>   {
>       MemTxResult result;
> +    uint32_t dma_len;
>       uint32_t data;
>   
> +    dma_len = aspeed_smc_dma_len(s);
> +
>       trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE ?
>                               "write" : "read",
>                               s->regs[R_DMA_FLASH_ADDR],
>                               s->regs[R_DMA_DRAM_ADDR],
> -                            s->regs[R_DMA_LEN]);
> -    while (s->regs[R_DMA_LEN]) {
> +                            dma_len);
> +    while (dma_len) {
>           if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
>               data = address_space_ldl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR],
>                                           MEMTXATTRS_UNSPECIFIED, &result);
> @@ -937,7 +966,8 @@ static void aspeed_smc_dma_rw(AspeedSMCState *s)
>            */
>           s->regs[R_DMA_FLASH_ADDR] += 4;
>           s->regs[R_DMA_DRAM_ADDR] += 4;
> -        s->regs[R_DMA_LEN] -= 4;
> +        dma_len -= 4;
> +        s->regs[R_DMA_LEN] = dma_len;
>           s->regs[R_DMA_CHECKSUM] += data;
>       }
>   }
> @@ -1381,6 +1411,7 @@ static void aspeed_2400_fmc_class_init(ObjectClass *klass, void *data)
>       asc->features          = ASPEED_SMC_FEATURE_DMA;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x1FFFFFFC;
> +    asc->dma_start_length  = 4;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_smc_reg_to_segment;
> @@ -1464,6 +1495,7 @@ static void aspeed_2500_fmc_class_init(ObjectClass *klass, void *data)
>       asc->features          = ASPEED_SMC_FEATURE_DMA;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x3FFFFFFC;
> +    asc->dma_start_length  = 4;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_smc_reg_to_segment;
> @@ -1620,6 +1652,7 @@ static void aspeed_2600_fmc_class_init(ObjectClass *klass, void *data)
>                                ASPEED_SMC_FEATURE_WDT_CONTROL;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x3FFFFFFC;
> +    asc->dma_start_length  = 1;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
> @@ -1658,6 +1691,7 @@ static void aspeed_2600_spi1_class_init(ObjectClass *klass, void *data)
>                                ASPEED_SMC_FEATURE_DMA_GRANT;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x3FFFFFFC;
> +    asc->dma_start_length  = 1;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
> @@ -1697,6 +1731,7 @@ static void aspeed_2600_spi2_class_init(ObjectClass *klass, void *data)
>                                ASPEED_SMC_FEATURE_DMA_GRANT;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x3FFFFFFC;
> +    asc->dma_start_length  = 1;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
> @@ -1778,6 +1813,7 @@ static void aspeed_1030_fmc_class_init(ObjectClass *klass, void *data)
>       asc->features          = ASPEED_SMC_FEATURE_DMA;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x000BFFFC;
> +    asc->dma_start_length  = 1;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_1030_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_1030_smc_reg_to_segment;
> @@ -1815,6 +1851,7 @@ static void aspeed_1030_spi1_class_init(ObjectClass *klass, void *data)
>       asc->features          = ASPEED_SMC_FEATURE_DMA;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x000BFFFC;
> +    asc->dma_start_length  = 1;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
> @@ -1851,6 +1888,7 @@ static void aspeed_1030_spi2_class_init(ObjectClass *klass, void *data)
>       asc->features          = ASPEED_SMC_FEATURE_DMA;
>       asc->dma_flash_mask    = 0x0FFFFFFC;
>       asc->dma_dram_mask     = 0x000BFFFC;
> +    asc->dma_start_length  = 1;
>       asc->nregs             = ASPEED_SMC_R_MAX;
>       asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
>       asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
> diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h
> index 8e1dda556b..f359ed22cc 100644
> --- a/include/hw/ssi/aspeed_smc.h
> +++ b/include/hw/ssi/aspeed_smc.h
> @@ -106,6 +106,7 @@ struct AspeedSMCClass {
>       uint32_t features;
>       hwaddr dma_flash_mask;
>       hwaddr dma_dram_mask;
> +    uint32_t dma_start_length;
>       uint32_t nregs;
>       uint32_t (*segment_to_reg)(const AspeedSMCState *s,
>                                  const AspeedSegments *seg);
Cédric Le Goater April 30, 2024, 7:22 a.m. UTC | #2
On 4/19/24 15:41, Cédric Le Goater wrote:
> On 4/16/24 11:18, Jamin Lin wrote:
>> DMA length is from 1 byte to 32MB for AST2600 and AST10x0
>> and DMA length is from 4 bytes to 32MB for AST2500.
>>
>> In other words, if "R_DMA_LEN" is 0, it should move at least 1 byte
>> data for AST2600 and AST10x0 and 4 bytes data for AST2500.
>>> To support all ASPEED SOCs, adds dma_start_length parameter to store
>> the start length, add helper routines function to compute the dma length
>> and update DMA_LENGTH mask to "1FFFFFF" to fix dma moving
>> incorrect data length issue.
> 
> OK. There are two problems to address, the "zero" length transfer and
> the DMA length unit, which is missing today. Newer SoC use a 1 bit / byte
> and older ones, AST2400 and AST2500, use 1 bit / 4 bytes.
> 
> We can introduce a AspeedSMCClass::dma_len_unit and rework the loop to :
> 
>      do {
> 
>        ....
> 
>         if (s->regs[R_DMA_LEN]) {
>              s->regs[R_DMA_LEN] -= 4 / asc->dma_len_unit;
>          }
>      } while (s->regs[R_DMA_LEN]);
> 
> It should fix the current implementation.


I checked what FW is doing on a QEMU ast2500-evb machine :
  
     U-Boot 2019.04-v00.04.12 (Sep 29 2022 - 10:40:37 +0000)
     ...
     
        Loading Kernel Image ... aspeed_smc_write @0x88 size 4: 0x80001000
     aspeed_smc_write @0x84 size 4: 0x20100130
     aspeed_smc_write @0x8c size 4: 0x3c6770
     aspeed_smc_write @0x80 size 4: 0x1
     aspeed_smc_dma_rw read flash:@0x00100130 dram:@0x1000 size:0x003c6774
     aspeed_smc_read @0x8 size 4: 0x800
     aspeed_smc_write @0x80 size 4: 0x0
     OK
        Loading Ramdisk to 8fef2000, end 8ffff604 ... aspeed_smc_write @0x88 size 4: 0x8fef2000
     aspeed_smc_write @0x84 size 4: 0x204cdde0
     aspeed_smc_write @0x8c size 4: 0x10d604
     aspeed_smc_write @0x80 size 4: 0x1
     aspeed_smc_dma_rw read flash:@0x004cdde0 dram:@0xfef2000 size:0x0010d608
     aspeed_smc_read @0x8 size 4: 0x800
     aspeed_smc_write @0x80 size 4: 0x0
     OK
        Loading Device Tree to 8fee7000, end 8fef135e ... aspeed_smc_write @0x88 size 4: 0x8fee7000
     aspeed_smc_write @0x84 size 4: 0x204c69b4
     aspeed_smc_write @0x8c size 4: 0x7360
     aspeed_smc_write @0x80 size 4: 0x1
     aspeed_smc_dma_rw read flash:@0x004c69b4 dram:@0xfee7000 size:0x00007364
     aspeed_smc_read @0x8 size 4: 0x800
     aspeed_smc_write @0x80 size 4: 0x0
     OK
     
     Starting kernel ...
     
It seems that the R_DMA_LEN register is set by FW without taking
into account the length unit ( bit / 4 bytes). Would you know why ?

If I change the model to match 1 bit / 4 bytes unit of the R_DMA_LEN
register. Linux fails to boot. I didn't dig further and this is
something we need to understand before committing.

> I don't think this is necessary to add a Fixes tag because the problem
> has been there for ages and no one reported it. Probably because the
> only place DMA transfers are used is in U-Boot and transfers have a
> non-zero length.
> 
>> Currently, only supports dma length 4 bytes aligned.

Is this 4 bytes alignment new for the AST2700 or is this something
you added because the mask of DMA_LENGTH is now relaxed to match
all addresses ?

#define DMA_LENGTH(val)         ((val) & 0x01FFFFFF)

Thanks,

C.
Jamin Lin April 30, 2024, 8:51 a.m. UTC | #3
Hi Cedric,
> On 4/19/24 15:41, Cédric Le Goater wrote:
> > On 4/16/24 11:18, Jamin Lin wrote:
> >> DMA length is from 1 byte to 32MB for AST2600 and AST10x0 and DMA
> >> length is from 4 bytes to 32MB for AST2500.
> >>
> >> In other words, if "R_DMA_LEN" is 0, it should move at least 1 byte
> >> data for AST2600 and AST10x0 and 4 bytes data for AST2500.
> >>> To support all ASPEED SOCs, adds dma_start_length parameter to store
> >> the start length, add helper routines function to compute the dma
> >> length and update DMA_LENGTH mask to "1FFFFFF" to fix dma moving
> >> incorrect data length issue.
> >
> > OK. There are two problems to address, the "zero" length transfer and
> > the DMA length unit, which is missing today. Newer SoC use a 1 bit /
> > byte and older ones, AST2400 and AST2500, use 1 bit / 4 bytes.
> >
> > We can introduce a AspeedSMCClass::dma_len_unit and rework the loop to :
> >
> >      do {
> >
> >        ....
> >
> >         if (s->regs[R_DMA_LEN]) {
> >              s->regs[R_DMA_LEN] -= 4 / asc->dma_len_unit;
> >          }
> >      } while (s->regs[R_DMA_LEN]);
> >
> > It should fix the current implementation.
> 
> 
> I checked what FW is doing on a QEMU ast2500-evb machine :
> 
>      U-Boot 2019.04-v00.04.12 (Sep 29 2022 - 10:40:37 +0000)
>      ...
> 
>         Loading Kernel Image ... aspeed_smc_write @0x88 size 4:
> 0x80001000
>      aspeed_smc_write @0x84 size 4: 0x20100130
>      aspeed_smc_write @0x8c size 4: 0x3c6770
>      aspeed_smc_write @0x80 size 4: 0x1
>      aspeed_smc_dma_rw read flash:@0x00100130 dram:@0x1000
> size:0x003c6774
>      aspeed_smc_read @0x8 size 4: 0x800
>      aspeed_smc_write @0x80 size 4: 0x0
>      OK
>         Loading Ramdisk to 8fef2000, end 8ffff604 ... aspeed_smc_write
> @0x88 size 4: 0x8fef2000
>      aspeed_smc_write @0x84 size 4: 0x204cdde0
>      aspeed_smc_write @0x8c size 4: 0x10d604
>      aspeed_smc_write @0x80 size 4: 0x1
>      aspeed_smc_dma_rw read flash:@0x004cdde0 dram:@0xfef2000
> size:0x0010d608
>      aspeed_smc_read @0x8 size 4: 0x800
>      aspeed_smc_write @0x80 size 4: 0x0
>      OK
>         Loading Device Tree to 8fee7000, end 8fef135e ... aspeed_smc_write
> @0x88 size 4: 0x8fee7000
>      aspeed_smc_write @0x84 size 4: 0x204c69b4
>      aspeed_smc_write @0x8c size 4: 0x7360
>      aspeed_smc_write @0x80 size 4: 0x1
>      aspeed_smc_dma_rw read flash:@0x004c69b4 dram:@0xfee7000
> size:0x00007364
>      aspeed_smc_read @0x8 size 4: 0x800
>      aspeed_smc_write @0x80 size 4: 0x0
>      OK
> 
>      Starting kernel ...
> 
> It seems that the R_DMA_LEN register is set by FW without taking into account
> the length unit ( bit / 4 bytes). Would you know why ?
> 
https://github.com/AspeedTech-BMC/u-boot/blob/aspeed-master-v2019.04/lib/string.c#L559
This line make user input data length 4 bytes alignment.
https://github.com/AspeedTech-BMC/u-boot/blob/aspeed-master-v2019.04/arch/arm/mach-aspeed/ast2500/utils.S#L35
This line set the value of count parameter to AST_FMC_DNA_LENGTH.
AST_FMC_DMA_LENGTH is 4 bytes alignment value.
Example: input 4
((4+3)/4) * 4 --> (7/4) *4 ---> 4
If AST_FMC_DMA_LENGTH is 0, it means it should move 4 bytes data and AST_FMC_DMA_LENGTH do not need to be divided by 4. 

> If I change the model to match 1 bit / 4 bytes unit of the R_DMA_LEN register.
> Linux fails to boot. I didn't dig further and this is something we need to
> understand before committing.
> 
> > I don't think this is necessary to add a Fixes tag because the problem
> > has been there for ages and no one reported it. Probably because the
> > only place DMA transfers are used is in U-Boot and transfers have a
> > non-zero length.
> >
> >> Currently, only supports dma length 4 bytes aligned.
> 
> Is this 4 bytes alignment new for the AST2700 or is this something you added
> because the mask of DMA_LENGTH is now relaxed to match all addresses ?
> 
> #define DMA_LENGTH(val)         ((val) & 0x01FFFFFF)
AST2700, AST2600 and AST1030 is from 1 byte to 1FFFFFF, so I change this Micro to fix data lost.
https://github.com/AspeedTech-BMC/u-boot/blob/aspeed-master-v2023.10/arch/arm/mach-aspeed/ast2700/spi.c#L186
Please see this line, it decrease dma_len 1 byte first then, set to DMA_LEN register because DMA_LEN is 0 which means should move 1 byte data if DMA enables for AST2600, AST1030 and AST2700. 
> 
> Thanks,
> 
> C.

Only AST2500 need 4 bytes alignment for DMA Length. However, according to the design of sapped_smc_dma_rw function,
it utilizes address_space_stl_le API and it only works data 4 bytes alignment. https://github.com/qemu/qemu/blob/master/hw/ssi/aspeed_smc.c#L889 
For example,
If users want to move 0x101 data_length, after 0x100 data has been moved and remains 1 byte data need to be moved.
Please see this line program, https://github.com/qemu/qemu/blob/master/hw/ssi/aspeed_smc.c#L940
```
s->regs[R_DMA_LEN] -= 4;
```
The value of s->regs[R_DMA_LEN] becomes 0xffffffffXX because it is uint32_t data type and this while loop run in the unexpected behavior, https://github.com/qemu/qemu/blob/master/hw/ssi/aspeed_smc.c#L864
That was, why I set 4bytes alignment for all models.
diff mbox series

Patch

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 8a8d77b480..71abc7a2d8 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -178,13 +178,17 @@ 
  * DMA flash addresses should be 4 bytes aligned and the valid address
  * range is 0x20000000 - 0x2FFFFFFF.
  *
- * DMA length is from 4 bytes to 32MB
+ * DMA length is from 4 bytes to 32MB (AST2500)
  *   0: 4 bytes
  *   0x7FFFFF: 32M bytes
+ *
+ * DMA length is from 1 byte to 32MB (AST2600, AST10x0)
+ *   0: 1 byte
+ *   0x1FFFFFF: 32M bytes
  */
 #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
 #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
-#define DMA_LENGTH(val)         ((val) & 0x01FFFFFC)
+#define DMA_LENGTH(val)         ((val) & 0x01FFFFFF)
 
 /* Flash opcodes. */
 #define SPI_OP_READ       0x03    /* Read data bytes (low frequency) */
@@ -843,6 +847,24 @@  static bool aspeed_smc_inject_read_failure(AspeedSMCState *s)
     }
 }
 
+static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
+{
+    AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+    uint32_t dma_len;
+    uint32_t extra;
+
+    dma_len = s->regs[R_DMA_LEN] + asc->dma_start_length;
+
+    /* dma length 4 bytes aligned */
+    extra = dma_len % 4;
+
+    if (extra != 0) {
+        dma_len += 4 - extra;
+    }
+
+    return dma_len;
+}
+
 /*
  * Accumulate the result of the reads to provide a checksum that will
  * be used to validate the read timing settings.
@@ -850,6 +872,7 @@  static bool aspeed_smc_inject_read_failure(AspeedSMCState *s)
 static void aspeed_smc_dma_checksum(AspeedSMCState *s)
 {
     MemTxResult result;
+    uint32_t dma_len;
     uint32_t data;
 
     if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
@@ -861,7 +884,9 @@  static void aspeed_smc_dma_checksum(AspeedSMCState *s)
         aspeed_smc_dma_calibration(s);
     }
 
-    while (s->regs[R_DMA_LEN]) {
+    dma_len = aspeed_smc_dma_len(s);
+
+    while (dma_len) {
         data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR],
                                     MEMTXATTRS_UNSPECIFIED, &result);
         if (result != MEMTX_OK) {
@@ -877,7 +902,8 @@  static void aspeed_smc_dma_checksum(AspeedSMCState *s)
          */
         s->regs[R_DMA_CHECKSUM] += data;
         s->regs[R_DMA_FLASH_ADDR] += 4;
-        s->regs[R_DMA_LEN] -= 4;
+        dma_len -= 4;
+        s->regs[R_DMA_LEN] = dma_len;
     }
 
     if (s->inject_failure && aspeed_smc_inject_read_failure(s)) {
@@ -889,14 +915,17 @@  static void aspeed_smc_dma_checksum(AspeedSMCState *s)
 static void aspeed_smc_dma_rw(AspeedSMCState *s)
 {
     MemTxResult result;
+    uint32_t dma_len;
     uint32_t data;
 
+    dma_len = aspeed_smc_dma_len(s);
+
     trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE ?
                             "write" : "read",
                             s->regs[R_DMA_FLASH_ADDR],
                             s->regs[R_DMA_DRAM_ADDR],
-                            s->regs[R_DMA_LEN]);
-    while (s->regs[R_DMA_LEN]) {
+                            dma_len);
+    while (dma_len) {
         if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
             data = address_space_ldl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR],
                                         MEMTXATTRS_UNSPECIFIED, &result);
@@ -937,7 +966,8 @@  static void aspeed_smc_dma_rw(AspeedSMCState *s)
          */
         s->regs[R_DMA_FLASH_ADDR] += 4;
         s->regs[R_DMA_DRAM_ADDR] += 4;
-        s->regs[R_DMA_LEN] -= 4;
+        dma_len -= 4;
+        s->regs[R_DMA_LEN] = dma_len;
         s->regs[R_DMA_CHECKSUM] += data;
     }
 }
@@ -1381,6 +1411,7 @@  static void aspeed_2400_fmc_class_init(ObjectClass *klass, void *data)
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x1FFFFFFC;
+    asc->dma_start_length  = 4;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
@@ -1464,6 +1495,7 @@  static void aspeed_2500_fmc_class_init(ObjectClass *klass, void *data)
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 4;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
@@ -1620,6 +1652,7 @@  static void aspeed_2600_fmc_class_init(ObjectClass *klass, void *data)
                              ASPEED_SMC_FEATURE_WDT_CONTROL;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
@@ -1658,6 +1691,7 @@  static void aspeed_2600_spi1_class_init(ObjectClass *klass, void *data)
                              ASPEED_SMC_FEATURE_DMA_GRANT;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
@@ -1697,6 +1731,7 @@  static void aspeed_2600_spi2_class_init(ObjectClass *klass, void *data)
                              ASPEED_SMC_FEATURE_DMA_GRANT;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
@@ -1778,6 +1813,7 @@  static void aspeed_1030_fmc_class_init(ObjectClass *klass, void *data)
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x000BFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_1030_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_1030_smc_reg_to_segment;
@@ -1815,6 +1851,7 @@  static void aspeed_1030_spi1_class_init(ObjectClass *klass, void *data)
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x000BFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
@@ -1851,6 +1888,7 @@  static void aspeed_1030_spi2_class_init(ObjectClass *klass, void *data)
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x000BFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h
index 8e1dda556b..f359ed22cc 100644
--- a/include/hw/ssi/aspeed_smc.h
+++ b/include/hw/ssi/aspeed_smc.h
@@ -106,6 +106,7 @@  struct AspeedSMCClass {
     uint32_t features;
     hwaddr dma_flash_mask;
     hwaddr dma_dram_mask;
+    uint32_t dma_start_length;
     uint32_t nregs;
     uint32_t (*segment_to_reg)(const AspeedSMCState *s,
                                const AspeedSegments *seg);