diff mbox series

[2/3] aspeed: Add Scatter-Gather support for HACE Hash

Message ID 20210324223846.17407-3-klaus@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show
Series aspeed: HACE hash Scatter-Gather support | expand

Commit Message

Klaus Heinrich Kiwi March 24, 2021, 10:38 p.m. UTC
Complement the Aspeed HACE support with Scatter-Gather hash support for
sha256 and sha512. Scatter-Gather is only supported on AST2600-series.

Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com>
---
 hw/misc/aspeed_hace.c         | 127 ++++++++++++++++++++++++++++++++--
 include/hw/misc/aspeed_hace.h |   6 ++
 2 files changed, 127 insertions(+), 6 deletions(-)

Comments

Joel Stanley March 25, 2021, 3:40 a.m. UTC | #1
On Wed, 24 Mar 2021 at 22:39, Klaus Heinrich Kiwi
<klaus@linux.vnet.ibm.com> wrote:
>
> Complement the Aspeed HACE support with Scatter-Gather hash support for
> sha256 and sha512. Scatter-Gather is only supported on AST2600-series.

Please update the documentation at docs/system/arm/aspeed.rst too.

>
> Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com>
> ---
>  hw/misc/aspeed_hace.c         | 127 ++++++++++++++++++++++++++++++++--
>  include/hw/misc/aspeed_hace.h |   6 ++
>  2 files changed, 127 insertions(+), 6 deletions(-)
>
> diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
> index 93313d2b80..8a37b1d961 100644
> --- a/hw/misc/aspeed_hace.c
> +++ b/hw/misc/aspeed_hace.c
> @@ -57,6 +57,10 @@
>  /* Other cmd bits */
>  #define  HASH_IRQ_EN                    BIT(9)
>  #define  HASH_SG_EN                     BIT(18)
> +/* Scatter-gather data list */
> +#define  SG_LIST_LAST                   BIT(31)
> +#define  SG_LIST_LEN_MASK               0x7fffffff
> +#define  SG_LIST_ADDR_MASK              0x7ffffff8  /* 8-byte aligned */
>
>  static const struct {
>      uint32_t mask;
> @@ -129,6 +133,117 @@ static int do_hash_operation(AspeedHACEState *s, int algo)
>      return 0;
>  }
>
> +static int do_hash_sg_operation(AspeedHACEState *s, int algo)
> +{
> +    uint32_t src, dest, reqSize;
> +    hwaddr len;
> +    const size_t reqLen = sizeof(struct aspeed_sg_list);

It would be more descriptive to use this sizeof where you need it,
instead of assigning to a constant.

> +    struct iovec iov[ASPEED_HACE_MAX_SG];
> +    unsigned int i = 0;
> +    unsigned int isLast = 0;

This sounds like it's a boolean.

> +    uint8_t *digestBuf = NULL;
> +    size_t digestLen = 0, size = 0;
> +    struct aspeed_sg_list *sgList;
> +    int rc;

This needs some work to match qemu coding style.

> +
> +    reqSize = s->regs[R_HASH_SRC_LEN];
> +    dest = s->regs[R_HASH_DEST];
> +
> +    while (!isLast && i < ASPEED_HACE_MAX_SG) {
> +        src = s->regs[R_HASH_SRC] + (i * reqLen);
> +        len = reqLen;
> +        sgList = (struct aspeed_sg_list *) address_space_map(&s->dram_as,

You can remove this cast as the function returns a void pointer.

> +                                                                     src,
> +                                                         (hwaddr *) &len,

You can remove this cast as the variable is already a hwaddr type.

> +                                                                   false,
> +                                                 MEMTXATTRS_UNSPECIFIED);

In the direct access code, we use address_space_map to save copying
the memory contents that is to be hashed. That's not the case for the
scatter gather list.

Instead of creating mappings to read the sg list, you could load the
addr, len pairs using address_space_ldl_le. This would give you the
pointer to create mappings for.

> +        if (!sgList) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s: failed to map dram for SG Array entry '%u' for address '0x%0x'\n",
> +             __func__, i, src);
> +            rc = -EACCES;
> +            goto cleanup;
> +        }
> +        if (len != reqLen)
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s:  Warning: dram map for SG array entry '%u' requested size '%lu' != mapped size '%lu'\n",
> +             __func__, i, reqLen, len);
> +
> +        isLast = sgList->len & SG_LIST_LAST;

You could drop the isLast variable, and perform this test at the
bottom of the while loop:

if (sgList->len & SG_LIST_LAST)
   break;

> +
> +        iov[i].iov_len = (hwaddr) (sgList->len & SG_LIST_LEN_MASK);
> +        iov[i].iov_base = address_space_map(&s->dram_as,
> +                            sgList->phy_addr & SG_LIST_ADDR_MASK,
> +                            &iov[i].iov_len, false,
> +                            MEMTXATTRS_UNSPECIFIED);
> +        if (!iov[i].iov_base) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s: failed to map dram for SG array entry '%u' for region '0x%x', len '%u'\n",
> +             __func__, i, sgList->phy_addr & SG_LIST_ADDR_MASK,
> +             sgList->len & SG_LIST_LEN_MASK);
> +            rc = -EACCES;
> +            goto cleanup;
> +        }
> +        if (iov[i].iov_len != (sgList->len & SG_LIST_LEN_MASK))
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s:  Warning: dram map for SG region entry %u requested size %u != mapped size %lu\n",
> +             __func__, i, (sgList->len & SG_LIST_LEN_MASK), iov[i].iov_len);
> +
> +
> +        address_space_unmap(&s->dram_as, (void *) sgList, len, false,
> +                            len);
> +        size += iov[i].iov_len;
> +        i++;
> +    }
> +
> +    if (!isLast) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                     "%s: Error: Exhausted maximum of '%u' SG array entries\n",
> +                     __func__, ASPEED_HACE_MAX_SG);
> +        rc = -ENOTSUP;
> +        goto cleanup;
> +    }
> +
> +    if (size != reqSize)
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +         "%s: Warning: requested SG total size %u != actual size %lu\n",
> +         __func__, reqSize, size);
> +
> +    rc = qcrypto_hash_bytesv(algo, iov, i, &digestBuf, &digestLen,
> +                            &error_fatal);
> +    if (rc < 0) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n",
> +                      __func__);
> +        goto cleanup;
> +    }
> +
> +    rc = address_space_write(&s->dram_as, dest, MEMTXATTRS_UNSPECIFIED,
> +                             digestBuf, digestLen);
> +    if (rc)
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: address space write failed\n", __func__);
> +    g_free(digestBuf);
> +
> +cleanup:
> +
> +    for (; i > 0; i--) {
> +        address_space_unmap(&s->dram_as, iov[i - 1].iov_base,
> +                            iov[i - 1].iov_len, false,
> +                            iov[i - 1].iov_len);
> +    }
> +
> +    /*
> +     * Set status bits to indicate completion. Testing shows hardware sets
> +     * these irrespective of HASH_IRQ_EN.

This is the same comment from the direct method. Have you confirmed
this is true on hardware?

> +     */
> +    if (!rc) {
> +        s->regs[R_STATUS] |= HASH_IRQ;
> +    }
> +
> +    return rc;
> +}
> +
> +
>
>  static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
>  {
> @@ -187,11 +302,6 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
>                            "%s: HMAC engine command mode %"PRIx64" not implemented",
>                            __func__, (data & HASH_HMAC_MASK) >> 8);
>          }
> -        if (data & HASH_SG_EN) {
> -            qemu_log_mask(LOG_UNIMP,
> -                          "%s: Hash scatter gather mode not implemented",
> -                          __func__);
> -        }
>          if (data & BIT(1)) {
>              qemu_log_mask(LOG_UNIMP,
>                            "%s: Cascaded mode not implemented",
> @@ -204,7 +314,12 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
>                          __func__, data & ahc->hash_mask);
>                  break;
>          }
> -        do_hash_operation(s, algo);
> +        if (data & HASH_SG_EN) {
> +            s->regs[(R_HASH_SRC >> 2)] &= 0x7FFFFFF8;

This is setting (0x20 / 4) >> 2 == 2, which is Crypto Data
Destination. I suspect you wanted R_HASH_SRC, so you can omit the
right shift.

However I suggest you check that hardware masks the register when the
write occurs, and if it does, implement that in the write callback for
R_HASH_SRC. That way a guest code doing a read-write will see the
correct thing.

> +            do_hash_sg_operation(s, algo);
> +        } else {
> +            do_hash_operation(s, algo);
> +        }
>
>          if (data & HASH_IRQ_EN) {
>              qemu_irq_raise(s->irq);
> diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h
> index 94d5ada95f..ead46afda9 100644
> --- a/include/hw/misc/aspeed_hace.h
> +++ b/include/hw/misc/aspeed_hace.h
> @@ -40,4 +40,10 @@ struct AspeedHACEClass {
>      uint32_t hash_mask;
>  };
>
> +#define ASPEED_HACE_MAX_SG      256
> +struct aspeed_sg_list {
> +        uint32_t len;
> +        uint32_t phy_addr;

Does "phy" mean physical? If so, we could call it phys_addr to avoid
confusion with the addresses of PHYs.

Alternatively, call it 'addr'.

> +} __attribute__ ((__packed__));
> +
>  #endif /* _ASPEED_HACE_H_ */
> --
> 2.25.1
>
Cédric Le Goater March 25, 2021, 7:55 a.m. UTC | #2
On 3/24/21 11:38 PM, Klaus Heinrich Kiwi wrote:
> Complement the Aspeed HACE support with Scatter-Gather hash support for
> sha256 and sha512. Scatter-Gather is only supported on AST2600-series.
> 
> Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com>

this looks good. A few extra comments,

> ---
>  hw/misc/aspeed_hace.c         | 127 ++++++++++++++++++++++++++++++++--
>  include/hw/misc/aspeed_hace.h |   6 ++
>  2 files changed, 127 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
> index 93313d2b80..8a37b1d961 100644
> --- a/hw/misc/aspeed_hace.c
> +++ b/hw/misc/aspeed_hace.c
> @@ -57,6 +57,10 @@
>  /* Other cmd bits */
>  #define  HASH_IRQ_EN                    BIT(9)
>  #define  HASH_SG_EN                     BIT(18)
> +/* Scatter-gather data list */
> +#define  SG_LIST_LAST                   BIT(31)
> +#define  SG_LIST_LEN_MASK               0x7fffffff
> +#define  SG_LIST_ADDR_MASK              0x7ffffff8  /* 8-byte aligned */
>  
>  static const struct {
>      uint32_t mask;
> @@ -129,6 +133,117 @@ static int do_hash_operation(AspeedHACEState *s, int algo)
>      return 0;
>  }
>  
> +static int do_hash_sg_operation(AspeedHACEState *s, int algo)

Do we really care about the return value ? 

> +{
> +    uint32_t src, dest, reqSize;
> +    hwaddr len;
> +    const size_t reqLen = sizeof(struct aspeed_sg_list);
> +    struct iovec iov[ASPEED_HACE_MAX_SG];
> +    unsigned int i = 0;
> +    unsigned int isLast = 0;
> +    uint8_t *digestBuf = NULL;
> +    size_t digestLen = 0, size = 0;
> +    struct aspeed_sg_list *sgList;
> +    int rc;
> +
> +    reqSize = s->regs[R_HASH_SRC_LEN];
> +    dest = s->regs[R_HASH_DEST];
> +
> +    while (!isLast && i < ASPEED_HACE_MAX_SG) {
> +        src = s->regs[R_HASH_SRC] + (i * reqLen);
> +        len = reqLen;
> +        sgList = (struct aspeed_sg_list *) address_space_map(&s->dram_as,
> +                                                                     src,
> +                                                         (hwaddr *) &len,
> +                                                                   false,
> +                                                 MEMTXATTRS_UNSPECIFIED);

This should be doing LE loads.

> +        if (!sgList) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s: failed to map dram for SG Array entry '%u' for address '0x%0x'\n",
> +             __func__, i, src);
> +            rc = -EACCES;
> +            goto cleanup;
> +        }
> +        if (len != reqLen)
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s:  Warning: dram map for SG array entry '%u' requested size '%lu' != mapped size '%lu'\n",
> +             __func__, i, reqLen, len);
> +
> +        isLast = sgList->len & SG_LIST_LAST;
> +
> +        iov[i].iov_len = (hwaddr) (sgList->len & SG_LIST_LEN_MASK);
> +        iov[i].iov_base = address_space_map(&s->dram_as,
> +                            sgList->phy_addr & SG_LIST_ADDR_MASK,
> +                            &iov[i].iov_len, false,
> +                            MEMTXATTRS_UNSPECIFIED);
> +        if (!iov[i].iov_base) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s: failed to map dram for SG array entry '%u' for region '0x%x', len '%u'\n",
> +             __func__, i, sgList->phy_addr & SG_LIST_ADDR_MASK,
> +             sgList->len & SG_LIST_LEN_MASK);
> +            rc = -EACCES;
> +            goto cleanup;
> +        }
> +        if (iov[i].iov_len != (sgList->len & SG_LIST_LEN_MASK))
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +             "%s:  Warning: dram map for SG region entry %u requested size %u != mapped size %lu\n",
> +             __func__, i, (sgList->len & SG_LIST_LEN_MASK), iov[i].iov_len);
> +
> +
> +        address_space_unmap(&s->dram_as, (void *) sgList, len, false,
> +                            len);
> +        size += iov[i].iov_len;
> +        i++;
> +    }
> +
> +    if (!isLast) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                     "%s: Error: Exhausted maximum of '%u' SG array entries\n",
> +                     __func__, ASPEED_HACE_MAX_SG);
> +        rc = -ENOTSUP;
> +        goto cleanup;
> +    }
> +
> +    if (size != reqSize)
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +         "%s: Warning: requested SG total size %u != actual size %lu\n",
> +         __func__, reqSize, size);
> +
> +    rc = qcrypto_hash_bytesv(algo, iov, i, &digestBuf, &digestLen,
> +                            &error_fatal);
> +    if (rc < 0) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n",
> +                      __func__);
> +        goto cleanup;
> +    }
> +
> +    rc = address_space_write(&s->dram_as, dest, MEMTXATTRS_UNSPECIFIED,
> +                             digestBuf, digestLen);
> +    if (rc)
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: address space write failed\n", __func__);
> +    g_free(digestBuf);
> +
> +cleanup:
> +
> +    for (; i > 0; i--) {
> +        address_space_unmap(&s->dram_as, iov[i - 1].iov_base,
> +                            iov[i - 1].iov_len, false,
> +                            iov[i - 1].iov_len);
> +    }
> +
> +    /*
> +     * Set status bits to indicate completion. Testing shows hardware sets
> +     * these irrespective of HASH_IRQ_EN.
> +     */
> +    if (!rc) {
> +        s->regs[R_STATUS] |= HASH_IRQ;
> +    }
> +
> +    return rc;
> +}
> +
> +
>  
>  static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
>  {
> @@ -187,11 +302,6 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
>                            "%s: HMAC engine command mode %"PRIx64" not implemented",
>                            __func__, (data & HASH_HMAC_MASK) >> 8);
>          }
> -        if (data & HASH_SG_EN) {
> -            qemu_log_mask(LOG_UNIMP,
> -                          "%s: Hash scatter gather mode not implemented",
> -                          __func__);
> -        }
>          if (data & BIT(1)) {
>              qemu_log_mask(LOG_UNIMP,
>                            "%s: Cascaded mode not implemented",
> @@ -204,7 +314,12 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
>                          __func__, data & ahc->hash_mask);
>                  break;
>          }
> -        do_hash_operation(s, algo);
> +        if (data & HASH_SG_EN) {
> +            s->regs[(R_HASH_SRC >> 2)] &= 0x7FFFFFF8;

I think Joel introduced a class mask for this value.

> +            do_hash_sg_operation(s, algo);
> +        } else {
> +            do_hash_operation(s, algo);
> +        }
>  
>          if (data & HASH_IRQ_EN) {
>              qemu_irq_raise(s->irq);
> diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h
> index 94d5ada95f..ead46afda9 100644
> --- a/include/hw/misc/aspeed_hace.h
> +++ b/include/hw/misc/aspeed_hace.h
> @@ -40,4 +40,10 @@ struct AspeedHACEClass {
>      uint32_t hash_mask;
>  };
>  
> +#define ASPEED_HACE_MAX_SG      256
> +struct aspeed_sg_list {
> +        uint32_t len;
> +        uint32_t phy_addr;
> +} __attribute__ ((__packed__));
> +

May be keep the definition in the .c file 

Thanks,

C. 

>  #endif /* _ASPEED_HACE_H_ */
>
Klaus Heinrich Kiwi March 26, 2021, 4:47 p.m. UTC | #3
Hi Joel,

On 3/25/2021 12:40 AM, Joel Stanley wrote:
> On Wed, 24 Mar 2021 at 22:39, Klaus Heinrich Kiwi
> <klaus@linux.vnet.ibm.com> wrote:
>>
>> Complement the Aspeed HACE support with Scatter-Gather hash support for
>> sha256 and sha512. Scatter-Gather is only supported on AST2600-series.
> 
> Please update the documentation at docs/system/arm/aspeed.rst too.
>
I've removed the 'no scatter-gather' from the doc.

>> +                                                                   false,
>> +                                                 MEMTXATTRS_UNSPECIFIED);
> 
> In the direct access code, we use address_space_map to save copying
> the memory contents that is to be hashed. That's not the case for the
> scatter gather list.
> 
> Instead of creating mappings to read the sg list, you could load the
> addr, len pairs using address_space_ldl_le. This would give you the
> pointer to create mappings for.

I've reworked the code to use address_space_ldl_le, also removed the redundant
isLast variable.

  
>> +    /*
>> +     * Set status bits to indicate completion. Testing shows hardware sets
>> +     * these irrespective of HASH_IRQ_EN.
> 
> This is the same comment from the direct method. Have you confirmed
> this is true on hardware?
> 
Yes, I was able to confirm that on real hardware (AST2600-A1)

>> -        do_hash_operation(s, algo);
>> +        if (data & HASH_SG_EN) {
>> +            s->regs[(R_HASH_SRC >> 2)] &= 0x7FFFFFF8;
> 
> This is setting (0x20 / 4) >> 2 == 2, which is Crypto Data
> Destination. I suspect you wanted R_HASH_SRC, so you can omit the
> right shift.
> 
> However I suggest you check that hardware masks the register when the
> write occurs, and if it does, implement that in the write callback for
> R_HASH_SRC. That way a guest code doing a read-write will see the
> correct thing.

I was able to check on real hardware that, even when requesting a
HASH_SG_EN operation, the masking on the src address register is only
0x7fffffff, so I removed the masking specific to HASH_SG_EN.


>>   };
>>
>> +#define ASPEED_HACE_MAX_SG      256
>> +struct aspeed_sg_list {
>> +        uint32_t len;
>> +        uint32_t phy_addr;
> 
> Does "phy" mean physical? If so, we could call it phys_addr to avoid
> confusion with the addresses of PHYs.
> 
> Alternatively, call it 'addr'.

Since I'm now using address_space_ldl_le to access these, I decided to
do so based on #defines offsets, so I no longer need a Struct here.


Will send a V2 soon.

Thanks,

  -Klaus
Klaus Heinrich Kiwi March 26, 2021, 4:51 p.m. UTC | #4
On 3/25/2021 4:55 AM, Cédric Le Goater wrote:
> On 3/24/21 11:38 PM, Klaus Heinrich Kiwi wrote:
>> Complement the Aspeed HACE support with Scatter-Gather hash support for
>> sha256 and sha512. Scatter-Gather is only supported on AST2600-series.
>>
>> Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com>
> 
> this looks good. A few extra comments,
> 
>
>> @@ -129,6 +133,117 @@ static int do_hash_operation(AspeedHACEState *s, int algo)
>>       return 0;
>>   }
>>   
>> +static int do_hash_sg_operation(AspeedHACEState *s, int algo)
> 
> Do we really care about the return value ?

I'm keeping it consistent with do_hash_operation() above it. Maybe the underlying
Qemu layers could use it?

  
>> +        sgList = (struct aspeed_sg_list *) address_space_map(&s->dram_as,
>> +                                                                     src,
>> +                                                         (hwaddr *) &len,
>> +                                                                   false,
>> +                                                 MEMTXATTRS_UNSPECIFIED);
> 
> This should be doing LE loads.

ack. I'm using address_space_ldl_le() now.

  
>> -        do_hash_operation(s, algo);
>> +        if (data & HASH_SG_EN) {
>> +            s->regs[(R_HASH_SRC >> 2)] &= 0x7FFFFFF8;
> 
> I think Joel introduced a class mask for this value.

Turns out that the hardware doesn't do any additional masking on the src
register for the HASH_SG_EN operation, so I'll just remove it.

  
>>   
>> +#define ASPEED_HACE_MAX_SG      256
>> +struct aspeed_sg_list {
>> +        uint32_t len;
>> +        uint32_t phy_addr;
>> +} __attribute__ ((__packed__));
>> +> 
> May be keep the definition in the .c file

I actually opted to use #define offsets now that I'm using ldl_le

Thanks,

  -Klaus
diff mbox series

Patch

diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
index 93313d2b80..8a37b1d961 100644
--- a/hw/misc/aspeed_hace.c
+++ b/hw/misc/aspeed_hace.c
@@ -57,6 +57,10 @@ 
 /* Other cmd bits */
 #define  HASH_IRQ_EN                    BIT(9)
 #define  HASH_SG_EN                     BIT(18)
+/* Scatter-gather data list */
+#define  SG_LIST_LAST                   BIT(31)
+#define  SG_LIST_LEN_MASK               0x7fffffff
+#define  SG_LIST_ADDR_MASK              0x7ffffff8  /* 8-byte aligned */
 
 static const struct {
     uint32_t mask;
@@ -129,6 +133,117 @@  static int do_hash_operation(AspeedHACEState *s, int algo)
     return 0;
 }
 
+static int do_hash_sg_operation(AspeedHACEState *s, int algo)
+{
+    uint32_t src, dest, reqSize;
+    hwaddr len;
+    const size_t reqLen = sizeof(struct aspeed_sg_list);
+    struct iovec iov[ASPEED_HACE_MAX_SG];
+    unsigned int i = 0;
+    unsigned int isLast = 0;
+    uint8_t *digestBuf = NULL;
+    size_t digestLen = 0, size = 0;
+    struct aspeed_sg_list *sgList;
+    int rc;
+
+    reqSize = s->regs[R_HASH_SRC_LEN];
+    dest = s->regs[R_HASH_DEST];
+
+    while (!isLast && i < ASPEED_HACE_MAX_SG) {
+        src = s->regs[R_HASH_SRC] + (i * reqLen);
+        len = reqLen;
+        sgList = (struct aspeed_sg_list *) address_space_map(&s->dram_as,
+                                                                     src,
+                                                         (hwaddr *) &len,
+                                                                   false,
+                                                 MEMTXATTRS_UNSPECIFIED);
+        if (!sgList) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+             "%s: failed to map dram for SG Array entry '%u' for address '0x%0x'\n",
+             __func__, i, src);
+            rc = -EACCES;
+            goto cleanup;
+        }
+        if (len != reqLen)
+            qemu_log_mask(LOG_GUEST_ERROR,
+             "%s:  Warning: dram map for SG array entry '%u' requested size '%lu' != mapped size '%lu'\n",
+             __func__, i, reqLen, len);
+
+        isLast = sgList->len & SG_LIST_LAST;
+
+        iov[i].iov_len = (hwaddr) (sgList->len & SG_LIST_LEN_MASK);
+        iov[i].iov_base = address_space_map(&s->dram_as,
+                            sgList->phy_addr & SG_LIST_ADDR_MASK,
+                            &iov[i].iov_len, false,
+                            MEMTXATTRS_UNSPECIFIED);
+        if (!iov[i].iov_base) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+             "%s: failed to map dram for SG array entry '%u' for region '0x%x', len '%u'\n",
+             __func__, i, sgList->phy_addr & SG_LIST_ADDR_MASK,
+             sgList->len & SG_LIST_LEN_MASK);
+            rc = -EACCES;
+            goto cleanup;
+        }
+        if (iov[i].iov_len != (sgList->len & SG_LIST_LEN_MASK))
+            qemu_log_mask(LOG_GUEST_ERROR,
+             "%s:  Warning: dram map for SG region entry %u requested size %u != mapped size %lu\n",
+             __func__, i, (sgList->len & SG_LIST_LEN_MASK), iov[i].iov_len);
+
+
+        address_space_unmap(&s->dram_as, (void *) sgList, len, false,
+                            len);
+        size += iov[i].iov_len;
+        i++;
+    }
+
+    if (!isLast) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                     "%s: Error: Exhausted maximum of '%u' SG array entries\n",
+                     __func__, ASPEED_HACE_MAX_SG);
+        rc = -ENOTSUP;
+        goto cleanup;
+    }
+
+    if (size != reqSize)
+        qemu_log_mask(LOG_GUEST_ERROR,
+         "%s: Warning: requested SG total size %u != actual size %lu\n",
+         __func__, reqSize, size);
+
+    rc = qcrypto_hash_bytesv(algo, iov, i, &digestBuf, &digestLen,
+                            &error_fatal);
+    if (rc < 0) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n",
+                      __func__);
+        goto cleanup;
+    }
+
+    rc = address_space_write(&s->dram_as, dest, MEMTXATTRS_UNSPECIFIED,
+                             digestBuf, digestLen);
+    if (rc)
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: address space write failed\n", __func__);
+    g_free(digestBuf);
+
+cleanup:
+
+    for (; i > 0; i--) {
+        address_space_unmap(&s->dram_as, iov[i - 1].iov_base,
+                            iov[i - 1].iov_len, false,
+                            iov[i - 1].iov_len);
+    }
+
+    /*
+     * Set status bits to indicate completion. Testing shows hardware sets
+     * these irrespective of HASH_IRQ_EN.
+     */
+    if (!rc) {
+        s->regs[R_STATUS] |= HASH_IRQ;
+    }
+
+    return rc;
+}
+
+
 
 static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
 {
@@ -187,11 +302,6 @@  static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
                           "%s: HMAC engine command mode %"PRIx64" not implemented",
                           __func__, (data & HASH_HMAC_MASK) >> 8);
         }
-        if (data & HASH_SG_EN) {
-            qemu_log_mask(LOG_UNIMP,
-                          "%s: Hash scatter gather mode not implemented",
-                          __func__);
-        }
         if (data & BIT(1)) {
             qemu_log_mask(LOG_UNIMP,
                           "%s: Cascaded mode not implemented",
@@ -204,7 +314,12 @@  static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
                         __func__, data & ahc->hash_mask);
                 break;
         }
-        do_hash_operation(s, algo);
+        if (data & HASH_SG_EN) {
+            s->regs[(R_HASH_SRC >> 2)] &= 0x7FFFFFF8;
+            do_hash_sg_operation(s, algo);
+        } else {
+            do_hash_operation(s, algo);
+        }
 
         if (data & HASH_IRQ_EN) {
             qemu_irq_raise(s->irq);
diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h
index 94d5ada95f..ead46afda9 100644
--- a/include/hw/misc/aspeed_hace.h
+++ b/include/hw/misc/aspeed_hace.h
@@ -40,4 +40,10 @@  struct AspeedHACEClass {
     uint32_t hash_mask;
 };
 
+#define ASPEED_HACE_MAX_SG      256
+struct aspeed_sg_list {
+        uint32_t len;
+        uint32_t phy_addr;
+} __attribute__ ((__packed__));
+
 #endif /* _ASPEED_HACE_H_ */