diff mbox

[v4,1/2] Implement .hex file loader

Message ID 1523245158-16443-2-git-send-email-suhang16@mails.ucas.ac.cn (mailing list archive)
State New, archived
Headers show

Commit Message

Su Hang April 9, 2018, 3:39 a.m. UTC
This patch adds Intel Hexadecimal Object File format support to
the loader.  The file format specification is available here:
http://www.piclist.com/techref/fileext/hex/intel.htm

The file format is mainly intended for embedded systems
and microcontrollers, such as Arduino, ARM, STM32, etc.

Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Su Hang <suhang16@mails.ucas.ac.cn>
---
 hw/arm/boot.c       |   9 +-
 hw/core/loader.c    | 280 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/loader.h |  17 ++++
 3 files changed, 305 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi April 14, 2018, 2:08 p.m. UTC | #1
On Mon, Apr 9, 2018 at 11:39 AM, Su Hang <suhang16@mails.ucas.ac.cn> wrote:
> This patch adds Intel Hexadecimal Object File format support to
> the loader.  The file format specification is available here:
> http://www.piclist.com/techref/fileext/hex/intel.htm
>
> The file format is mainly intended for embedded systems
> and microcontrollers, such as Arduino, ARM, STM32, etc.
>
> Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Su Hang <suhang16@mails.ucas.ac.cn>
> ---
>  hw/arm/boot.c       |   9 +-
>  hw/core/loader.c    | 280 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/loader.h |  17 ++++
>  3 files changed, 305 insertions(+), 1 deletion(-)

Parsers must be robust against invalid inputs so that a corrupt input
file cannot crash QEMU.  Please validate all addresses/offsets/lengths
so that this cannot happen.  For example, the byte_count is currently
not validated and can overflow HexLine.data[].

parse_hex_blob() must handle input files that touch large ranges of
memory.  At the moment it assumes bin_buf will be large enough for the
memory regions described by the input file.  Since Intel HEX files
support 32-bit addressing, that means processing a file in this way
could require 4 GB of RAM!  Three solutions:
1. Reject files that have large gaps in their address ranges.
2. Return a list of data blobs, each with its own start address.
3. Set up the ROM structs directly within parse_hex_blob() instead of
returning a linear buffer.

> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> index 9319b12fcd2a..07ce54e5936d 100644
> --- a/hw/arm/boot.c
> +++ b/hw/arm/boot.c
> @@ -1060,8 +1060,15 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
>          kernel_size = load_aarch64_image(info->kernel_filename,
>                                           info->loader_start, &entry, as);
>          is_linux = 1;
> +    } else if (kernel_size < 0 && strstr(info->kernel_filename, ".hex")) {

Most UNIX-style programs, including QEMU, do not check the file
extension to determine its format.  Instead they look at the contents
of the file to see if it can be parsed.

Please do not check for ".hex".  Try to load it as a hex file and fall
back to the next file type if parsing fails.

> +        /* 32-bit ARM .hex file */
> +        entry = info->loader_start + KERNEL_LOAD_ADDR;
> +        kernel_size = load_targphys_hex_as(info->kernel_filename, entry,
> +                                           info->ram_size - KERNEL_LOAD_ADDR,
> +                                           as);
> +        is_linux = 1;

Why is this needed?  Linux images are not loaded from hex files and
the extra information provided by is_linux = 1 won't be used/tested.

>      } else if (kernel_size < 0) {
> -        /* 32-bit ARM */
> +        /* 32-bit ARM binary file */
>          entry = info->loader_start + KERNEL_LOAD_ADDR;
>          kernel_size = load_image_targphys_as(info->kernel_filename, entry,
>                                               info->ram_size - KERNEL_LOAD_ADDR,
> diff --git a/hw/core/loader.c b/hw/core/loader.c
> index 06bdbca53709..41d714520be4 100644
> --- a/hw/core/loader.c
> +++ b/hw/core/loader.c
> @@ -1286,3 +1286,283 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict)
>          }
>      }
>  }
> +
> +typedef enum HexRecord HexRecord;
> +enum HexRecord {
> +    DATA_RECORD = 0,
> +    EOF_RECORD,
> +    EXT_SEG_ADDR_RECORD,
> +    START_SEG_ADDR_RECORD,
> +    EXT_LINEAR_ADDR_RECORD,
> +    START_LINEAR_ADDR_RECORD,
> +};
> +
> +typedef union HexLine HexLine;
> +union HexLine {
> +    uint8_t buf[0x25];

Why is the length 0x25?  According to the file format specification
the data[] part can be 255 bytes long.

> +    struct __attribute__((packed)) {

This use of packed is not portable since the address field is not
aligned to 16 bits.  Some CPU architectures do not support unaligned
memory accesses and the program would terminate with an exception.

Have you considered using fscanf() instead?  It removes the need for
HexLine, hex_buf, and the character parsing loop:

  if (fscanf(fp, ":%02hhx%04hx%02hhx", &byte_count, &address,
&record_type) != 3) {
      ...
      goto fail;
  }

  our_checksum = byte_count  + ((address >> 8) & 0xff) + (address &
0xff) + record_type;

  for (i = 0; i < byte_count; i++) {
      if (fscanf(fp, "%02hhx", &data[i]) != 1) {
          ...
          goto fail;
      }

      our_checksum += data[i];
  }

  if (fscanf(fp, "%02hhx\n", &checksum) != 1) {
      ...
      goto fail;
  }

  if (our_checksum + checksum != 0) {
      ...
      goto fail;
  }

  ...process record...

> +        uint8_t byte_count;
> +        uint16_t address;
> +        uint8_t record_type;
> +        uint8_t data[0x25 - 0x5];
> +        uint8_t checksum;
> +    };
> +};
> +
> +static uint8_t ctoh(char c)
> +{
> +    return (c & 0x10) ? /*0-9*/ c & 0xf : /*A-F, a-f*/ (c & 0xf) + 9;
> +}

Not needed if you switch to fscanf(), but otherwise please use glib's
g_ascii_xdigit_value():
https://developer.gnome.org/glib/stable/glib-String-Utility-Functions.html#g-ascii-xdigit-value

> +
> +static uint8_t validate_checksum(HexLine *record)
> +{
> +    uint8_t result = 0, i = 0;
> +
> +    for (; i < (record->byte_count + 5); ++i) {

This is an infinite loop when byte_count > 250 since the right-hand
side of the comparison is an int (due to the 5 int literal) but the
left-hand side is a uint8_t.

> +        result += record->buf[i];
> +    }
> +
> +    return result == 0;
> +}
> +
> +/* return pointer of bin_blob or NULL if error */
> +static uint8_t *parse_hex_blob(char *filename, size_t *p_size)
> +{
> +    int fd;
> +    off_t hex_blob_size;
> +    uint8_t *p_data = NULL;
> +    uint8_t *hex_blob;
> +    uint8_t *hex_blob_ori;         /* used to free temporary memory */
> +    uint8_t *bin_buf;
> +    uint8_t *end;
> +    uint8_t idx = 0;
> +    uint8_t in_process = 0;        /* avoid re-enter */
> +    uint8_t low_nibble = 0;        /* process two hex char into 8-bits */
> +    uint8_t ext_linear_record = 0; /* record non-constitutes block */

Please use bool for flags.  QEMU coding style uses the bool type
instead of integer types when there are boolean values.

> +    uint32_t next_address_to_write = 0;
> +    uint32_t current_address = 0;
> +    uint32_t last_address = 0;
> +    HexLine line = {0};
> +
> +    fd = open(filename, O_RDONLY);
> +    if (fd < 0) {
> +        return NULL;
> +    }
> +    hex_blob_size = lseek(fd, 0, SEEK_END);
> +    if (hex_blob_size < 0) {
> +        close(fd);
> +        return NULL;
> +    }
> +    hex_blob = g_malloc(hex_blob_size);
> +    hex_blob_ori = hex_blob;
> +    bin_buf = g_malloc(hex_blob_size * 2);

Why is the size hex_blob_size * 2?

> +    lseek(fd, 0, SEEK_SET);
> +    if (read(fd, hex_blob, hex_blob_size) != hex_blob_size) {
> +        close(fd);
> +        goto hex_parser_exit;
> +    }
> +    close(fd);
> +
> +    memset(line.buf, 0, sizeof(HexLine));

This was already zero-initialized above:

  HexLine line = {0};

> +    end = (uint8_t *)hex_blob + hex_blob_size;
> +
> +    for (; hex_blob != end; ++hex_blob) {
> +        switch ((uint8_t)(*hex_blob)) {

hex_block is already uint8_t.  Why is there a cast?

> +        case '\r':
> +        case '\n':
> +            if (!in_process) {
> +                break;
> +            }
> +
> +            in_process = 0;
> +            if (validate_checksum(&line) == 0) {

There is a small (1/256) chance that checksum validation succeeds when
a truncated line is read.  Please validate the byte count field to
make sure we've read the correct number of bytes.

> +                p_data = NULL;

p_data is already NULL.

> +                goto hex_parser_exit;
> +            }
> +
> +            line.address = bswap16(line.address);

This assumes the host CPU is little-endian.  Please use be16_to_cpu()
instead so it works on big-endian host CPUs too.

> +            switch (line.record_type) {
> +            case DATA_RECORD:
> +                current_address =
> +                    (next_address_to_write & 0xffff0000) | line.address;
> +                /* verify this is a continous block of memory */

s/continous/contiguous/

> +                if (current_address != next_address_to_write ||
> +                    ext_linear_record) {
> +                    if (!ext_linear_record) {
> +                        /* Store next address to write */
> +                        last_address = next_address_to_write;
> +                        next_address_to_write = current_address;
> +                    }
> +                    ext_linear_record = 0;
> +                    memset(bin_buf + last_address, 0x0,
> +                           current_address - last_address);
> +                }
> +
> +                /* copy from line buffer to output bin_buf */
> +                memcpy((uint8_t *)bin_buf + current_address,
> +                       (uint8_t *)line.data, line.byte_count);

bin_buf and line.data are already uint8_t, there is no need to cast.

> +                /* Save next address to write */
> +                last_address = current_address;
> +                next_address_to_write = current_address + line.byte_count;
> +                break;
> +
> +            case EOF_RECORD:
> +                /* nothing to do */
> +                break;
> +            case EXT_SEG_ADDR_RECORD:

Please check that the byte count is 2 for this record type.

> +                /* save next address to write,
> +                 * in case of non-continous block of memory */
> +                ext_linear_record = 1;
> +                last_address = next_address_to_write;
> +                next_address_to_write =
> +                    ((line.data[0] << 12) | (line.data[1] << 4));
> +                break;
> +            case START_SEG_ADDR_RECORD:
> +                /* TODO */

The function should return an error if an unsupported record is encountered.

> +                break;
> +
> +            case EXT_LINEAR_ADDR_RECORD:

Please check that the byte count is 2 for this record type.

> +                /* save next address to write,
> +                 * in case of non-continous block of memory */
> +                ext_linear_record = 1;
> +                last_address = next_address_to_write;
> +                next_address_to_write =
> +                    ((line.data[0] << 24) | (line.data[1] << 16));
> +                break;
> +            case START_LINEAR_ADDR_RECORD:
> +                /* TODO */

The function should return an error if an unsupported record is encountered.

> +                break;
> +
> +            default:
> +                p_data = NULL;

p_data is already NULL.
Su Hang April 14, 2018, 3:44 p.m. UTC | #2
Thanks for your detailed reply! I  will carefully treat
all the content that you mentioned, and apply them in v5.


> -----Original Messages-----
> From: "Stefan Hajnoczi" <stefanha@gmail.com>
> Sent Time: 2018-04-14 22:08:43 (Saturday)
> To: "Su Hang" <suhang16@mails.ucas.ac.cn>
> Cc: "Jim Mussared" <jim@groklearning.com>, qemu-devel <qemu-devel@nongnu.org>, "Joel Stanley" <joel@jms.id.au>
> Subject: Re: [Qemu-devel] [PATCH v4 1/2] Implement .hex file loader
> 
> On Mon, Apr 9, 2018 at 11:39 AM, Su Hang <suhang16@mails.ucas.ac.cn> wrote:
> > This patch adds Intel Hexadecimal Object File format support to
> > the loader.  The file format specification is available here:
> > http://www.piclist.com/techref/fileext/hex/intel.htm
> >
> > The file format is mainly intended for embedded systems
> > and microcontrollers, such as Arduino, ARM, STM32, etc.
> >
> > Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Signed-off-by: Su Hang <suhang16@mails.ucas.ac.cn>
> > ---
> >  hw/arm/boot.c       |   9 +-
> >  hw/core/loader.c    | 280 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/hw/loader.h |  17 ++++
> >  3 files changed, 305 insertions(+), 1 deletion(-)
> 
> Parsers must be robust against invalid inputs so that a corrupt input
> file cannot crash QEMU.  Please validate all addresses/offsets/lengths
> so that this cannot happen.  For example, the byte_count is currently
> not validated and can overflow HexLine.data[].
> 
> parse_hex_blob() must handle input files that touch large ranges of
> memory.  At the moment it assumes bin_buf will be large enough for the
> memory regions described by the input file.  Since Intel HEX files
> support 32-bit addressing, that means processing a file in this way
> could require 4 GB of RAM!  Three solutions:
> 1. Reject files that have large gaps in their address ranges.
> 2. Return a list of data blobs, each with its own start address.
> 3. Set up the ROM structs directly within parse_hex_blob() instead of
> returning a linear buffer.
> 
> > diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> > index 9319b12fcd2a..07ce54e5936d 100644
> > --- a/hw/arm/boot.c
> > +++ b/hw/arm/boot.c
> > @@ -1060,8 +1060,15 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
> >          kernel_size = load_aarch64_image(info->kernel_filename,
> >                                           info->loader_start, &entry, as);
> >          is_linux = 1;
> > +    } else if (kernel_size < 0 && strstr(info->kernel_filename, ".hex")) {
> 
> Most UNIX-style programs, including QEMU, do not check the file
> extension to determine its format.  Instead they look at the contents
> of the file to see if it can be parsed.
> 
> Please do not check for ".hex".  Try to load it as a hex file and fall
> back to the next file type if parsing fails.
> 
> > +        /* 32-bit ARM .hex file */
> > +        entry = info->loader_start + KERNEL_LOAD_ADDR;
> > +        kernel_size = load_targphys_hex_as(info->kernel_filename, entry,
> > +                                           info->ram_size - KERNEL_LOAD_ADDR,
> > +                                           as);
> > +        is_linux = 1;
> 
> Why is this needed?  Linux images are not loaded from hex files and
> the extra information provided by is_linux = 1 won't be used/tested.
> 
> >      } else if (kernel_size < 0) {
> > -        /* 32-bit ARM */
> > +        /* 32-bit ARM binary file */
> >          entry = info->loader_start + KERNEL_LOAD_ADDR;
> >          kernel_size = load_image_targphys_as(info->kernel_filename, entry,
> >                                               info->ram_size - KERNEL_LOAD_ADDR,
> > diff --git a/hw/core/loader.c b/hw/core/loader.c
> > index 06bdbca53709..41d714520be4 100644
> > --- a/hw/core/loader.c
> > +++ b/hw/core/loader.c
> > @@ -1286,3 +1286,283 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict)
> >          }
> >      }
> >  }
> > +
> > +typedef enum HexRecord HexRecord;
> > +enum HexRecord {
> > +    DATA_RECORD = 0,
> > +    EOF_RECORD,
> > +    EXT_SEG_ADDR_RECORD,
> > +    START_SEG_ADDR_RECORD,
> > +    EXT_LINEAR_ADDR_RECORD,
> > +    START_LINEAR_ADDR_RECORD,
> > +};
> > +
> > +typedef union HexLine HexLine;
> > +union HexLine {
> > +    uint8_t buf[0x25];
> 
> Why is the length 0x25?  According to the file format specification
> the data[] part can be 255 bytes long.
> 
> > +    struct __attribute__((packed)) {
> 
> This use of packed is not portable since the address field is not
> aligned to 16 bits.  Some CPU architectures do not support unaligned
> memory accesses and the program would terminate with an exception.
> 
> Have you considered using fscanf() instead?  It removes the need for
> HexLine, hex_buf, and the character parsing loop:
> 
>   if (fscanf(fp, ":%02hhx%04hx%02hhx", &byte_count, &address,
> &record_type) != 3) {
>       ...
>       goto fail;
>   }
> 
>   our_checksum = byte_count  + ((address >> 8) & 0xff) + (address &
> 0xff) + record_type;
> 
>   for (i = 0; i < byte_count; i++) {
>       if (fscanf(fp, "%02hhx", &data[i]) != 1) {
>           ...
>           goto fail;
>       }
> 
>       our_checksum += data[i];
>   }
> 
>   if (fscanf(fp, "%02hhx\n", &checksum) != 1) {
>       ...
>       goto fail;
>   }
> 
>   if (our_checksum + checksum != 0) {
>       ...
>       goto fail;
>   }
> 
>   ...process record...
> 
> > +        uint8_t byte_count;
> > +        uint16_t address;
> > +        uint8_t record_type;
> > +        uint8_t data[0x25 - 0x5];
> > +        uint8_t checksum;
> > +    };
> > +};
> > +
> > +static uint8_t ctoh(char c)
> > +{
> > +    return (c & 0x10) ? /*0-9*/ c & 0xf : /*A-F, a-f*/ (c & 0xf) + 9;
> > +}
> 
> Not needed if you switch to fscanf(), but otherwise please use glib's
> g_ascii_xdigit_value():
> https://developer.gnome.org/glib/stable/glib-String-Utility-Functions.html#g-ascii-xdigit-value
> 
> > +
> > +static uint8_t validate_checksum(HexLine *record)
> > +{
> > +    uint8_t result = 0, i = 0;
> > +
> > +    for (; i < (record->byte_count + 5); ++i) {
> 
> This is an infinite loop when byte_count > 250 since the right-hand
> side of the comparison is an int (due to the 5 int literal) but the
> left-hand side is a uint8_t.
> 
> > +        result += record->buf[i];
> > +    }
> > +
> > +    return result == 0;
> > +}
> > +
> > +/* return pointer of bin_blob or NULL if error */
> > +static uint8_t *parse_hex_blob(char *filename, size_t *p_size)
> > +{
> > +    int fd;
> > +    off_t hex_blob_size;
> > +    uint8_t *p_data = NULL;
> > +    uint8_t *hex_blob;
> > +    uint8_t *hex_blob_ori;         /* used to free temporary memory */
> > +    uint8_t *bin_buf;
> > +    uint8_t *end;
> > +    uint8_t idx = 0;
> > +    uint8_t in_process = 0;        /* avoid re-enter */
> > +    uint8_t low_nibble = 0;        /* process two hex char into 8-bits */
> > +    uint8_t ext_linear_record = 0; /* record non-constitutes block */
> 
> Please use bool for flags.  QEMU coding style uses the bool type
> instead of integer types when there are boolean values.
> 
> > +    uint32_t next_address_to_write = 0;
> > +    uint32_t current_address = 0;
> > +    uint32_t last_address = 0;
> > +    HexLine line = {0};
> > +
> > +    fd = open(filename, O_RDONLY);
> > +    if (fd < 0) {
> > +        return NULL;
> > +    }
> > +    hex_blob_size = lseek(fd, 0, SEEK_END);
> > +    if (hex_blob_size < 0) {
> > +        close(fd);
> > +        return NULL;
> > +    }
> > +    hex_blob = g_malloc(hex_blob_size);
> > +    hex_blob_ori = hex_blob;
> > +    bin_buf = g_malloc(hex_blob_size * 2);
> 
> Why is the size hex_blob_size * 2?
> 
> > +    lseek(fd, 0, SEEK_SET);
> > +    if (read(fd, hex_blob, hex_blob_size) != hex_blob_size) {
> > +        close(fd);
> > +        goto hex_parser_exit;
> > +    }
> > +    close(fd);
> > +
> > +    memset(line.buf, 0, sizeof(HexLine));
> 
> This was already zero-initialized above:
> 
>   HexLine line = {0};
> 
> > +    end = (uint8_t *)hex_blob + hex_blob_size;
> > +
> > +    for (; hex_blob != end; ++hex_blob) {
> > +        switch ((uint8_t)(*hex_blob)) {
> 
> hex_block is already uint8_t.  Why is there a cast?
> 
> > +        case '\r':
> > +        case '\n':
> > +            if (!in_process) {
> > +                break;
> > +            }
> > +
> > +            in_process = 0;
> > +            if (validate_checksum(&line) == 0) {
> 
> There is a small (1/256) chance that checksum validation succeeds when
> a truncated line is read.  Please validate the byte count field to
> make sure we've read the correct number of bytes.
> 
> > +                p_data = NULL;
> 
> p_data is already NULL.
> 
> > +                goto hex_parser_exit;
> > +            }
> > +
> > +            line.address = bswap16(line.address);
> 
> This assumes the host CPU is little-endian.  Please use be16_to_cpu()
> instead so it works on big-endian host CPUs too.
> 
> > +            switch (line.record_type) {
> > +            case DATA_RECORD:
> > +                current_address =
> > +                    (next_address_to_write & 0xffff0000) | line.address;
> > +                /* verify this is a continous block of memory */
> 
> s/continous/contiguous/
> 
> > +                if (current_address != next_address_to_write ||
> > +                    ext_linear_record) {
> > +                    if (!ext_linear_record) {
> > +                        /* Store next address to write */
> > +                        last_address = next_address_to_write;
> > +                        next_address_to_write = current_address;
> > +                    }
> > +                    ext_linear_record = 0;
> > +                    memset(bin_buf + last_address, 0x0,
> > +                           current_address - last_address);
> > +                }
> > +
> > +                /* copy from line buffer to output bin_buf */
> > +                memcpy((uint8_t *)bin_buf + current_address,
> > +                       (uint8_t *)line.data, line.byte_count);
> 
> bin_buf and line.data are already uint8_t, there is no need to cast.
> 
> > +                /* Save next address to write */
> > +                last_address = current_address;
> > +                next_address_to_write = current_address + line.byte_count;
> > +                break;
> > +
> > +            case EOF_RECORD:
> > +                /* nothing to do */
> > +                break;
> > +            case EXT_SEG_ADDR_RECORD:
> 
> Please check that the byte count is 2 for this record type.
> 
> > +                /* save next address to write,
> > +                 * in case of non-continous block of memory */
> > +                ext_linear_record = 1;
> > +                last_address = next_address_to_write;
> > +                next_address_to_write =
> > +                    ((line.data[0] << 12) | (line.data[1] << 4));
> > +                break;
> > +            case START_SEG_ADDR_RECORD:
> > +                /* TODO */
> 
> The function should return an error if an unsupported record is encountered.
> 
> > +                break;
> > +
> > +            case EXT_LINEAR_ADDR_RECORD:
> 
> Please check that the byte count is 2 for this record type.
> 
> > +                /* save next address to write,
> > +                 * in case of non-continous block of memory */
> > +                ext_linear_record = 1;
> > +                last_address = next_address_to_write;
> > +                next_address_to_write =
> > +                    ((line.data[0] << 24) | (line.data[1] << 16));
> > +                break;
> > +            case START_LINEAR_ADDR_RECORD:
> > +                /* TODO */
> 
> The function should return an error if an unsupported record is encountered.
> 
> > +                break;
> > +
> > +            default:
> > +                p_data = NULL;
> 
> p_data is already NULL.
Stefan Hajnoczi April 15, 2018, 1:25 a.m. UTC | #3
On Sat, Apr 14, 2018 at 11:44 PM, Su Hang <suhang16@mails.ucas.ac.cn> wrote:
> Thanks for your detailed reply! I  will carefully treat
> all the content that you mentioned, and apply them in v5.

By the way, if you are wondering why the parser needs to validate
everything, here is an example scenario:

QEMU may be used to host an online micro:bit simulator where the user
can provide a .hex file.  In that case QEMU is running on a server and
the .hex file is provided by an untrusted user.  That user must not be
able to crash QEMU (and exploit security holes).

Stefan
Su Hang April 16, 2018, 2:49 p.m. UTC | #4
Sorry I didn't reply in detai on last weekend.
Here are some points I want to discuss.

> parse_hex_blob() must handle input files that touch large ranges of
> memory.  At the moment it assumes bin_buf will be large enough for the
> memory regions described by the input file.  Since Intel HEX files
> support 32-bit addressing, that means processing a file in this way
> could require 4 GB of RAM!  Three solutions:
> 1. Reject files that have large gaps in their address ranges.
> 2. Return a list of data blobs, each with its own start address.
> 3. Set up the ROM structs directly within parse_hex_blob() instead of
> returning a linear buffer.

I think solution 3 is more natural and should be the best in this case.
I will read the source code to learn how to set up ROM structs correctly,
then try to apply solution 3.

> Have you considered using fscanf() instead?  It removes the need for
> HexLine, hex_buf, and the character parsing loop:
>
>   if (fscanf(fp, ":%02hhx%04hx%02hhx", &byte_count, &address,
> &record_type) != 3) {

fscanf() indeed works, but I worry it will be much slower than handle it
manually, especially when you mentioned the input file can be as large as 4GB.

> Why is the size hex_blob_size * 2?
I must confess I did it in a rough way, in order to quickly write
a runnable prototype, I didn't take safety into consideration.
Sorry, I will keep safety point in mind in my future work.
Why I set size = hex_blob_size * 2, because I assumed it would
satisfy bin_buf requirement.(Of course, deliberately constructed
malicious user input can easily break this hypothesis.)

> > +            case START_SEG_ADDR_RECORD:
> > +                /* TODO */
>
> The function should return an error if an unsupported record is encountered.

I read DAPLink code(and few others hex to bin converter's source code),
they just simply ignore this record type and do nothing.
The hex file generated by official online compiler do contain this kind of
record type.

I vimdiff the hex file and bin file(generated by
`arm-none-eabi-objcopy -I ihex microbit.hex -O binary microbit.bin`,
then use '%!xxd' in vim),
I noticed 'arm-none-eabi-objcopy' didn't generate any code corresponding to
START_SEG_ADDR_RECORD record type.

1) So should I return an error for 'Start Segment Address' and
   'Start Linear Address' type?

2) Or maybe I should insert some ARM assembly like
'''
mov     r0, #0      @ Set r0 to 0.
b       target      @ Jump forward to 'target'.
''' to bin_buf?

3) Or should I just ignore this two types like DAPLink and
   'arm-none-eabi-objcopy' do?

> > +            line.address = bswap16(line.address);
>
> This assumes the host CPU is little-endian.  Please use be16_to_cpu()
> instead so it works on big-endian host CPUs too.

I have tried to use be16_to_cpu() the first time you suggested it to me,
but qemu crashed...
I have read other places call this function to verify I use it correctly,
but it did crash(that's why I choose bswap16()), It's strange...
I'll report more detail about this case tomorrow.


Thank you for your patience with my ignorance!

With sincerest gratitude,
Su Hang


> -----Original Messages-----
> From: "Stefan Hajnoczi" <stefanha@gmail.com>
> Sent Time: 2018-04-15 09:25:30 (Sunday)
> To: "Su Hang" <suhang16@mails.ucas.ac.cn>
> Cc: "jim mussared" <jim@groklearning.com>, qemu-devel <qemu-devel@nongnu.org>, "joel stanley" <joel@jms.id.au>
> Subject: Re: [Qemu-devel] [PATCH v4 1/2] Implement .hex file loader
> 
> On Sat, Apr 14, 2018 at 11:44 PM, Su Hang <suhang16@mails.ucas.ac.cn> wrote:
> > Thanks for your detailed reply! I  will carefully treat
> > all the content that you mentioned, and apply them in v5.
> 
> By the way, if you are wondering why the parser needs to validate
> everything, here is an example scenario:
> 
> QEMU may be used to host an online micro:bit simulator where the user
> can provide a .hex file.  In that case QEMU is running on a server and
> the .hex file is provided by an untrusted user.  That user must not be
> able to crash QEMU (and exploit security holes).
> 
> Stefan
Su Hang April 17, 2018, 5:38 a.m. UTC | #5
> -----Original Messages-----
> From: "Su Hang" <suhang16@mails.ucas.ac.cn>
> Sent Time: 2018-04-16 22:49:48 (Monday)
> To: "stefan hajnoczi" <stefanha@gmail.com>
> Cc: "jim mussared" <jim@groklearning.com>, qemu-devel <qemu-devel@nongnu.org>, "joel stanley" <joel@jms.id.au>
> Subject: Re: [Qemu-devel] [PATCH v4 1/2] Implement .hex file loader

> I have tried to use be16_to_cpu() the first time you suggested it to me,
> but qemu crashed...
> I have read other places call this function to verify I use it correctly,
> but it did crash(that's why I choose bswap16()), It's strange...
> I'll report more detail about this case tomorrow.

Oops, when I try be16_to_cpu() today, It works without any problems...
I don't know what to say, maybe I have misused someplace last time.
diff mbox

Patch

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 9319b12fcd2a..07ce54e5936d 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -1060,8 +1060,15 @@  static void arm_load_kernel_notify(Notifier *notifier, void *data)
         kernel_size = load_aarch64_image(info->kernel_filename,
                                          info->loader_start, &entry, as);
         is_linux = 1;
+    } else if (kernel_size < 0 && strstr(info->kernel_filename, ".hex")) {
+        /* 32-bit ARM .hex file */
+        entry = info->loader_start + KERNEL_LOAD_ADDR;
+        kernel_size = load_targphys_hex_as(info->kernel_filename, entry,
+                                           info->ram_size - KERNEL_LOAD_ADDR,
+                                           as);
+        is_linux = 1;
     } else if (kernel_size < 0) {
-        /* 32-bit ARM */
+        /* 32-bit ARM binary file */
         entry = info->loader_start + KERNEL_LOAD_ADDR;
         kernel_size = load_image_targphys_as(info->kernel_filename, entry,
                                              info->ram_size - KERNEL_LOAD_ADDR,
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 06bdbca53709..41d714520be4 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1286,3 +1286,283 @@  void hmp_info_roms(Monitor *mon, const QDict *qdict)
         }
     }
 }
+
+typedef enum HexRecord HexRecord;
+enum HexRecord {
+    DATA_RECORD = 0,
+    EOF_RECORD,
+    EXT_SEG_ADDR_RECORD,
+    START_SEG_ADDR_RECORD,
+    EXT_LINEAR_ADDR_RECORD,
+    START_LINEAR_ADDR_RECORD,
+};
+
+typedef union HexLine HexLine;
+union HexLine {
+    uint8_t buf[0x25];
+    struct __attribute__((packed)) {
+        uint8_t byte_count;
+        uint16_t address;
+        uint8_t record_type;
+        uint8_t data[0x25 - 0x5];
+        uint8_t checksum;
+    };
+};
+
+static uint8_t ctoh(char c)
+{
+    return (c & 0x10) ? /*0-9*/ c & 0xf : /*A-F, a-f*/ (c & 0xf) + 9;
+}
+
+static uint8_t validate_checksum(HexLine *record)
+{
+    uint8_t result = 0, i = 0;
+
+    for (; i < (record->byte_count + 5); ++i) {
+        result += record->buf[i];
+    }
+
+    return result == 0;
+}
+
+/* return pointer of bin_blob or NULL if error */
+static uint8_t *parse_hex_blob(char *filename, size_t *p_size)
+{
+    int fd;
+    off_t hex_blob_size;
+    uint8_t *p_data = NULL;
+    uint8_t *hex_blob;
+    uint8_t *hex_blob_ori;         /* used to free temporary memory */
+    uint8_t *bin_buf;
+    uint8_t *end;
+    uint8_t idx = 0;
+    uint8_t in_process = 0;        /* avoid re-enter */
+    uint8_t low_nibble = 0;        /* process two hex char into 8-bits */
+    uint8_t ext_linear_record = 0; /* record non-constitutes block */
+    uint32_t next_address_to_write = 0;
+    uint32_t current_address = 0;
+    uint32_t last_address = 0;
+    HexLine line = {0};
+
+    fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        return NULL;
+    }
+    hex_blob_size = lseek(fd, 0, SEEK_END);
+    if (hex_blob_size < 0) {
+        close(fd);
+        return NULL;
+    }
+    hex_blob = g_malloc(hex_blob_size);
+    hex_blob_ori = hex_blob;
+    bin_buf = g_malloc(hex_blob_size * 2);
+    lseek(fd, 0, SEEK_SET);
+    if (read(fd, hex_blob, hex_blob_size) != hex_blob_size) {
+        close(fd);
+        goto hex_parser_exit;
+    }
+    close(fd);
+
+    memset(line.buf, 0, sizeof(HexLine));
+    end = (uint8_t *)hex_blob + hex_blob_size;
+
+    for (; hex_blob != end; ++hex_blob) {
+        switch ((uint8_t)(*hex_blob)) {
+        case '\r':
+        case '\n':
+            if (!in_process) {
+                break;
+            }
+
+            in_process = 0;
+            if (validate_checksum(&line) == 0) {
+                p_data = NULL;
+                goto hex_parser_exit;
+            }
+
+            line.address = bswap16(line.address);
+            switch (line.record_type) {
+            case DATA_RECORD:
+                current_address =
+                    (next_address_to_write & 0xffff0000) | line.address;
+                /* verify this is a continous block of memory */
+                if (current_address != next_address_to_write ||
+                    ext_linear_record) {
+                    if (!ext_linear_record) {
+                        /* Store next address to write */
+                        last_address = next_address_to_write;
+                        next_address_to_write = current_address;
+                    }
+                    ext_linear_record = 0;
+                    memset(bin_buf + last_address, 0x0,
+                           current_address - last_address);
+                }
+
+                /* copy from line buffer to output bin_buf */
+                memcpy((uint8_t *)bin_buf + current_address,
+                       (uint8_t *)line.data, line.byte_count);
+                /* Save next address to write */
+                last_address = current_address;
+                next_address_to_write = current_address + line.byte_count;
+                break;
+
+            case EOF_RECORD:
+                /* nothing to do */
+                break;
+            case EXT_SEG_ADDR_RECORD:
+                /* save next address to write,
+                 * in case of non-continous block of memory */
+                ext_linear_record = 1;
+                last_address = next_address_to_write;
+                next_address_to_write =
+                    ((line.data[0] << 12) | (line.data[1] << 4));
+                break;
+            case START_SEG_ADDR_RECORD:
+                /* TODO */
+                break;
+
+            case EXT_LINEAR_ADDR_RECORD:
+                /* save next address to write,
+                 * in case of non-continous block of memory */
+                ext_linear_record = 1;
+                last_address = next_address_to_write;
+                next_address_to_write =
+                    ((line.data[0] << 24) | (line.data[1] << 16));
+                break;
+            case START_LINEAR_ADDR_RECORD:
+                /* TODO */
+                break;
+
+            default:
+                p_data = NULL;
+                goto hex_parser_exit;
+            }
+            break;
+
+        /* start of a new record. */
+        case ':':
+            memset(line.buf, 0, sizeof(HexLine));
+            in_process = 1;
+            low_nibble = 0;
+            idx = 0;
+            break;
+
+        /* decoding lines */
+        default:
+            if (low_nibble) {
+                line.buf[idx] |= ctoh((uint8_t)(*hex_blob)) & 0xf;
+                ++idx;
+            } else {
+                line.buf[idx] = ctoh((uint8_t)(*hex_blob)) << 4;
+            }
+
+            low_nibble = !low_nibble;
+            break;
+        }
+
+    }
+
+    *p_size = (size_t)next_address_to_write;
+    p_data = g_malloc(next_address_to_write);
+
+    memcpy(p_data, bin_buf, next_address_to_write);
+hex_parser_exit:
+    g_free(hex_blob_ori);
+    g_free(bin_buf);
+    return p_data;
+}
+
+/* return size or -1 if error */
+int load_targphys_hex_as(const char *filename, hwaddr addr, uint64_t max_sz,
+                         AddressSpace *as)
+{
+    return rom_add_hex_file(filename, NULL, addr, -1, false, NULL, as);
+}
+
+/* return size -1 if error */
+int rom_add_hex_file(const char *file, const char *fw_dir, hwaddr addr,
+                     int32_t bootindex, bool option_rom, MemoryRegion *mr,
+                     AddressSpace *as)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+    Rom *rom;
+    char devpath[100];
+    size_t datasize = 0;
+
+    if (as && mr) {
+        fprintf(stderr, "Specifying an Address Space and Memory Region is "
+                        "not valid when loading a rom\n");
+        /* We haven't allocated anything so we don't need any cleanup */
+        return -1;
+    }
+
+    rom = g_malloc0(sizeof(*rom));
+    rom->name = g_strdup(file);
+    rom->path = qemu_find_file(QEMU_FILE_TYPE_BIOS, rom->name);
+    rom->as = as;
+    if (rom->path == NULL) {
+        rom->path = g_strdup(file);
+    }
+
+    rom->data = parse_hex_blob(rom->path, &datasize);
+    if (rom->data == NULL) {
+        fprintf(stderr, "failed to parse hex file '%s': %s\n", rom->path,
+                strerror(errno));
+        goto err;
+    }
+    rom->datasize = datasize;
+
+    if (fw_dir) {
+        rom->fw_dir = g_strdup(fw_dir);
+        rom->fw_file = g_strdup(file);
+    }
+    rom->addr = addr;
+    rom->romsize = rom->datasize;
+
+    rom_insert(rom);
+
+    if (rom->fw_file && fw_cfg) {
+        const char *basename;
+        char fw_file_name[FW_CFG_MAX_FILE_PATH];
+        void *data;
+
+        basename = strrchr(rom->fw_file, '/');
+        if (basename) {
+            ++basename;
+        } else {
+            basename = rom->fw_file;
+        }
+        snprintf(fw_file_name, sizeof(fw_file_name), "%s/%s", rom->fw_dir,
+                 basename);
+        snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name);
+
+        if ((!option_rom || mc->option_rom_has_mr) && mc->rom_file_has_mr) {
+            data = rom_set_mr(rom, OBJECT(fw_cfg), devpath, true);
+        } else {
+            data = rom->data;
+        }
+
+        fw_cfg_add_file(fw_cfg, fw_file_name, data, rom->romsize);
+    } else {
+        if (mr) {
+            rom->mr = mr;
+            snprintf(devpath, sizeof(devpath), "/rom@%s", file);
+        } else {
+            snprintf(devpath, sizeof(devpath), "/rom@" TARGET_FMT_plx, addr);
+        }
+    }
+
+    add_boot_device_path(bootindex, NULL, devpath);
+    return rom->datasize;
+
+err:
+    g_free(rom->path);
+    g_free(rom->name);
+    if (fw_dir) {
+        g_free(rom->fw_dir);
+        g_free(rom->fw_file);
+    }
+    g_free(rom);
+
+    return -1;
+}
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 5ed3fd8ae67a..176b11221a27 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -28,6 +28,20 @@  ssize_t load_image_size(const char *filename, void *addr, size_t size);
 int load_image_targphys_as(const char *filename,
                            hwaddr addr, uint64_t max_sz, AddressSpace *as);
 
+/**load_image_targphys_hex_as:
+ * @filename: Path to the .hex file
+ * @addr: Address to load the .hex file to
+ * @max_sz: The maximum size of the .hex file to load
+ * @as: The AddressSpace to load the .hex file to. The value of
+ *      address_space_memory is used if nothing is supplied here.
+ *
+ * Load a fixed .hex file into memory.
+ *
+ * Returns the size of the loaded .hex file on success, -1 otherwise.
+ */
+int load_targphys_hex_as(const char *filename,
+                         hwaddr addr, uint64_t max_sz, AddressSpace *as);
+
 /** load_image_targphys:
  * Same as load_image_targphys_as(), but doesn't allow the caller to specify
  * an AddressSpace.
@@ -210,6 +224,9 @@  void pstrcpy_targphys(const char *name,
 extern bool option_rom_has_mr;
 extern bool rom_file_has_mr;
 
+int rom_add_hex_file(const char *file, const char *fw_dir,
+                 hwaddr addr, int32_t bootindex,
+                 bool option_rom, MemoryRegion *mr, AddressSpace *as);
 int rom_add_file(const char *file, const char *fw_dir,
                  hwaddr addr, int32_t bootindex,
                  bool option_rom, MemoryRegion *mr, AddressSpace *as);