diff mbox series

[v2,1/1] include: Auto-generate the sizes lookup table

Message ID 20190103213320.2653-2-lbloch@janustech.com (mailing list archive)
State New, archived
Headers show
Series include: Auto-generate the sizes lookup table | expand

Commit Message

Leonid Bloch Jan. 3, 2019, 9:33 p.m. UTC
The lookup table for power-of-two sizes is now auto-generated during the
build, and not hard-coded into the units.h file.

This partially reverts commit 540b8492618eb.

Signed-off-by: Leonid Bloch <lbloch@janustech.com>
---
 .gitignore           |  1 +
 Makefile             |  5 +++
 block/qcow2.h        |  2 +-
 block/vdi.c          |  1 +
 include/qemu/units.h | 73 --------------------------------------------
 scripts/gen-sizes.sh | 66 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 74 insertions(+), 74 deletions(-)
 create mode 100755 scripts/gen-sizes.sh

Comments

Markus Armbruster Jan. 8, 2019, 9:31 a.m. UTC | #1
Copying our resident shell script guru Eric.

Leonid Bloch <lbloch@janustech.com> writes:

> The lookup table for power-of-two sizes is now auto-generated during the
> build, and not hard-coded into the units.h file.
>
> This partially reverts commit 540b8492618eb.
>
> Signed-off-by: Leonid Bloch <lbloch@janustech.com>
> ---
>  .gitignore           |  1 +
>  Makefile             |  5 +++
>  block/qcow2.h        |  2 +-
>  block/vdi.c          |  1 +
>  include/qemu/units.h | 73 --------------------------------------------
>  scripts/gen-sizes.sh | 66 +++++++++++++++++++++++++++++++++++++++
>  6 files changed, 74 insertions(+), 74 deletions(-)
>  create mode 100755 scripts/gen-sizes.sh

I'd leave it hard-coded.  Replacing a few trivial defines by an arguably
less trivial script doesn't feel like an improvement.  In this case, it
doesn't even save lines.  But I'm not the maintainer.  Hmm,
include/qemu/units.h lacks one.  For what it's worth, its only user
block/vdi.c is maintained by

    $ scripts/get_maintainer.pl -f block/vdi.c 
    Stefan Weil <sw@weilnetz.de> (maintainer:VDI)
    Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
    Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
    qemu-block@nongnu.org (open list:VDI)
    qemu-devel@nongnu.org (open list:All patches CC here)

> diff --git a/.gitignore b/.gitignore
> index 0430257313..721a7f4454 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -59,6 +59,7 @@
>  /qemu-version.h
>  /qemu-version.h.tmp
>  /module_block.h
> +/pow2_sizes.h
>  /scsi/qemu-pr-helper
>  /vhost-user-scsi
>  /vhost-user-blk
> diff --git a/Makefile b/Makefile
> index dd53965f77..db72786ccb 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -122,6 +122,8 @@ endif
>  
>  GENERATED_FILES += module_block.h
>  
> +GENERATED_FILES += pow2_sizes.h
> +
>  TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
>  TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
>  TRACE_DTRACE =
> @@ -499,6 +501,9 @@ ifdef CONFIG_MPATH
>  scsi/qemu-pr-helper$(EXESUF): LIBS += -ludev -lmultipath -lmpathpersist
>  endif
>  
> +pow2_sizes.h: $(SRC_PATH)/scripts/gen-sizes.sh
> +	$(call quiet-command,sh $(SRC_PATH)/scripts/gen-sizes.sh $@,"GEN","$@")
> +
>  qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
>  	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@")
>  
> diff --git a/block/qcow2.h b/block/qcow2.h
> index a98d24500b..f5fa419ae7 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -27,7 +27,7 @@
>  
>  #include "crypto/block.h"
>  #include "qemu/coroutine.h"
> -#include "qemu/units.h"
> +#include "pow2_sizes.h"
>  
>  //#define DEBUG_ALLOC
>  //#define DEBUG_ALLOC2
> diff --git a/block/vdi.c b/block/vdi.c
> index 2380daa583..06d7335b3e 100644
> --- a/block/vdi.c
> +++ b/block/vdi.c
> @@ -51,6 +51,7 @@
>  
>  #include "qemu/osdep.h"
>  #include "qemu/units.h"
> +#include "pow2_sizes.h"
>  #include "qapi/error.h"
>  #include "qapi/qobject-input-visitor.h"
>  #include "qapi/qapi-visit-block-core.h"
> diff --git a/include/qemu/units.h b/include/qemu/units.h
> index 1c959d182e..692db3fbb2 100644
> --- a/include/qemu/units.h
> +++ b/include/qemu/units.h
> @@ -17,77 +17,4 @@
>  #define PiB     (INT64_C(1) << 50)
>  #define EiB     (INT64_C(1) << 60)
>  
> -/*
> - * The following lookup table is intended to be used when a literal string of
> - * the number of bytes is required (for example if it needs to be stringified).
> - * It can also be used for generic shortcuts of power-of-two sizes.
> - * This table is generated using the AWK script below:
> - *
> - *  BEGIN {
> - *      suffix="KMGTPE";
> - *      for(i=10; i<64; i++) {
> - *          val=2**i;
> - *          s=substr(suffix, int(i/10), 1);
> - *          n=2**(i%10);
> - *          pad=21-int(log(n)/log(10));
> - *          printf("#define S_%d%siB %*d\n", n, s, pad, val);
> - *      }
> - *  }

If we decide not keep the defines hard-coded, I think we can delete the
AWK script.

> - */
> -
> -#define S_1KiB                  1024
> -#define S_2KiB                  2048
> -#define S_4KiB                  4096
> -#define S_8KiB                  8192
> -#define S_16KiB                16384
> -#define S_32KiB                32768
> -#define S_64KiB                65536
> -#define S_128KiB              131072
> -#define S_256KiB              262144
> -#define S_512KiB              524288
> -#define S_1MiB               1048576
> -#define S_2MiB               2097152
> -#define S_4MiB               4194304
> -#define S_8MiB               8388608
> -#define S_16MiB             16777216
> -#define S_32MiB             33554432
> -#define S_64MiB             67108864
> -#define S_128MiB           134217728
> -#define S_256MiB           268435456
> -#define S_512MiB           536870912
> -#define S_1GiB            1073741824
> -#define S_2GiB            2147483648
> -#define S_4GiB            4294967296
> -#define S_8GiB            8589934592
> -#define S_16GiB          17179869184
> -#define S_32GiB          34359738368
> -#define S_64GiB          68719476736
> -#define S_128GiB        137438953472
> -#define S_256GiB        274877906944
> -#define S_512GiB        549755813888
> -#define S_1TiB         1099511627776
> -#define S_2TiB         2199023255552
> -#define S_4TiB         4398046511104
> -#define S_8TiB         8796093022208
> -#define S_16TiB       17592186044416
> -#define S_32TiB       35184372088832
> -#define S_64TiB       70368744177664
> -#define S_128TiB     140737488355328
> -#define S_256TiB     281474976710656
> -#define S_512TiB     562949953421312
> -#define S_1PiB      1125899906842624
> -#define S_2PiB      2251799813685248
> -#define S_4PiB      4503599627370496
> -#define S_8PiB      9007199254740992
> -#define S_16PiB    18014398509481984
> -#define S_32PiB    36028797018963968
> -#define S_64PiB    72057594037927936
> -#define S_128PiB  144115188075855872
> -#define S_256PiB  288230376151711744
> -#define S_512PiB  576460752303423488
> -#define S_1EiB   1152921504606846976
> -#define S_2EiB   2305843009213693952
> -#define S_4EiB   4611686018427387904
> -#define S_8EiB   9223372036854775808
> -
>  #endif
> diff --git a/scripts/gen-sizes.sh b/scripts/gen-sizes.sh
> new file mode 100755
> index 0000000000..28fe62a4c2
> --- /dev/null
> +++ b/scripts/gen-sizes.sh

I'm not sure I'd use shell for this, but since you already wrote it and
it works...

> @@ -0,0 +1,66 @@
> +#!/bin/sh
> +
> +size_suffix() {
> +    case ${1} in
> +        1)
> +            printf "KiB"
> +            ;;
> +        2)
> +            printf "MiB"
> +            ;;
> +        3)
> +            printf "GiB"
> +            ;;
> +        4)
> +            printf "TiB"
> +            ;;
> +        5)
> +            printf "PiB"
> +            ;;
> +        6)
> +            printf "EiB"
> +            ;;
> +    esac
> +}

Terser:

       suf=('' 'K' 'M' 'G' 'T' 'P' 'E')
       printf "${suf[$1]}iB"

> +
> +print_sizes() {
> +    local p=10
> +    while [ ${p} -lt 64 ]
> +    do
> +        local pad=' '
> +        local n=$((p % 10))
> +        n=$((1 << n))
> +        [ $((n / 100)) -eq 0 ] && pad='  '
> +        [ $((n / 10)) -eq 0 ] && pad='   '
> +        local suff=$((p / 10))
> +        printf "#define S_%u%s%s%20u\n" ${n} "$(size_suffix ${suff})" \
> +            "${pad}" $((1 << p))
> +        p=$((p + 1))
> +    done

Rule of thumb: when you compute blank padding strings, you're not fully
exploiting printf :)

    local p
    for ((p=10; p < 64; p++))
    do
        local n=$((1 << (p % 10)))
	local sym=$(printf "S_%u%s" $n "$(size_suffix $((p / 10)))")
        printf "#define %-8s %20u\n" $sym $((1 << p))
    done

> +}
> +
> +print_header() {
> +    cat <<EOF
> +/* AUTOMATICALLY GENERATED, DO NOT MODIFY.
> + *
> + * The following lookup table is intended to be used when a literal string of
> + * the number of bytes is required (for example if it needs to be stringified).
> + * It can also be used for generic shortcuts of power-of-two sizes.
> + *
> + * Authors:
> + *   Leonid Bloch  <lbloch@janustech.com>
> + */
> +
> +#ifndef QEMU_SIZES_H
> +#define QEMU_SIZES_H
> +
> +EOF
> +}
> +
> +print_footer() {
> +    printf "\n#endif  /* QEMU_SIZES_H */\n"
> +}
> +
> +print_header  > "${1}"
> +print_sizes  >> "${1}"
> +print_footer >> "${1}"

Unchecked use of command-line arguments is not nice:

    $ scripts/gen-sizes.sh 
    scripts/gen-sizes.sh: line 64: : No such file or directory
    scripts/gen-sizes.sh: line 65: : No such file or directory
    scripts/gen-sizes.sh: line 66: : No such file or directory

You should error out if $# -ne 1.  But in such a simple script, I'd
dispense with arguments and print to stdout.  Matter of taste.
Rejecting $# -ne 0 is still nice then.
Kevin Wolf Jan. 8, 2019, 12:20 p.m. UTC | #2
Am 03.01.2019 um 22:33 hat Leonid Bloch geschrieben:
> The lookup table for power-of-two sizes is now auto-generated during the
> build, and not hard-coded into the units.h file.
> 
> This partially reverts commit 540b8492618eb.
> 
> Signed-off-by: Leonid Bloch <lbloch@janustech.com>

During a downstream review, Max found a problem with the table that we
could fix while we're touching it:

    Upstream: All >= S_2GiB are not valid ints.  (qemu assumes that
    sizeof(int) == 4, right?)  So S_2GiB should be 2147483648u and all
    above should be ...ull or better UINT64_C().

Kevin
Eric Blake Jan. 8, 2019, 3:19 p.m. UTC | #3
On 1/8/19 3:31 AM, Markus Armbruster wrote:
> Copying our resident shell script guru Eric.

Who already expressed a desire to keep things hard-coded on v1 ;)

And I've also already expressed a dislike for the entire S_XXXiB macro
list, thinking that a nicer solution would instead be teaching QemuOpt
that instead of encoding all defaults as strings (where we are using
S_XXXiB strings in order to work around the fact that we have to store
the stringized value of an integer as the default), we should instead
encode defaults as an appropriate type (and then do a runtime conversion
of that type into a string when displaying the help output that shows
the default value).  But I don't know who wants to sign up for that
work, as that is already putting lipstick on a pig (we'd rather be using
Markus' work on converting command-line parsing to QAPI, rather than
expanding QemuOpts even more).  The more work we put into the S_XXXiB
table, the harder it will be to rip it out later when we do come up with
a nicer solution that does not require users to store integer defaults
as a correctly-spelled string.

> 
> Leonid Bloch <lbloch@janustech.com> writes:
> 
>> The lookup table for power-of-two sizes is now auto-generated during the
>> build, and not hard-coded into the units.h file.
>>
>> This partially reverts commit 540b8492618eb.
>>

> 
> I'd leave it hard-coded.  Replacing a few trivial defines by an arguably
> less trivial script doesn't feel like an improvement.  In this case, it
> doesn't even save lines.  But I'm not the maintainer.

That's several people that have leaned towards not having a generator
script.


>> +++ b/include/qemu/units.h
>> @@ -17,77 +17,4 @@
>>  #define PiB     (INT64_C(1) << 50)
>>  #define EiB     (INT64_C(1) << 60)
>>  
>> -/*
>> - * The following lookup table is intended to be used when a literal string of
>> - * the number of bytes is required (for example if it needs to be stringified).
>> - * It can also be used for generic shortcuts of power-of-two sizes.
>> - * This table is generated using the AWK script below:
>> - *
>> - *  BEGIN {
>> - *      suffix="KMGTPE";
>> - *      for(i=10; i<64; i++) {
>> - *          val=2**i;
>> - *          s=substr(suffix, int(i/10), 1);
>> - *          n=2**(i%10);
>> - *          pad=21-int(log(n)/log(10));
>> - *          printf("#define S_%d%siB %*d\n", n, s, pad, val);
>> - *      }
>> - *  }
> 
> If we decide not keep the defines hard-coded, I think we can delete the
> AWK script.

Kevin also pointed out that if we keep things hard-coded, we still need
to fix the hard-coding to use correct types for the second half of the
table:

>> -#define S_4GiB            4294967296
>> -#define S_8GiB            8589934592

and even if we write a generator instead of hard-coding, our generator
also needs to make that change, which makes the generator a bit more
complicated.

>> +++ b/scripts/gen-sizes.sh
> 
> I'm not sure I'd use shell for this, but since you already wrote it and
> it works...
> 
>> @@ -0,0 +1,66 @@
>> +#!/bin/sh

Note that this selects 'sh',...

>> +
>> +size_suffix() {
>> +    case ${1} in
>> +        1)
>> +            printf "KiB"
>> +            ;;
>> +        2)
>> +            printf "MiB"
>> +            ;;
>> +        3)
>> +            printf "GiB"
>> +            ;;
>> +        4)
>> +            printf "TiB"
>> +            ;;
>> +        5)
>> +            printf "PiB"
>> +            ;;
>> +        6)
>> +            printf "EiB"
>> +            ;;
>> +    esac
>> +}
> 
> Terser:
> 
>        suf=('' 'K' 'M' 'G' 'T' 'P' 'E')
>        printf "${suf[$1]}iB"

...but this terser representation depends on a bash-ism, so if we did go
with a generator, you'd have to fix the shebang to be bash.  The list
starts at S_1KiB, so the '' entry in suf is being used as a filler to
get indexing to work as desired.

Or, sticking to POSIX shell, you could do a similar lookup using sed:

echo XKMGTPE | sed "s/^.\{$1\}\(.\).*/\1/"

> 
>> +
>> +print_sizes() {
>> +    local p=10

local is a bash-ism. So your script is already dependent on bash, unless
you drop the use of local, even without the above paragraph on
shortening the case statement into a one-liner.

>> +    while [ ${p} -lt 64 ]
>> +    do
>> +        local pad=' '
>> +        local n=$((p % 10))
>> +        n=$((1 << n))
>> +        [ $((n / 100)) -eq 0 ] && pad='  '
>> +        [ $((n / 10)) -eq 0 ] && pad='   '
>> +        local suff=$((p / 10))
>> +        printf "#define S_%u%s%s%20u\n" ${n} "$(size_suffix ${suff})" \
>> +            "${pad}" $((1 << p))
>> +        p=$((p + 1))
>> +    done
> 
> Rule of thumb: when you compute blank padding strings, you're not fully
> exploiting printf :)
> 
>     local p
>     for ((p=10; p < 64; p++))
>     do
>         local n=$((1 << (p % 10)))
> 	local sym=$(printf "S_%u%s" $n "$(size_suffix $((p / 10)))")
>         printf "#define %-8s %20u\n" $sym $((1 << p))
>     done
> 
>> +}
>> +
>> +print_header() {
>> +    cat <<EOF
>> +/* AUTOMATICALLY GENERATED, DO NOT MODIFY.
>> + *
>> + * The following lookup table is intended to be used when a literal string of
>> + * the number of bytes is required (for example if it needs to be stringified).
>> + * It can also be used for generic shortcuts of power-of-two sizes.
>> + *
>> + * Authors:
>> + *   Leonid Bloch  <lbloch@janustech.com>
>> + */

Do we still need Authors: lines in files, or can we just trust git
history (which is going to be more accurate anyway)?

>> +
>> +#ifndef QEMU_SIZES_H
>> +#define QEMU_SIZES_H
>> +
>> +EOF
>> +}
>> +
>> +print_footer() {
>> +    printf "\n#endif  /* QEMU_SIZES_H */\n"
>> +}
>> +
>> +print_header  > "${1}"
>> +print_sizes  >> "${1}"
>> +print_footer >> "${1}"
> 
> Unchecked use of command-line arguments is not nice:
> 
>     $ scripts/gen-sizes.sh 
>     scripts/gen-sizes.sh: line 64: : No such file or directory
>     scripts/gen-sizes.sh: line 65: : No such file or directory
>     scripts/gen-sizes.sh: line 66: : No such file or directory
> 
> You should error out if $# -ne 1.  But in such a simple script, I'd
> dispense with arguments and print to stdout.  Matter of taste.
> Rejecting $# -ne 0 is still nice then.

Also, do you really need to break the work into functions that get
called exactly once?  Just do the work directly.
Leonid Bloch Jan. 10, 2019, 9:42 a.m. UTC | #4
Hi,

On 1/8/19 11:31 AM, Markus Armbruster wrote:
> I'd leave it hard-coded.  Replacing a few trivial defines by an arguably
> less trivial script doesn't feel like an improvement.  In this case, it
> doesn't even save lines.

As I've said, I'm fine with that. The autogeneration sounds the right 
thing to do here, as the table is autogenertaed anyway, but indeed it is 
already there, explained, and even the script for generating it appears 
in the comments for reproducibility.

> 
> I'm not sure I'd use shell for this, but since you already wrote it and
> it works...
> 

If you've noticed, the original script was in AWK. But to be as generic 
as possible, I didn't write the generation script in AWK because even 
AWK is not guaranteed to be installed on the build system. The only 
interpreted language that is guaranteed to be there is shell (most basic 
shell) because .configure itself needs it.

Sure, the script could be prettier and shorter, but I wanted to keep it 
compatible with the most basic shell, not only Bash, and without needing 
external programs.

Eric - thanks for the comment about 'local' - I will get rid of it if we 
decide to include this patch.

> 
> Unchecked use of command-line arguments is not nice:
> 
>      $ scripts/gen-sizes.sh
>      scripts/gen-sizes.sh: line 64: : No such file or directory
>      scripts/gen-sizes.sh: line 65: : No such file or directory
>      scripts/gen-sizes.sh: line 66: : No such file or directory
> 
> You should error out if $# -ne 1.  But in such a simple script, I'd
> dispense with arguments and print to stdout.  Matter of taste.
> Rejecting $# -ne 0 is still nice then.
> 

Agree, thanks!

Leonid.
Leonid Bloch Jan. 10, 2019, 10:04 a.m. UTC | #5
Hi,

On 1/8/19 2:20 PM, Kevin Wolf wrote:
> Am 03.01.2019 um 22:33 hat Leonid Bloch geschrieben:
>> The lookup table for power-of-two sizes is now auto-generated during the
>> build, and not hard-coded into the units.h file.
>>
>> This partially reverts commit 540b8492618eb.
>>
>> Signed-off-by: Leonid Bloch <lbloch@janustech.com>
> 
> During a downstream review, Max found a problem with the table that we
> could fix while we're touching it:
> 
>      Upstream: All >= S_2GiB are not valid ints.  (qemu assumes that
>      sizeof(int) == 4, right?)  So S_2GiB should be 2147483648u and all
>      above should be ...ull or better UINT64_C().

But the initial reasoning for this table was to have a pure number 
there. If there will be strings like "2147483648u/ull" or 
"UINT64_C(...)" there, they will be stringified, literally, and will 
appear as such inside the binary. If specifying the unit64 type is 
really needed, one can always use, e.g., 2 * GiB, from units.h.

Leonid.
Markus Armbruster Jan. 10, 2019, 12:40 p.m. UTC | #6
Leonid Bloch <lbloch@janustech.com> writes:

> Hi,
>
> On 1/8/19 11:31 AM, Markus Armbruster wrote:
>> I'd leave it hard-coded.  Replacing a few trivial defines by an arguably
>> less trivial script doesn't feel like an improvement.  In this case, it
>> doesn't even save lines.
>
> As I've said, I'm fine with that. The autogeneration sounds the right 
> thing to do here, as the table is autogenertaed anyway, but indeed it is 
> already there, explained, and even the script for generating it appears 
> in the comments for reproducibility.
>
>> 
>> I'm not sure I'd use shell for this, but since you already wrote it and
>> it works...
>> 
>
> If you've noticed, the original script was in AWK. But to be as generic 
> as possible, I didn't write the generation script in AWK because even 
> AWK is not guaranteed to be installed on the build system. The only 
> interpreted language that is guaranteed to be there is shell (most basic 
> shell) because .configure itself needs it.

Well,

    $ grep -w awk configure 
        maj=`libgcrypt-config --version | awk -F . '{print $1}'`
        min=`libgcrypt-config --version | awk -F . '{print $2}'`

The build also requires Python.

[...]
Markus Armbruster Jan. 10, 2019, 12:51 p.m. UTC | #7
Leonid Bloch <lbloch@janustech.com> writes:

> Hi,
>
> On 1/8/19 2:20 PM, Kevin Wolf wrote:
>> Am 03.01.2019 um 22:33 hat Leonid Bloch geschrieben:
>>> The lookup table for power-of-two sizes is now auto-generated during the
>>> build, and not hard-coded into the units.h file.
>>>
>>> This partially reverts commit 540b8492618eb.
>>>
>>> Signed-off-by: Leonid Bloch <lbloch@janustech.com>
>> 
>> During a downstream review, Max found a problem with the table that we
>> could fix while we're touching it:
>> 
>>      Upstream: All >= S_2GiB are not valid ints.  (qemu assumes that
>>      sizeof(int) == 4, right?)  So S_2GiB should be 2147483648u and all
>>      above should be ...ull or better UINT64_C().

So their (integer) type can vary.  Whether that's a problem depends on
how the macros are used.

> But the initial reasoning for this table was to have a pure number 
> there. If there will be strings like "2147483648u/ull" or 
> "UINT64_C(...)" there, they will be stringified, literally, and will 
> appear as such inside the binary.

"Macro that expands to an integer constant that stringify() turns into a
decimal number" is a rather peculiar contract.

>                                   If specifying the unit64 type is 
> really needed, one can always use, e.g., 2 * GiB, from units.h.

Right now I suspect these S_ macros should generally be avoided.

We have 54 of them.  I count six uses:

    block/qcow2.h:#define QCOW_MAX_REFTABLE_SIZE S_8MiB
    block/qcow2.h:#define QCOW_MAX_L1_SIZE S_32MiB
    block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB
    block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB
    block/qcow2.h:#define DEFAULT_CLUSTER_SIZE S_64KiB
    block/vdi.c:#define DEFAULT_CLUSTER_SIZE S_1MiB

Which if them truly need stringify-able integers?
Alberto Garcia Jan. 10, 2019, 12:51 p.m. UTC | #8
On Tue 08 Jan 2019 01:20:21 PM CET, Kevin Wolf wrote:
> During a downstream review, Max found a problem with the table that we
> could fix while we're touching it:
>
>     Upstream: All >= S_2GiB are not valid ints.  (qemu assumes that
>     sizeof(int) == 4, right?)  So S_2GiB should be 2147483648u and all
>     above should be ...ull or better UINT64_C().

Should it? sizeof(S_2GiB) is already 8 without having to add any suffix.

Berto
Eric Blake Jan. 10, 2019, 1:33 p.m. UTC | #9
On 1/10/19 3:42 AM, Leonid Bloch wrote:

> If you've noticed, the original script was in AWK. But to be as generic 
> as possible, I didn't write the generation script in AWK because even 
> AWK is not guaranteed to be installed on the build system. The only 
> interpreted language that is guaranteed to be there is shell (most basic 
> shell) because .configure itself needs it.
> 
> Sure, the script could be prettier and shorter, but I wanted to keep it 
> compatible with the most basic shell, not only Bash, and without needing 
> external programs.

awk is portable - autoconf-generated scripts assume it exists (same as
sed, grep, ls, ...) if you have a /bin/sh. In fact, the GNU Coding
Standards has a nice list of programs that you can blindly assume exist.
 It's true you may have to use a lowest-common-denominator when using
the tools that are globally available to be portable (not all awk
scripts are portable, even if awk is universally available on systems
with a Bourne-like shell).  So avoiding awk on the grounds that it might
not be present is the wrong approach.

> Eric - thanks for the comment about 'local' - I will get rid of it if we 
> decide to include this patch.

I'm still not convinced we need it.  I would much rather see a patch
that makes QemuOpt accept default integer values as integers rather than
as strings, so we don't have to worry about stringifying special macros.
Leonid Bloch Jan. 10, 2019, 4:49 p.m. UTC | #10
On 1/10/19 2:40 PM, Markus Armbruster wrote:
> Leonid Bloch <lbloch@janustech.com> writes:
> 
>> Hi,
>>
>> On 1/8/19 11:31 AM, Markus Armbruster wrote:
>>> I'd leave it hard-coded.  Replacing a few trivial defines by an arguably
>>> less trivial script doesn't feel like an improvement.  In this case, it
>>> doesn't even save lines.
>>
>> As I've said, I'm fine with that. The autogeneration sounds the right
>> thing to do here, as the table is autogenertaed anyway, but indeed it is
>> already there, explained, and even the script for generating it appears
>> in the comments for reproducibility.
>>
>>>
>>> I'm not sure I'd use shell for this, but since you already wrote it and
>>> it works...
>>>
>>
>> If you've noticed, the original script was in AWK. But to be as generic
>> as possible, I didn't write the generation script in AWK because even
>> AWK is not guaranteed to be installed on the build system. The only
>> interpreted language that is guaranteed to be there is shell (most basic
>> shell) because .configure itself needs it.
> 
> Well,
> 
>      $ grep -w awk configure
>          maj=`libgcrypt-config --version | awk -F . '{print $1}'`
>          min=`libgcrypt-config --version | awk -F . '{print $2}'`
> 
> The build also requires Python.

Not for critical things as this. I mean it can still build OK without 
Python.

> 
> [...]
>
Leonid Bloch Jan. 10, 2019, 4:53 p.m. UTC | #11
On 1/10/19 2:51 PM,  wrote:
> Leonid Bloch <lbloch@janustech.com> writes:
> 
>> Hi,
>>
>> On 1/8/19 2:20 PM, Kevin Wolf wrote:
>>> Am 03.01.2019 um 22:33 hat Leonid Bloch geschrieben:
>>>> The lookup table for power-of-two sizes is now auto-generated during the
>>>> build, and not hard-coded into the units.h file.
>>>>
>>>> This partially reverts commit 540b8492618eb.
>>>>
>>>> Signed-off-by: Leonid Bloch <lbloch@janustech.com>
>>>
>>> During a downstream review, Max found a problem with the table that we
>>> could fix while we're touching it:
>>>
>>>       Upstream: All >= S_2GiB are not valid ints.  (qemu assumes that
>>>       sizeof(int) == 4, right?)  So S_2GiB should be 2147483648u and all
>>>       above should be ...ull or better UINT64_C().
> 
> So their (integer) type can vary.  Whether that's a problem depends on
> how the macros are used.
> 
>> But the initial reasoning for this table was to have a pure number
>> there. If there will be strings like "2147483648u/ull" or
>> "UINT64_C(...)" there, they will be stringified, literally, and will
>> appear as such inside the binary.
> 
> "Macro that expands to an integer constant that stringify() turns into a
> decimal number" is a rather peculiar contract.
> 
>>                                    If specifying the unit64 type is
>> really needed, one can always use, e.g., 2 * GiB, from units.h.
> 
> Right now I suspect these S_ macros should generally be avoided.
> 
> We have 54 of them.  I count six uses:
> 
>      block/qcow2.h:#define QCOW_MAX_REFTABLE_SIZE S_8MiB
>      block/qcow2.h:#define QCOW_MAX_L1_SIZE S_32MiB
>      block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB
>      block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB
>      block/qcow2.h:#define DEFAULT_CLUSTER_SIZE S_64KiB
>      block/vdi.c:#define DEFAULT_CLUSTER_SIZE S_1MiB
> 
> Which if them truly need stringify-able integers?
> 

These two:

block/qcow2.h:#define DEFAULT_CLUSTER_SIZE
block/vdi.c:#define DEFAULT_CLUSTER_SIZE.
Markus Armbruster Jan. 10, 2019, 7:11 p.m. UTC | #12
Leonid Bloch <lbloch@janustech.com> writes:

> On 1/10/19 2:40 PM, Markus Armbruster wrote:
>> Leonid Bloch <lbloch@janustech.com> writes:
>> 
>>> Hi,
>>>
>>> On 1/8/19 11:31 AM, Markus Armbruster wrote:
>>>> I'd leave it hard-coded.  Replacing a few trivial defines by an arguably
>>>> less trivial script doesn't feel like an improvement.  In this case, it
>>>> doesn't even save lines.
>>>
>>> As I've said, I'm fine with that. The autogeneration sounds the right
>>> thing to do here, as the table is autogenertaed anyway, but indeed it is
>>> already there, explained, and even the script for generating it appears
>>> in the comments for reproducibility.
>>>
>>>>
>>>> I'm not sure I'd use shell for this, but since you already wrote it and
>>>> it works...
>>>>
>>>
>>> If you've noticed, the original script was in AWK. But to be as generic
>>> as possible, I didn't write the generation script in AWK because even
>>> AWK is not guaranteed to be installed on the build system. The only
>>> interpreted language that is guaranteed to be there is shell (most basic
>>> shell) because .configure itself needs it.
>> 
>> Well,
>> 
>>      $ grep -w awk configure
>>          maj=`libgcrypt-config --version | awk -F . '{print $1}'`
>>          min=`libgcrypt-config --version | awk -F . '{print $2}'`
>> 
>> The build also requires Python.
>
> Not for critical things as this. I mean it can still build OK without 
> Python.

You canÃ't.  Try it :)

>> 
>> [...]
>>
Eric Blake Jan. 10, 2019, 7:33 p.m. UTC | #13
On 1/10/19 7:33 AM, Eric Blake wrote:

>> Eric - thanks for the comment about 'local' - I will get rid of it if we 
>> decide to include this patch.
> 
> I'm still not convinced we need it.  I would much rather see a patch
> that makes QemuOpt accept default integer values as integers rather than
> as strings, so we don't have to worry about stringifying special macros.

So I took a quick look at QemuOpts, and it turned out to be easier than
I feared; hence:
https://lists.gnu.org/archive/html/qemu-devel/2019-01/msg02041.html
Markus Armbruster Jan. 11, 2019, 7:50 a.m. UTC | #14
Leonid Bloch <lbloch@janustech.com> writes:

> On 1/10/19 2:51 PM,  wrote:
>> Leonid Bloch <lbloch@janustech.com> writes:
>> 
>>> Hi,
>>>
>>> On 1/8/19 2:20 PM, Kevin Wolf wrote:
>>>> Am 03.01.2019 um 22:33 hat Leonid Bloch geschrieben:
>>>>> The lookup table for power-of-two sizes is now auto-generated during the
>>>>> build, and not hard-coded into the units.h file.
>>>>>
>>>>> This partially reverts commit 540b8492618eb.
>>>>>
>>>>> Signed-off-by: Leonid Bloch <lbloch@janustech.com>
>>>>
>>>> During a downstream review, Max found a problem with the table that we
>>>> could fix while we're touching it:
>>>>
>>>>       Upstream: All >= S_2GiB are not valid ints.  (qemu assumes that
>>>>       sizeof(int) == 4, right?)  So S_2GiB should be 2147483648u and all
>>>>       above should be ...ull or better UINT64_C().
>> 
>> So their (integer) type can vary.  Whether that's a problem depends on
>> how the macros are used.
>> 
>>> But the initial reasoning for this table was to have a pure number
>>> there. If there will be strings like "2147483648u/ull" or
>>> "UINT64_C(...)" there, they will be stringified, literally, and will
>>> appear as such inside the binary.
>> 
>> "Macro that expands to an integer constant that stringify() turns into a
>> decimal number" is a rather peculiar contract.
>> 
>>>                                    If specifying the unit64 type is
>>> really needed, one can always use, e.g., 2 * GiB, from units.h.
>> 
>> Right now I suspect these S_ macros should generally be avoided.
>> 
>> We have 54 of them.  I count six uses:
>> 
>>      block/qcow2.h:#define QCOW_MAX_REFTABLE_SIZE S_8MiB
>>      block/qcow2.h:#define QCOW_MAX_L1_SIZE S_32MiB
>>      block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB
>>      block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB
>>      block/qcow2.h:#define DEFAULT_CLUSTER_SIZE S_64KiB
>>      block/vdi.c:#define DEFAULT_CLUSTER_SIZE S_1MiB
>> 
>> Which if them truly need stringify-able integers?
>> 
>
> These two:
>
> block/qcow2.h:#define DEFAULT_CLUSTER_SIZE
> block/vdi.c:#define DEFAULT_CLUSTER_SIZE.

Compared to the complexity visible in this thread,

    .def_value_str = "65536", /* must match DEFAULT_CLUSTER_SIZE */

looks attractively stupid to me then.
Eric Blake Jan. 11, 2019, 3:29 p.m. UTC | #15
On 1/11/19 1:50 AM, Markus Armbruster wrote:

>>> We have 54 of them.  I count six uses:
>>>
>>>      block/qcow2.h:#define QCOW_MAX_REFTABLE_SIZE S_8MiB
>>>      block/qcow2.h:#define QCOW_MAX_L1_SIZE S_32MiB
>>>      block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB
>>>      block/qcow2.h:#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB
>>>      block/qcow2.h:#define DEFAULT_CLUSTER_SIZE S_64KiB
>>>      block/vdi.c:#define DEFAULT_CLUSTER_SIZE S_1MiB
>>>
>>> Which if them truly need stringify-able integers?
>>>
>>
>> These two:
>>
>> block/qcow2.h:#define DEFAULT_CLUSTER_SIZE
>> block/vdi.c:#define DEFAULT_CLUSTER_SIZE.
> 
> Compared to the complexity visible in this thread,
> 
>     .def_value_str = "65536", /* must match DEFAULT_CLUSTER_SIZE */
> 
> looks attractively stupid to me then.

In general, .def_value_str = stringify(...) looks odd, compared to my v3
patch that lets us do .def_value_int = DEFAULT_CLUSTER_SIZE; my patch
also has the benefit that if DEFAULT_CLUSTER_SIZE changes value, we
don't have to remember to update the string.
diff mbox series

Patch

diff --git a/.gitignore b/.gitignore
index 0430257313..721a7f4454 100644
--- a/.gitignore
+++ b/.gitignore
@@ -59,6 +59,7 @@ 
 /qemu-version.h
 /qemu-version.h.tmp
 /module_block.h
+/pow2_sizes.h
 /scsi/qemu-pr-helper
 /vhost-user-scsi
 /vhost-user-blk
diff --git a/Makefile b/Makefile
index dd53965f77..db72786ccb 100644
--- a/Makefile
+++ b/Makefile
@@ -122,6 +122,8 @@  endif
 
 GENERATED_FILES += module_block.h
 
+GENERATED_FILES += pow2_sizes.h
+
 TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
 TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
 TRACE_DTRACE =
@@ -499,6 +501,9 @@  ifdef CONFIG_MPATH
 scsi/qemu-pr-helper$(EXESUF): LIBS += -ludev -lmultipath -lmpathpersist
 endif
 
+pow2_sizes.h: $(SRC_PATH)/scripts/gen-sizes.sh
+	$(call quiet-command,sh $(SRC_PATH)/scripts/gen-sizes.sh $@,"GEN","$@")
+
 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@")
 
diff --git a/block/qcow2.h b/block/qcow2.h
index a98d24500b..f5fa419ae7 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -27,7 +27,7 @@ 
 
 #include "crypto/block.h"
 #include "qemu/coroutine.h"
-#include "qemu/units.h"
+#include "pow2_sizes.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
diff --git a/block/vdi.c b/block/vdi.c
index 2380daa583..06d7335b3e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -51,6 +51,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
+#include "pow2_sizes.h"
 #include "qapi/error.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
diff --git a/include/qemu/units.h b/include/qemu/units.h
index 1c959d182e..692db3fbb2 100644
--- a/include/qemu/units.h
+++ b/include/qemu/units.h
@@ -17,77 +17,4 @@ 
 #define PiB     (INT64_C(1) << 50)
 #define EiB     (INT64_C(1) << 60)
 
-/*
- * The following lookup table is intended to be used when a literal string of
- * the number of bytes is required (for example if it needs to be stringified).
- * It can also be used for generic shortcuts of power-of-two sizes.
- * This table is generated using the AWK script below:
- *
- *  BEGIN {
- *      suffix="KMGTPE";
- *      for(i=10; i<64; i++) {
- *          val=2**i;
- *          s=substr(suffix, int(i/10), 1);
- *          n=2**(i%10);
- *          pad=21-int(log(n)/log(10));
- *          printf("#define S_%d%siB %*d\n", n, s, pad, val);
- *      }
- *  }
- */
-
-#define S_1KiB                  1024
-#define S_2KiB                  2048
-#define S_4KiB                  4096
-#define S_8KiB                  8192
-#define S_16KiB                16384
-#define S_32KiB                32768
-#define S_64KiB                65536
-#define S_128KiB              131072
-#define S_256KiB              262144
-#define S_512KiB              524288
-#define S_1MiB               1048576
-#define S_2MiB               2097152
-#define S_4MiB               4194304
-#define S_8MiB               8388608
-#define S_16MiB             16777216
-#define S_32MiB             33554432
-#define S_64MiB             67108864
-#define S_128MiB           134217728
-#define S_256MiB           268435456
-#define S_512MiB           536870912
-#define S_1GiB            1073741824
-#define S_2GiB            2147483648
-#define S_4GiB            4294967296
-#define S_8GiB            8589934592
-#define S_16GiB          17179869184
-#define S_32GiB          34359738368
-#define S_64GiB          68719476736
-#define S_128GiB        137438953472
-#define S_256GiB        274877906944
-#define S_512GiB        549755813888
-#define S_1TiB         1099511627776
-#define S_2TiB         2199023255552
-#define S_4TiB         4398046511104
-#define S_8TiB         8796093022208
-#define S_16TiB       17592186044416
-#define S_32TiB       35184372088832
-#define S_64TiB       70368744177664
-#define S_128TiB     140737488355328
-#define S_256TiB     281474976710656
-#define S_512TiB     562949953421312
-#define S_1PiB      1125899906842624
-#define S_2PiB      2251799813685248
-#define S_4PiB      4503599627370496
-#define S_8PiB      9007199254740992
-#define S_16PiB    18014398509481984
-#define S_32PiB    36028797018963968
-#define S_64PiB    72057594037927936
-#define S_128PiB  144115188075855872
-#define S_256PiB  288230376151711744
-#define S_512PiB  576460752303423488
-#define S_1EiB   1152921504606846976
-#define S_2EiB   2305843009213693952
-#define S_4EiB   4611686018427387904
-#define S_8EiB   9223372036854775808
-
 #endif
diff --git a/scripts/gen-sizes.sh b/scripts/gen-sizes.sh
new file mode 100755
index 0000000000..28fe62a4c2
--- /dev/null
+++ b/scripts/gen-sizes.sh
@@ -0,0 +1,66 @@ 
+#!/bin/sh
+
+size_suffix() {
+    case ${1} in
+        1)
+            printf "KiB"
+            ;;
+        2)
+            printf "MiB"
+            ;;
+        3)
+            printf "GiB"
+            ;;
+        4)
+            printf "TiB"
+            ;;
+        5)
+            printf "PiB"
+            ;;
+        6)
+            printf "EiB"
+            ;;
+    esac
+}
+
+print_sizes() {
+    local p=10
+    while [ ${p} -lt 64 ]
+    do
+        local pad=' '
+        local n=$((p % 10))
+        n=$((1 << n))
+        [ $((n / 100)) -eq 0 ] && pad='  '
+        [ $((n / 10)) -eq 0 ] && pad='   '
+        local suff=$((p / 10))
+        printf "#define S_%u%s%s%20u\n" ${n} "$(size_suffix ${suff})" \
+            "${pad}" $((1 << p))
+        p=$((p + 1))
+    done
+}
+
+print_header() {
+    cat <<EOF
+/* AUTOMATICALLY GENERATED, DO NOT MODIFY.
+ *
+ * The following lookup table is intended to be used when a literal string of
+ * the number of bytes is required (for example if it needs to be stringified).
+ * It can also be used for generic shortcuts of power-of-two sizes.
+ *
+ * Authors:
+ *   Leonid Bloch  <lbloch@janustech.com>
+ */
+
+#ifndef QEMU_SIZES_H
+#define QEMU_SIZES_H
+
+EOF
+}
+
+print_footer() {
+    printf "\n#endif  /* QEMU_SIZES_H */\n"
+}
+
+print_header  > "${1}"
+print_sizes  >> "${1}"
+print_footer >> "${1}"