Message ID | 20250415-b4-pks-parse-options-integers-v2-4-ce07441a1f01@pks.im (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | parse-options: harden handling of integer values | expand |
Hi Patrick On 15/04/2025 13:14, Patrick Steinhardt wrote: > We have two generic ways to parse integers in the "parse-options" > subsytem: > > - `OPTION_INTEGER` parses a signed integer. > > - `OPTION_MAGNITUDE` parses an unsigned integer, but it also > interprets suffixes like "k" or "g". > > Notably missing is a middle ground that parses unsigned integers without > interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to > plug this gap. This option type will be used in subsequent commits. I think this is a useful addition. I wonder about the way it handles negative values though. For types narrower than uintmax_t "-1" will be rejected but large negative values that parse as small positive numbers will be accepted. Perhaps we should explicitly reject strings starting with "-" as we do in git_parse_ulong() which is used by OPTION_MAGNITUDE. This patch also needs the fix from patch 2 to detect overflows for uintmax_t. Best Wishes Phillip [1] https://lore.kernel.org/git/NYMqsJ7uttDzFT2OOEg5LLsxCSoQhTzqBs16KrMHGEKC7LzOAiYnYTEZavRQWqGH41UgjdwScwer7MssNzI7AEDHnD8GTBWvoBIqJ2e7D6g=@proton.me/ > Signed-off-by: Patrick Steinhardt <ps@pks.im> > --- > parse-options.c | 43 +++++++++++++++++++++++++++++++++++++++++++ > parse-options.h | 12 ++++++++++++ > t/helper/test-parse-options.c | 4 +++- > t/t0040-parse-options.sh | 18 +++++++++++++++++- > 4 files changed, 75 insertions(+), 2 deletions(-) > > diff --git a/parse-options.c b/parse-options.c > index ae836c384c7..9670e46a679 100644 > --- a/parse-options.c > +++ b/parse-options.c > @@ -216,6 +216,49 @@ static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p, > optname(opt, flags)); > } > } > + case OPTION_UNSIGNED: > + { > + uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision); > + uintmax_t value; > + > + if (unset) { > + value = 0; > + } else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { > + value = opt->defval; > + } else if (get_arg(p, opt, flags, &arg)) { > + return -1; > + } else if (!*arg) { > + return error(_("%s expects a numerical value"), > + optname(opt, flags)); > + } else { > + value = strtoumax(arg, (char **)&s, 10); > + if (*s) > + return error(_("%s expects a numerical value"), > + optname(opt, flags)); > + } > + > + if (value > upper_bound) > + return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX), > + value, optname(opt, flags), upper_bound); > + > + switch (opt->precision) { > + case 1: > + *(int8_t *)opt->value = value; > + return 0; > + case 2: > + *(int16_t *)opt->value = value; > + return 0; > + case 4: > + *(int32_t *)opt->value = value; > + return 0; > + case 8: > + *(int64_t *)opt->value = value; > + return 0; > + default: > + BUG("invalid precision for option %s", > + optname(opt, flags)); > + } > + } > case OPTION_MAGNITUDE: > { > uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision); > diff --git a/parse-options.h b/parse-options.h > index 4b561679581..20ea7d2ab13 100644 > --- a/parse-options.h > +++ b/parse-options.h > @@ -25,6 +25,7 @@ enum parse_opt_type { > /* options with arguments (usually) */ > OPTION_STRING, > OPTION_INTEGER, > + OPTION_UNSIGNED, > OPTION_MAGNITUDE, > OPTION_CALLBACK, > OPTION_LOWLEVEL_CALLBACK, > @@ -224,6 +225,16 @@ struct option { > .help = (h), \ > .flags = (f), \ > } > +#define OPT_UNSIGNED_F(s, l, v, h, f) { \ > + .type = OPTION_UNSIGNED, \ > + .short_name = (s), \ > + .long_name = (l), \ > + .value = (v), \ > + .precision = sizeof(*v), \ > + .argh = N_("n"), \ > + .help = (h), \ > + .flags = (f), \ > +} > > #define OPT_END() { \ > .type = OPTION_END, \ > @@ -276,6 +287,7 @@ struct option { > #define OPT_CMDMODE(s, l, v, h, i) OPT_CMDMODE_F(s, l, v, h, i, 0) > > #define OPT_INTEGER(s, l, v, h) OPT_INTEGER_F(s, l, v, h, 0) > +#define OPT_UNSIGNED(s, l, v, h) OPT_UNSIGNED_F(s, l, v, h, 0) > #define OPT_MAGNITUDE(s, l, v, h) { \ > .type = OPTION_MAGNITUDE, \ > .short_name = (s), \ > diff --git a/t/helper/test-parse-options.c b/t/helper/test-parse-options.c > index 46deb4317ef..0d559288d9c 100644 > --- a/t/helper/test-parse-options.c > +++ b/t/helper/test-parse-options.c > @@ -120,7 +120,7 @@ int cmd__parse_options(int argc, const char **argv) > }; > struct string_list expect = STRING_LIST_INIT_NODUP; > struct string_list list = STRING_LIST_INIT_NODUP; > - uint16_t m16 = 0; > + uint16_t m16 = 0, u16 = 0; > int16_t i16 = 0; > > struct option options[] = { > @@ -142,6 +142,7 @@ int cmd__parse_options(int argc, const char **argv) > OPT_GROUP(""), > OPT_INTEGER('i', "integer", &integer, "get a integer"), > OPT_INTEGER(0, "i16", &i16, "get a 16 bit integer"), > + OPT_UNSIGNED(0, "u16", &u16, "get a 16 bit unsigned integer"), > OPT_INTEGER('j', NULL, &integer, "get a integer, too"), > OPT_MAGNITUDE('m', "magnitude", &magnitude, "get a magnitude"), > OPT_MAGNITUDE(0, "m16", &m16, "get a 16 bit magnitude"), > @@ -215,6 +216,7 @@ int cmd__parse_options(int argc, const char **argv) > show(&expect, &ret, "boolean: %d", boolean); > show(&expect, &ret, "integer: %d", integer); > show(&expect, &ret, "i16: %"PRIdMAX, (intmax_t) i16); > + show(&expect, &ret, "u16: %"PRIuMAX, (uintmax_t) u16); > show(&expect, &ret, "magnitude: %lu", magnitude); > show(&expect, &ret, "m16: %"PRIuMAX, (uintmax_t) m16); > show(&expect, &ret, "timestamp: %"PRItime, timestamp); > diff --git a/t/t0040-parse-options.sh b/t/t0040-parse-options.sh > index 5f503b26cc8..9946e69f586 100755 > --- a/t/t0040-parse-options.sh > +++ b/t/t0040-parse-options.sh > @@ -23,6 +23,7 @@ usage: test-tool parse-options <options> > -i, --[no-]integer <n> > get a integer > --[no-]i16 <n> get a 16 bit integer > + --[no-]u16 <n> get a 16 bit unsigned integer > -j <n> get a integer, too > -m, --magnitude <n> get a magnitude > --m16 <n> get a 16 bit magnitude > @@ -139,6 +140,7 @@ cat >expect <<\EOF > boolean: 2 > integer: 1729 > i16: 0 > +u16: 0 > magnitude: 16384 > m16: 0 > timestamp: 0 > @@ -161,6 +163,7 @@ cat >expect <<\EOF > boolean: 2 > integer: 1729 > i16: 9000 > +u16: 5432 > magnitude: 16384 > m16: 32768 > timestamp: 0 > @@ -173,7 +176,7 @@ file: prefix/fi.le > EOF > > test_expect_success 'long options' ' > - test-tool parse-options --boolean --integer 1729 --i16 9000 --magnitude 16k \ > + test-tool parse-options --boolean --integer 1729 --i16 9000 --u16 5432 --magnitude 16k \ > --m16 32k --boolean --string2=321 --verbose --verbose --no-dry-run \ > --abbrev=10 --file fi.le --obsolete \ > >output 2>output.err && > @@ -186,6 +189,7 @@ test_expect_success 'abbreviate to something longer than SHA1 length' ' > boolean: 0 > integer: 0 > i16: 0 > + u16: 0 > magnitude: 0 > m16: 0 > timestamp: 0 > @@ -262,6 +266,7 @@ cat >expect <<\EOF > boolean: 1 > integer: 13 > i16: 0 > +u16: 0 > magnitude: 0 > m16: 0 > timestamp: 0 > @@ -287,6 +292,7 @@ cat >expect <<\EOF > boolean: 0 > integer: 2 > i16: 0 > +u16: 0 > magnitude: 0 > m16: 0 > timestamp: 0 > @@ -356,6 +362,7 @@ Callback: "four", 0 > boolean: 5 > integer: 4 > i16: 0 > +u16: 0 > magnitude: 0 > m16: 0 > timestamp: 0 > @@ -383,6 +390,7 @@ cat >expect <<\EOF > boolean: 1 > integer: 23 > i16: 0 > +u16: 0 > magnitude: 0 > m16: 0 > timestamp: 0 > @@ -464,6 +472,7 @@ cat >expect <<\EOF > boolean: 0 > integer: 0 > i16: 0 > +u16: 0 > magnitude: 0 > m16: 0 > timestamp: 0 > @@ -820,4 +829,11 @@ test_expect_success 'm16 limits range' ' > test_grep "value 65536 for option .m16. exceeds 65535" err > ' > > +test_expect_success 'u16 limits range' ' > + test-tool parse-options --u16 65535 >out && > + test_grep "u16: 65535" out && > + test_must_fail test-tool parse-options --u16 65536 2>err && > + test_grep "value 65536 for option .u16. exceeds 65535" err > +' > + > test_done >
Am 15.04.25 um 14:14 schrieb Patrick Steinhardt: > We have two generic ways to parse integers in the "parse-options" > subsytem: "subsystem" > - `OPTION_INTEGER` parses a signed integer. > > - `OPTION_MAGNITUDE` parses an unsigned integer, but it also > interprets suffixes like "k" or "g". > > Notably missing is a middle ground that parses unsigned integers without > interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to > plug this gap. This option type will be used in subsequent commits. > > Signed-off-by: Patrick Steinhardt <ps@pks.im> > --- > parse-options.c | 43 +++++++++++++++++++++++++++++++++++++++++++ > parse-options.h | 12 ++++++++++++ > t/helper/test-parse-options.c | 4 +++- > t/t0040-parse-options.sh | 18 +++++++++++++++++- > 4 files changed, 75 insertions(+), 2 deletions(-) > > diff --git a/parse-options.c b/parse-options.c > index ae836c384c7..9670e46a679 100644 > --- a/parse-options.c > +++ b/parse-options.c > @@ -216,6 +216,49 @@ static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p, > optname(opt, flags)); > } > } > + case OPTION_UNSIGNED: > + { > + uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision); > + uintmax_t value; > + > + if (unset) { > + value = 0; > + } else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { > + value = opt->defval; > + } else if (get_arg(p, opt, flags, &arg)) { > + return -1; > + } else if (!*arg) { > + return error(_("%s expects a numerical value"), > + optname(opt, flags)); > + } else { > + value = strtoumax(arg, (char **)&s, 10); > + if (*s) > + return error(_("%s expects a numerical value"), > + optname(opt, flags)); > + } > + > + if (value > upper_bound) > + return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX), > + value, optname(opt, flags), upper_bound); > + > + switch (opt->precision) { > + case 1: > + *(int8_t *)opt->value = value; uint8_t, surely. Similarly for the other casts below. > + return 0; > + case 2: > + *(int16_t *)opt->value = value; > + return 0; > + case 4: > + *(int32_t *)opt->value = value; > + return 0; > + case 8: > + *(int64_t *)opt->value = value; > + return 0; > + default: > + BUG("invalid precision for option %s", > + optname(opt, flags)); > + } > + } > case OPTION_MAGNITUDE: > { > uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
On Tue, Apr 15, 2025 at 04:52:02PM +0100, Phillip Wood wrote: > Hi Patrick > > On 15/04/2025 13:14, Patrick Steinhardt wrote: > > We have two generic ways to parse integers in the "parse-options" > > subsytem: > > > > - `OPTION_INTEGER` parses a signed integer. > > > > - `OPTION_MAGNITUDE` parses an unsigned integer, but it also > > interprets suffixes like "k" or "g". > > > > Notably missing is a middle ground that parses unsigned integers without > > interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to > > plug this gap. This option type will be used in subsequent commits. > > I think this is a useful addition. I wonder about the way it handles > negative values though. For types narrower than uintmax_t "-1" will be > rejected but large negative values that parse as small positive numbers will > be accepted. Perhaps we should explicitly reject strings starting with "-" > as we do in git_parse_ulong() which is used by OPTION_MAGNITUDE. Wait, does it? Why would `strtoul()` or any of its variants ever accept a string prefixed with a "-"? If the minus sign was part of the input sequence, the numeric value calculated from the sequence of digits is negated as if by unary minus in the result type, which applies unsigned integer wraparound rules. Oh dear... all these integer conversion functions are really a gift that keeps on giving. Gross. > This patch also needs the fix from patch 2 to detect overflows for > uintmax_t. Yup, will add. Patrick
On Tue, Apr 15, 2025 at 07:38:04PM +0200, René Scharfe wrote: > Am 15.04.25 um 14:14 schrieb Patrick Steinhardt: > > diff --git a/parse-options.c b/parse-options.c > > index ae836c384c7..9670e46a679 100644 > > --- a/parse-options.c > > +++ b/parse-options.c > > @@ -216,6 +216,49 @@ static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p, > > optname(opt, flags)); > > } > > } > > + case OPTION_UNSIGNED: > > + { > > + uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision); > > + uintmax_t value; > > + > > + if (unset) { > > + value = 0; > > + } else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { > > + value = opt->defval; > > + } else if (get_arg(p, opt, flags, &arg)) { > > + return -1; > > + } else if (!*arg) { > > + return error(_("%s expects a numerical value"), > > + optname(opt, flags)); > > + } else { > > + value = strtoumax(arg, (char **)&s, 10); > > + if (*s) > > + return error(_("%s expects a numerical value"), > > + optname(opt, flags)); > > + } > > + > > + if (value > upper_bound) > > + return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX), > > + value, optname(opt, flags), upper_bound); > > + > > + switch (opt->precision) { > > + case 1: > > + *(int8_t *)opt->value = value; > > uint8_t, surely. Similarly for the other casts below. Oof, of course. Patrick
diff --git a/parse-options.c b/parse-options.c index ae836c384c7..9670e46a679 100644 --- a/parse-options.c +++ b/parse-options.c @@ -216,6 +216,49 @@ static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p, optname(opt, flags)); } } + case OPTION_UNSIGNED: + { + uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision); + uintmax_t value; + + if (unset) { + value = 0; + } else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + value = opt->defval; + } else if (get_arg(p, opt, flags, &arg)) { + return -1; + } else if (!*arg) { + return error(_("%s expects a numerical value"), + optname(opt, flags)); + } else { + value = strtoumax(arg, (char **)&s, 10); + if (*s) + return error(_("%s expects a numerical value"), + optname(opt, flags)); + } + + if (value > upper_bound) + return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX), + value, optname(opt, flags), upper_bound); + + switch (opt->precision) { + case 1: + *(int8_t *)opt->value = value; + return 0; + case 2: + *(int16_t *)opt->value = value; + return 0; + case 4: + *(int32_t *)opt->value = value; + return 0; + case 8: + *(int64_t *)opt->value = value; + return 0; + default: + BUG("invalid precision for option %s", + optname(opt, flags)); + } + } case OPTION_MAGNITUDE: { uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision); diff --git a/parse-options.h b/parse-options.h index 4b561679581..20ea7d2ab13 100644 --- a/parse-options.h +++ b/parse-options.h @@ -25,6 +25,7 @@ enum parse_opt_type { /* options with arguments (usually) */ OPTION_STRING, OPTION_INTEGER, + OPTION_UNSIGNED, OPTION_MAGNITUDE, OPTION_CALLBACK, OPTION_LOWLEVEL_CALLBACK, @@ -224,6 +225,16 @@ struct option { .help = (h), \ .flags = (f), \ } +#define OPT_UNSIGNED_F(s, l, v, h, f) { \ + .type = OPTION_UNSIGNED, \ + .short_name = (s), \ + .long_name = (l), \ + .value = (v), \ + .precision = sizeof(*v), \ + .argh = N_("n"), \ + .help = (h), \ + .flags = (f), \ +} #define OPT_END() { \ .type = OPTION_END, \ @@ -276,6 +287,7 @@ struct option { #define OPT_CMDMODE(s, l, v, h, i) OPT_CMDMODE_F(s, l, v, h, i, 0) #define OPT_INTEGER(s, l, v, h) OPT_INTEGER_F(s, l, v, h, 0) +#define OPT_UNSIGNED(s, l, v, h) OPT_UNSIGNED_F(s, l, v, h, 0) #define OPT_MAGNITUDE(s, l, v, h) { \ .type = OPTION_MAGNITUDE, \ .short_name = (s), \ diff --git a/t/helper/test-parse-options.c b/t/helper/test-parse-options.c index 46deb4317ef..0d559288d9c 100644 --- a/t/helper/test-parse-options.c +++ b/t/helper/test-parse-options.c @@ -120,7 +120,7 @@ int cmd__parse_options(int argc, const char **argv) }; struct string_list expect = STRING_LIST_INIT_NODUP; struct string_list list = STRING_LIST_INIT_NODUP; - uint16_t m16 = 0; + uint16_t m16 = 0, u16 = 0; int16_t i16 = 0; struct option options[] = { @@ -142,6 +142,7 @@ int cmd__parse_options(int argc, const char **argv) OPT_GROUP(""), OPT_INTEGER('i', "integer", &integer, "get a integer"), OPT_INTEGER(0, "i16", &i16, "get a 16 bit integer"), + OPT_UNSIGNED(0, "u16", &u16, "get a 16 bit unsigned integer"), OPT_INTEGER('j', NULL, &integer, "get a integer, too"), OPT_MAGNITUDE('m', "magnitude", &magnitude, "get a magnitude"), OPT_MAGNITUDE(0, "m16", &m16, "get a 16 bit magnitude"), @@ -215,6 +216,7 @@ int cmd__parse_options(int argc, const char **argv) show(&expect, &ret, "boolean: %d", boolean); show(&expect, &ret, "integer: %d", integer); show(&expect, &ret, "i16: %"PRIdMAX, (intmax_t) i16); + show(&expect, &ret, "u16: %"PRIuMAX, (uintmax_t) u16); show(&expect, &ret, "magnitude: %lu", magnitude); show(&expect, &ret, "m16: %"PRIuMAX, (uintmax_t) m16); show(&expect, &ret, "timestamp: %"PRItime, timestamp); diff --git a/t/t0040-parse-options.sh b/t/t0040-parse-options.sh index 5f503b26cc8..9946e69f586 100755 --- a/t/t0040-parse-options.sh +++ b/t/t0040-parse-options.sh @@ -23,6 +23,7 @@ usage: test-tool parse-options <options> -i, --[no-]integer <n> get a integer --[no-]i16 <n> get a 16 bit integer + --[no-]u16 <n> get a 16 bit unsigned integer -j <n> get a integer, too -m, --magnitude <n> get a magnitude --m16 <n> get a 16 bit magnitude @@ -139,6 +140,7 @@ cat >expect <<\EOF boolean: 2 integer: 1729 i16: 0 +u16: 0 magnitude: 16384 m16: 0 timestamp: 0 @@ -161,6 +163,7 @@ cat >expect <<\EOF boolean: 2 integer: 1729 i16: 9000 +u16: 5432 magnitude: 16384 m16: 32768 timestamp: 0 @@ -173,7 +176,7 @@ file: prefix/fi.le EOF test_expect_success 'long options' ' - test-tool parse-options --boolean --integer 1729 --i16 9000 --magnitude 16k \ + test-tool parse-options --boolean --integer 1729 --i16 9000 --u16 5432 --magnitude 16k \ --m16 32k --boolean --string2=321 --verbose --verbose --no-dry-run \ --abbrev=10 --file fi.le --obsolete \ >output 2>output.err && @@ -186,6 +189,7 @@ test_expect_success 'abbreviate to something longer than SHA1 length' ' boolean: 0 integer: 0 i16: 0 + u16: 0 magnitude: 0 m16: 0 timestamp: 0 @@ -262,6 +266,7 @@ cat >expect <<\EOF boolean: 1 integer: 13 i16: 0 +u16: 0 magnitude: 0 m16: 0 timestamp: 0 @@ -287,6 +292,7 @@ cat >expect <<\EOF boolean: 0 integer: 2 i16: 0 +u16: 0 magnitude: 0 m16: 0 timestamp: 0 @@ -356,6 +362,7 @@ Callback: "four", 0 boolean: 5 integer: 4 i16: 0 +u16: 0 magnitude: 0 m16: 0 timestamp: 0 @@ -383,6 +390,7 @@ cat >expect <<\EOF boolean: 1 integer: 23 i16: 0 +u16: 0 magnitude: 0 m16: 0 timestamp: 0 @@ -464,6 +472,7 @@ cat >expect <<\EOF boolean: 0 integer: 0 i16: 0 +u16: 0 magnitude: 0 m16: 0 timestamp: 0 @@ -820,4 +829,11 @@ test_expect_success 'm16 limits range' ' test_grep "value 65536 for option .m16. exceeds 65535" err ' +test_expect_success 'u16 limits range' ' + test-tool parse-options --u16 65535 >out && + test_grep "u16: 65535" out && + test_must_fail test-tool parse-options --u16 65536 2>err && + test_grep "value 65536 for option .u16. exceeds 65535" err +' + test_done
We have two generic ways to parse integers in the "parse-options" subsytem: - `OPTION_INTEGER` parses a signed integer. - `OPTION_MAGNITUDE` parses an unsigned integer, but it also interprets suffixes like "k" or "g". Notably missing is a middle ground that parses unsigned integers without interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to plug this gap. This option type will be used in subsequent commits. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- parse-options.c | 43 +++++++++++++++++++++++++++++++++++++++++++ parse-options.h | 12 ++++++++++++ t/helper/test-parse-options.c | 4 +++- t/t0040-parse-options.sh | 18 +++++++++++++++++- 4 files changed, 75 insertions(+), 2 deletions(-)