diff mbox series

[v2,4/5] parse-options: introduce `OPTION_UNSIGNED`

Message ID 20250415-b4-pks-parse-options-integers-v2-4-ce07441a1f01@pks.im (mailing list archive)
State New
Headers show
Series parse-options: harden handling of integer values | expand

Commit Message

Patrick Steinhardt April 15, 2025, 12:14 p.m. UTC
We have two generic ways to parse integers in the "parse-options"
subsytem:

  - `OPTION_INTEGER` parses a signed integer.

  - `OPTION_MAGNITUDE` parses an unsigned integer, but it also
    interprets suffixes like "k" or "g".

Notably missing is a middle ground that parses unsigned integers without
interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to
plug this gap. This option type will be used in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 parse-options.c               | 43 +++++++++++++++++++++++++++++++++++++++++++
 parse-options.h               | 12 ++++++++++++
 t/helper/test-parse-options.c |  4 +++-
 t/t0040-parse-options.sh      | 18 +++++++++++++++++-
 4 files changed, 75 insertions(+), 2 deletions(-)

Comments

Phillip Wood April 15, 2025, 3:52 p.m. UTC | #1
Hi Patrick

On 15/04/2025 13:14, Patrick Steinhardt wrote:
> We have two generic ways to parse integers in the "parse-options"
> subsytem:
> 
>    - `OPTION_INTEGER` parses a signed integer.
> 
>    - `OPTION_MAGNITUDE` parses an unsigned integer, but it also
>      interprets suffixes like "k" or "g".
> 
> Notably missing is a middle ground that parses unsigned integers without
> interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to
> plug this gap. This option type will be used in subsequent commits.

I think this is a useful addition. I wonder about the way it handles 
negative values though. For types narrower than uintmax_t "-1" will be 
rejected but large negative values that parse as small positive numbers 
will be accepted. Perhaps we should explicitly reject strings starting 
with "-" as we do in git_parse_ulong() which is used by 
OPTION_MAGNITUDE. This patch also needs the fix from patch 2 to detect 
overflows for uintmax_t.

Best Wishes

Phillip


[1] 
https://lore.kernel.org/git/NYMqsJ7uttDzFT2OOEg5LLsxCSoQhTzqBs16KrMHGEKC7LzOAiYnYTEZavRQWqGH41UgjdwScwer7MssNzI7AEDHnD8GTBWvoBIqJ2e7D6g=@proton.me/

> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>   parse-options.c               | 43 +++++++++++++++++++++++++++++++++++++++++++
>   parse-options.h               | 12 ++++++++++++
>   t/helper/test-parse-options.c |  4 +++-
>   t/t0040-parse-options.sh      | 18 +++++++++++++++++-
>   4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/parse-options.c b/parse-options.c
> index ae836c384c7..9670e46a679 100644
> --- a/parse-options.c
> +++ b/parse-options.c
> @@ -216,6 +216,49 @@ static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p,
>   			    optname(opt, flags));
>   		}
>   	}
> +	case OPTION_UNSIGNED:
> +	{
> +		uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
> +		uintmax_t value;
> +
> +		if (unset) {
> +			value = 0;
> +		} else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
> +			value = opt->defval;
> +		} else if (get_arg(p, opt, flags, &arg)) {
> +			return -1;
> +		} else if (!*arg) {
> +			return error(_("%s expects a numerical value"),
> +				     optname(opt, flags));
> +		} else {
> +			value = strtoumax(arg, (char **)&s, 10);
> +			if (*s)
> +				return error(_("%s expects a numerical value"),
> +					     optname(opt, flags));
> +		}
> +
> +		if (value > upper_bound)
> +			return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX),
> +				     value, optname(opt, flags), upper_bound);
> +
> +		switch (opt->precision) {
> +		case 1:
> +			*(int8_t *)opt->value = value;
> +			return 0;
> +		case 2:
> +			*(int16_t *)opt->value = value;
> +			return 0;
> +		case 4:
> +			*(int32_t *)opt->value = value;
> +			return 0;
> +		case 8:
> +			*(int64_t *)opt->value = value;
> +			return 0;
> +		default:
> +			BUG("invalid precision for option %s",
> +			    optname(opt, flags));
> +		}
> +	}
>   	case OPTION_MAGNITUDE:
>   	{
>   		uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
> diff --git a/parse-options.h b/parse-options.h
> index 4b561679581..20ea7d2ab13 100644
> --- a/parse-options.h
> +++ b/parse-options.h
> @@ -25,6 +25,7 @@ enum parse_opt_type {
>   	/* options with arguments (usually) */
>   	OPTION_STRING,
>   	OPTION_INTEGER,
> +	OPTION_UNSIGNED,
>   	OPTION_MAGNITUDE,
>   	OPTION_CALLBACK,
>   	OPTION_LOWLEVEL_CALLBACK,
> @@ -224,6 +225,16 @@ struct option {
>   	.help = (h), \
>   	.flags = (f), \
>   }
> +#define OPT_UNSIGNED_F(s, l, v, h, f) { \
> +	.type = OPTION_UNSIGNED, \
> +	.short_name = (s), \
> +	.long_name = (l), \
> +	.value = (v), \
> +	.precision = sizeof(*v), \
> +	.argh = N_("n"), \
> +	.help = (h), \
> +	.flags = (f), \
> +}
>   
>   #define OPT_END() { \
>   	.type = OPTION_END, \
> @@ -276,6 +287,7 @@ struct option {
>   #define OPT_CMDMODE(s, l, v, h, i)  OPT_CMDMODE_F(s, l, v, h, i, 0)
>   
>   #define OPT_INTEGER(s, l, v, h)     OPT_INTEGER_F(s, l, v, h, 0)
> +#define OPT_UNSIGNED(s, l, v, h)    OPT_UNSIGNED_F(s, l, v, h, 0)
>   #define OPT_MAGNITUDE(s, l, v, h) { \
>   	.type = OPTION_MAGNITUDE, \
>   	.short_name = (s), \
> diff --git a/t/helper/test-parse-options.c b/t/helper/test-parse-options.c
> index 46deb4317ef..0d559288d9c 100644
> --- a/t/helper/test-parse-options.c
> +++ b/t/helper/test-parse-options.c
> @@ -120,7 +120,7 @@ int cmd__parse_options(int argc, const char **argv)
>   	};
>   	struct string_list expect = STRING_LIST_INIT_NODUP;
>   	struct string_list list = STRING_LIST_INIT_NODUP;
> -	uint16_t m16 = 0;
> +	uint16_t m16 = 0, u16 = 0;
>   	int16_t i16 = 0;
>   
>   	struct option options[] = {
> @@ -142,6 +142,7 @@ int cmd__parse_options(int argc, const char **argv)
>   		OPT_GROUP(""),
>   		OPT_INTEGER('i', "integer", &integer, "get a integer"),
>   		OPT_INTEGER(0, "i16", &i16, "get a 16 bit integer"),
> +		OPT_UNSIGNED(0, "u16", &u16, "get a 16 bit unsigned integer"),
>   		OPT_INTEGER('j', NULL, &integer, "get a integer, too"),
>   		OPT_MAGNITUDE('m', "magnitude", &magnitude, "get a magnitude"),
>   		OPT_MAGNITUDE(0, "m16", &m16, "get a 16 bit magnitude"),
> @@ -215,6 +216,7 @@ int cmd__parse_options(int argc, const char **argv)
>   	show(&expect, &ret, "boolean: %d", boolean);
>   	show(&expect, &ret, "integer: %d", integer);
>   	show(&expect, &ret, "i16: %"PRIdMAX, (intmax_t) i16);
> +	show(&expect, &ret, "u16: %"PRIuMAX, (uintmax_t) u16);
>   	show(&expect, &ret, "magnitude: %lu", magnitude);
>   	show(&expect, &ret, "m16: %"PRIuMAX, (uintmax_t) m16);
>   	show(&expect, &ret, "timestamp: %"PRItime, timestamp);
> diff --git a/t/t0040-parse-options.sh b/t/t0040-parse-options.sh
> index 5f503b26cc8..9946e69f586 100755
> --- a/t/t0040-parse-options.sh
> +++ b/t/t0040-parse-options.sh
> @@ -23,6 +23,7 @@ usage: test-tool parse-options <options>
>       -i, --[no-]integer <n>
>                             get a integer
>       --[no-]i16 <n>        get a 16 bit integer
> +    --[no-]u16 <n>        get a 16 bit unsigned integer
>       -j <n>                get a integer, too
>       -m, --magnitude <n>   get a magnitude
>       --m16 <n>             get a 16 bit magnitude
> @@ -139,6 +140,7 @@ cat >expect <<\EOF
>   boolean: 2
>   integer: 1729
>   i16: 0
> +u16: 0
>   magnitude: 16384
>   m16: 0
>   timestamp: 0
> @@ -161,6 +163,7 @@ cat >expect <<\EOF
>   boolean: 2
>   integer: 1729
>   i16: 9000
> +u16: 5432
>   magnitude: 16384
>   m16: 32768
>   timestamp: 0
> @@ -173,7 +176,7 @@ file: prefix/fi.le
>   EOF
>   
>   test_expect_success 'long options' '
> -	test-tool parse-options --boolean --integer 1729 --i16 9000 --magnitude 16k \
> +	test-tool parse-options --boolean --integer 1729 --i16 9000 --u16 5432 --magnitude 16k \
>   		--m16 32k --boolean --string2=321 --verbose --verbose --no-dry-run \
>   		--abbrev=10 --file fi.le --obsolete \
>   		>output 2>output.err &&
> @@ -186,6 +189,7 @@ test_expect_success 'abbreviate to something longer than SHA1 length' '
>   	boolean: 0
>   	integer: 0
>   	i16: 0
> +	u16: 0
>   	magnitude: 0
>   	m16: 0
>   	timestamp: 0
> @@ -262,6 +266,7 @@ cat >expect <<\EOF
>   boolean: 1
>   integer: 13
>   i16: 0
> +u16: 0
>   magnitude: 0
>   m16: 0
>   timestamp: 0
> @@ -287,6 +292,7 @@ cat >expect <<\EOF
>   boolean: 0
>   integer: 2
>   i16: 0
> +u16: 0
>   magnitude: 0
>   m16: 0
>   timestamp: 0
> @@ -356,6 +362,7 @@ Callback: "four", 0
>   boolean: 5
>   integer: 4
>   i16: 0
> +u16: 0
>   magnitude: 0
>   m16: 0
>   timestamp: 0
> @@ -383,6 +390,7 @@ cat >expect <<\EOF
>   boolean: 1
>   integer: 23
>   i16: 0
> +u16: 0
>   magnitude: 0
>   m16: 0
>   timestamp: 0
> @@ -464,6 +472,7 @@ cat >expect <<\EOF
>   boolean: 0
>   integer: 0
>   i16: 0
> +u16: 0
>   magnitude: 0
>   m16: 0
>   timestamp: 0
> @@ -820,4 +829,11 @@ test_expect_success 'm16 limits range' '
>   	test_grep "value 65536 for option .m16. exceeds 65535" err
>   '
>   
> +test_expect_success 'u16 limits range' '
> +	test-tool parse-options --u16 65535 >out &&
> +	test_grep "u16: 65535" out &&
> +	test_must_fail test-tool parse-options --u16 65536 2>err &&
> +	test_grep "value 65536 for option .u16. exceeds 65535" err
> +'
> +
>   test_done
>
René Scharfe April 15, 2025, 5:38 p.m. UTC | #2
Am 15.04.25 um 14:14 schrieb Patrick Steinhardt:
> We have two generic ways to parse integers in the "parse-options"
> subsytem:

"subsystem"

>   - `OPTION_INTEGER` parses a signed integer.
> 
>   - `OPTION_MAGNITUDE` parses an unsigned integer, but it also
>     interprets suffixes like "k" or "g".
> 
> Notably missing is a middle ground that parses unsigned integers without
> interpreting suffixes. Introduce a new `OPTION_UNSIGNED` option type to
> plug this gap. This option type will be used in subsequent commits.
> 
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  parse-options.c               | 43 +++++++++++++++++++++++++++++++++++++++++++
>  parse-options.h               | 12 ++++++++++++
>  t/helper/test-parse-options.c |  4 +++-
>  t/t0040-parse-options.sh      | 18 +++++++++++++++++-
>  4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/parse-options.c b/parse-options.c
> index ae836c384c7..9670e46a679 100644
> --- a/parse-options.c
> +++ b/parse-options.c
> @@ -216,6 +216,49 @@ static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p,
>  			    optname(opt, flags));
>  		}
>  	}
> +	case OPTION_UNSIGNED:
> +	{
> +		uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
> +		uintmax_t value;
> +
> +		if (unset) {
> +			value = 0;
> +		} else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
> +			value = opt->defval;
> +		} else if (get_arg(p, opt, flags, &arg)) {
> +			return -1;
> +		} else if (!*arg) {
> +			return error(_("%s expects a numerical value"),
> +				     optname(opt, flags));
> +		} else {
> +			value = strtoumax(arg, (char **)&s, 10);
> +			if (*s)
> +				return error(_("%s expects a numerical value"),
> +					     optname(opt, flags));
> +		}
> +
> +		if (value > upper_bound)
> +			return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX),
> +				     value, optname(opt, flags), upper_bound);
> +
> +		switch (opt->precision) {
> +		case 1:
> +			*(int8_t *)opt->value = value;

uint8_t, surely.  Similarly for the other casts below.

> +			return 0;
> +		case 2:
> +			*(int16_t *)opt->value = value;
> +			return 0;
> +		case 4:
> +			*(int32_t *)opt->value = value;
> +			return 0;
> +		case 8:
> +			*(int64_t *)opt->value = value;
> +			return 0;
> +		default:
> +			BUG("invalid precision for option %s",
> +			    optname(opt, flags));
> +		}
> +	}
>  	case OPTION_MAGNITUDE:
>  	{
>  		uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
diff mbox series

Patch

diff --git a/parse-options.c b/parse-options.c
index ae836c384c7..9670e46a679 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -216,6 +216,49 @@  static enum parse_opt_result do_get_value(struct parse_opt_ctx_t *p,
 			    optname(opt, flags));
 		}
 	}
+	case OPTION_UNSIGNED:
+	{
+		uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
+		uintmax_t value;
+
+		if (unset) {
+			value = 0;
+		} else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			value = opt->defval;
+		} else if (get_arg(p, opt, flags, &arg)) {
+			return -1;
+		} else if (!*arg) {
+			return error(_("%s expects a numerical value"),
+				     optname(opt, flags));
+		} else {
+			value = strtoumax(arg, (char **)&s, 10);
+			if (*s)
+				return error(_("%s expects a numerical value"),
+					     optname(opt, flags));
+		}
+
+		if (value > upper_bound)
+			return error(_("value %"PRIuMAX" for %s exceeds %"PRIuMAX),
+				     value, optname(opt, flags), upper_bound);
+
+		switch (opt->precision) {
+		case 1:
+			*(int8_t *)opt->value = value;
+			return 0;
+		case 2:
+			*(int16_t *)opt->value = value;
+			return 0;
+		case 4:
+			*(int32_t *)opt->value = value;
+			return 0;
+		case 8:
+			*(int64_t *)opt->value = value;
+			return 0;
+		default:
+			BUG("invalid precision for option %s",
+			    optname(opt, flags));
+		}
+	}
 	case OPTION_MAGNITUDE:
 	{
 		uintmax_t upper_bound = UINTMAX_MAX >> (bitsizeof(uintmax_t) - CHAR_BIT * opt->precision);
diff --git a/parse-options.h b/parse-options.h
index 4b561679581..20ea7d2ab13 100644
--- a/parse-options.h
+++ b/parse-options.h
@@ -25,6 +25,7 @@  enum parse_opt_type {
 	/* options with arguments (usually) */
 	OPTION_STRING,
 	OPTION_INTEGER,
+	OPTION_UNSIGNED,
 	OPTION_MAGNITUDE,
 	OPTION_CALLBACK,
 	OPTION_LOWLEVEL_CALLBACK,
@@ -224,6 +225,16 @@  struct option {
 	.help = (h), \
 	.flags = (f), \
 }
+#define OPT_UNSIGNED_F(s, l, v, h, f) { \
+	.type = OPTION_UNSIGNED, \
+	.short_name = (s), \
+	.long_name = (l), \
+	.value = (v), \
+	.precision = sizeof(*v), \
+	.argh = N_("n"), \
+	.help = (h), \
+	.flags = (f), \
+}
 
 #define OPT_END() { \
 	.type = OPTION_END, \
@@ -276,6 +287,7 @@  struct option {
 #define OPT_CMDMODE(s, l, v, h, i)  OPT_CMDMODE_F(s, l, v, h, i, 0)
 
 #define OPT_INTEGER(s, l, v, h)     OPT_INTEGER_F(s, l, v, h, 0)
+#define OPT_UNSIGNED(s, l, v, h)    OPT_UNSIGNED_F(s, l, v, h, 0)
 #define OPT_MAGNITUDE(s, l, v, h) { \
 	.type = OPTION_MAGNITUDE, \
 	.short_name = (s), \
diff --git a/t/helper/test-parse-options.c b/t/helper/test-parse-options.c
index 46deb4317ef..0d559288d9c 100644
--- a/t/helper/test-parse-options.c
+++ b/t/helper/test-parse-options.c
@@ -120,7 +120,7 @@  int cmd__parse_options(int argc, const char **argv)
 	};
 	struct string_list expect = STRING_LIST_INIT_NODUP;
 	struct string_list list = STRING_LIST_INIT_NODUP;
-	uint16_t m16 = 0;
+	uint16_t m16 = 0, u16 = 0;
 	int16_t i16 = 0;
 
 	struct option options[] = {
@@ -142,6 +142,7 @@  int cmd__parse_options(int argc, const char **argv)
 		OPT_GROUP(""),
 		OPT_INTEGER('i', "integer", &integer, "get a integer"),
 		OPT_INTEGER(0, "i16", &i16, "get a 16 bit integer"),
+		OPT_UNSIGNED(0, "u16", &u16, "get a 16 bit unsigned integer"),
 		OPT_INTEGER('j', NULL, &integer, "get a integer, too"),
 		OPT_MAGNITUDE('m', "magnitude", &magnitude, "get a magnitude"),
 		OPT_MAGNITUDE(0, "m16", &m16, "get a 16 bit magnitude"),
@@ -215,6 +216,7 @@  int cmd__parse_options(int argc, const char **argv)
 	show(&expect, &ret, "boolean: %d", boolean);
 	show(&expect, &ret, "integer: %d", integer);
 	show(&expect, &ret, "i16: %"PRIdMAX, (intmax_t) i16);
+	show(&expect, &ret, "u16: %"PRIuMAX, (uintmax_t) u16);
 	show(&expect, &ret, "magnitude: %lu", magnitude);
 	show(&expect, &ret, "m16: %"PRIuMAX, (uintmax_t) m16);
 	show(&expect, &ret, "timestamp: %"PRItime, timestamp);
diff --git a/t/t0040-parse-options.sh b/t/t0040-parse-options.sh
index 5f503b26cc8..9946e69f586 100755
--- a/t/t0040-parse-options.sh
+++ b/t/t0040-parse-options.sh
@@ -23,6 +23,7 @@  usage: test-tool parse-options <options>
     -i, --[no-]integer <n>
                           get a integer
     --[no-]i16 <n>        get a 16 bit integer
+    --[no-]u16 <n>        get a 16 bit unsigned integer
     -j <n>                get a integer, too
     -m, --magnitude <n>   get a magnitude
     --m16 <n>             get a 16 bit magnitude
@@ -139,6 +140,7 @@  cat >expect <<\EOF
 boolean: 2
 integer: 1729
 i16: 0
+u16: 0
 magnitude: 16384
 m16: 0
 timestamp: 0
@@ -161,6 +163,7 @@  cat >expect <<\EOF
 boolean: 2
 integer: 1729
 i16: 9000
+u16: 5432
 magnitude: 16384
 m16: 32768
 timestamp: 0
@@ -173,7 +176,7 @@  file: prefix/fi.le
 EOF
 
 test_expect_success 'long options' '
-	test-tool parse-options --boolean --integer 1729 --i16 9000 --magnitude 16k \
+	test-tool parse-options --boolean --integer 1729 --i16 9000 --u16 5432 --magnitude 16k \
 		--m16 32k --boolean --string2=321 --verbose --verbose --no-dry-run \
 		--abbrev=10 --file fi.le --obsolete \
 		>output 2>output.err &&
@@ -186,6 +189,7 @@  test_expect_success 'abbreviate to something longer than SHA1 length' '
 	boolean: 0
 	integer: 0
 	i16: 0
+	u16: 0
 	magnitude: 0
 	m16: 0
 	timestamp: 0
@@ -262,6 +266,7 @@  cat >expect <<\EOF
 boolean: 1
 integer: 13
 i16: 0
+u16: 0
 magnitude: 0
 m16: 0
 timestamp: 0
@@ -287,6 +292,7 @@  cat >expect <<\EOF
 boolean: 0
 integer: 2
 i16: 0
+u16: 0
 magnitude: 0
 m16: 0
 timestamp: 0
@@ -356,6 +362,7 @@  Callback: "four", 0
 boolean: 5
 integer: 4
 i16: 0
+u16: 0
 magnitude: 0
 m16: 0
 timestamp: 0
@@ -383,6 +390,7 @@  cat >expect <<\EOF
 boolean: 1
 integer: 23
 i16: 0
+u16: 0
 magnitude: 0
 m16: 0
 timestamp: 0
@@ -464,6 +472,7 @@  cat >expect <<\EOF
 boolean: 0
 integer: 0
 i16: 0
+u16: 0
 magnitude: 0
 m16: 0
 timestamp: 0
@@ -820,4 +829,11 @@  test_expect_success 'm16 limits range' '
 	test_grep "value 65536 for option .m16. exceeds 65535" err
 '
 
+test_expect_success 'u16 limits range' '
+	test-tool parse-options --u16 65535 >out &&
+	test_grep "u16: 65535" out &&
+	test_must_fail test-tool parse-options --u16 65536 2>err &&
+	test_grep "value 65536 for option .u16. exceeds 65535" err
+'
+
 test_done