Message ID | 4960b36916d55e22be08fe1689b81e0eefb47578.1704324320.git.wqu@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | kstrtox: introduce memparse_safe() | expand |
Hi, On 1/3/24 15:27, Qu Wenruo wrote: > [BUGS] > Function memparse() lacks error handling: > > - If no valid number string at all > In that case @retptr would just be updated and return value would be > zero. > > - No overflown detection > This applies to both the number string part, and the suffixes part. > And since we have no way to indicate errors, we can get weird results > like: > > "25E" -> 10376293541461622784 (9E) > > This is due to the fact that for "E" suffix, there is only 4 bits > left, and 25 with 60 bits left shift would lead to overflow. > > [CAUSE] > The root cause is already mentioned in the comments of the function, the > usage of simple_strtoull() is the source of evil. > Furthermore the function prototype is no good either, just returning an > unsigned long long gives us no way to indicate an error. > > [FIX] > Due to the prototype limits, we can not have a drop-in replacement for > memparse(). > > This patch can only help by introduce a new helper, memparse_safe(), and > mark the old memparse() deprecated. > > The new memparse_safe() has the following improvement: > > - Invalid string detection > If no number string can be detected at all, -EINVAL would be returned. > > - Better overflow detection > Both the string part and the extra left shift would have overflow > detection. > Any overflow would result -ERANGE. > > - Safer default suffix selection > The helper allows the caller to choose the suffixes that they want to > use. > But only "KMGTP" are recommended by default since the "E" leaves only > 4 bits before overflow. > For those callers really know what they are doing, they can still > manually to include all suffixes. > > Due to the prototype change, callers should migrate to the new one and > change their code and add extra error handling. > > Signed-off-by: Qu Wenruo <wqu@suse.com> > Reviewed-by: David Disseldorp <ddiss@suse.de> > --- > include/linux/kernel.h | 8 +++- > include/linux/kstrtox.h | 15 +++++++ > lib/cmdline.c | 5 ++- > lib/kstrtox.c | 96 +++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 122 insertions(+), 2 deletions(-) > > diff --git a/include/linux/kernel.h b/include/linux/kernel.h > index d9ad21058eed..b1b6da60ea43 100644 > --- a/include/linux/kernel.h > +++ b/include/linux/kernel.h > @@ -201,7 +201,13 @@ void do_exit(long error_code) __noreturn; > > extern int get_option(char **str, int *pint); > extern char *get_options(const char *str, int nints, int *ints); > -extern unsigned long long memparse(const char *ptr, char **retptr); > + > +/* > + * DEPRECATED, lack of any kind of error handling. > + * > + * Use memparse_safe() from lib/kstrtox.c instead. > + */ > +extern __deprecated unsigned long long memparse(const char *ptr, char **retptr); > extern bool parse_option_str(const char *str, const char *option); > extern char *next_arg(char *args, char **param, char **val); > > diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h > index 7fcf29a4e0de..53a1e059dd31 100644 > --- a/include/linux/kstrtox.h > +++ b/include/linux/kstrtox.h > @@ -9,6 +9,21 @@ > int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); > int __must_check _kstrtol(const char *s, unsigned int base, long *res); > > +enum memparse_suffix { > + MEMPARSE_SUFFIX_K = 1 << 0, > + MEMPARSE_SUFFIX_M = 1 << 1, > + MEMPARSE_SUFFIX_G = 1 << 2, > + MEMPARSE_SUFFIX_T = 1 << 3, > + MEMPARSE_SUFFIX_P = 1 << 4, > + MEMPARSE_SUFFIX_E = 1 << 5, > +}; > + > +#define MEMPARSE_SUFFIXES_DEFAULT (MEMPARSE_SUFFIX_K | MEMPARSE_SUFFIX_M |\ > + MEMPARSE_SUFFIX_G | MEMPARSE_SUFFIX_T |\ > + MEMPARSE_SUFFIX_P) > + > +int __must_check memparse_safe(const char *s, enum memparse_suffix suffixes, > + unsigned long long *res, char **retptr); > int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); > int __must_check kstrtoll(const char *s, unsigned int base, long long *res); > > diff --git a/lib/cmdline.c b/lib/cmdline.c > index 90ed997d9570..d379157de349 100644 > --- a/lib/cmdline.c > +++ b/lib/cmdline.c > @@ -139,12 +139,15 @@ char *get_options(const char *str, int nints, int *ints) > EXPORT_SYMBOL(get_options); > > /** > - * memparse - parse a string with mem suffixes into a number > + * memparse - DEPRECATED, parse a string with mem suffixes into a number > * @ptr: Where parse begins > * @retptr: (output) Optional pointer to next char after parse completes > * > + * There is no way to handle errors, and no overflown detection and string > + * sanity checks. > * Parses a string into a number. The number stored at @ptr is > * potentially suffixed with K, M, G, T, P, E. > + * Extra line is not needed. > */ > > unsigned long long memparse(const char *ptr, char **retptr) > diff --git a/lib/kstrtox.c b/lib/kstrtox.c > index 41c9a499bbf3..a1e4279f52b3 100644 > --- a/lib/kstrtox.c > +++ b/lib/kstrtox.c > @@ -113,6 +113,102 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) > return 0; > } > > +/** > + * memparse_safe - convert a string to an unsigned long long, safer version of > + * memparse() > + * > + * @s: The start of the string. Must be null-terminated. > + * The base would be determined automatically, if it starts with > + * "0x" the base would be 16, if it starts with "0" the base > + * would be 8, otherwise the base would be 10. > + * After a valid number string, there can be at most one > + * case-insensive suffix character, specified by the @suffixes case-insensitive > + * parameter. > + * > + * @suffixes: The suffixes which should be parsed. Use logical ORed > + * memparse_suffix enum to indicate the supported suffixes. > + * The suffixes are case-insensive, all 2 ^ 10 based. case-insensitive > + * Supported ones are "KMGPTE". > + * NOTE: If one suffix out of the supported one is hit, it would ones > + * end the parse normally, with @retptr pointed to the unsupported > + * suffix. Could you explain (or give an example) of "to the unsupported suffix"? This isn't clear IMO. > + * > + * @res: Where to write the result. > + * > + * @retptr: (output) Optional pointer to the next char after parse completes. > + * > + * Return 0 if any valid numberic string can be parsed, and @retptr updated. > + * Return -INVALID if no valid number string can be found. > + * Return -ERANGE if the number overflows. > + * For minus return values, @retptr would not be updated. * Returns: * * %0 if any valid numeric string can be parsed, and @retptr is updated. * * %-EINVAL if no valid number string can be found. * * %-ERANGE if the number overflows. * * For negative return values, @retptr is not updated. For *ALL* of the comments, I request/suggest that you change the "would be" or "would not be" to "is" or "is not" or whatever present tense words make the most sense. > + */ > +noinline int memparse_safe(const char *s, enum memparse_suffix suffixes, > + unsigned long long *res, char **retptr) > +{ > + unsigned long long value; > + unsigned int rv; > + int shift = 0; > + int base = 0; > + > + s = _parse_integer_fixup_radix(s, &base); > + rv = _parse_integer(s, base, &value); > + if (rv & KSTRTOX_OVERFLOW) > + return -ERANGE; > + if (rv == 0) > + return -EINVAL; > + > + s += rv; > + switch (*s) { > + case 'K': > + case 'k': > + if (!(suffixes & MEMPARSE_SUFFIX_K)) > + break; > + shift = 10; > + break; > + case 'M': > + case 'm': > + if (!(suffixes & MEMPARSE_SUFFIX_M)) > + break; > + shift = 20; > + break; > + case 'G': > + case 'g': > + if (!(suffixes & MEMPARSE_SUFFIX_G)) > + break; > + shift = 30; > + break; > + case 'T': > + case 't': > + if (!(suffixes & MEMPARSE_SUFFIX_T)) > + break; > + shift = 40; > + break; > + case 'P': > + case 'p': > + if (!(suffixes & MEMPARSE_SUFFIX_P)) > + break; > + shift = 50; > + break; > + case 'E': > + case 'e': > + if (!(suffixes & MEMPARSE_SUFFIX_E)) > + break; > + shift = 60; > + break; > + } > + if (shift) { > + s++; > + if (value >> (64 - shift)) > + return -ERANGE; > + value <<= shift; > + } > + *res = value; > + if (retptr) > + *retptr = (char *)s; > + return 0; > +} > +EXPORT_SYMBOL(memparse_safe); > + > /** > * kstrtoull - convert a string to an unsigned long long > * @s: The start of the string. The string must be null-terminated, and may also Thanks.
On 2024/1/4 17:00, Randy Dunlap wrote: > Hi, > [...] >> + * parameter. >> + * >> + * @suffixes: The suffixes which should be parsed. Use logical ORed >> + * memparse_suffix enum to indicate the supported suffixes. >> + * The suffixes are case-insensive, all 2 ^ 10 based. > > case-insensitive > >> + * Supported ones are "KMGPTE". >> + * NOTE: If one suffix out of the supported one is hit, it would > > ones > >> + * end the parse normally, with @retptr pointed to the unsupported >> + * suffix. > > Could you explain (or give an example) of "to the unsupported suffix"? > This isn't clear IMO. Oh, my bad, that sentence itself is not correct. What I really want to say is: If one suffix (one of "KMGPTE") is hit but that suffix is not specified in the @suffxies parameter, it would end the parse normally, with @retptr pointed to the (unsupported) suffix. The example would be the "68k " case in the ok cases in the next patch. We have two different cases for the same "68k" string, with different @suffixes and different results: "68k ", KMGPTE -> 68 * 1024, @retptr at " ". "68k ", M -> 68, @retptr at 'k'. I don't have a better expression here unfortunately, maybe the special case is not even worthy explaining? > >> + * >> + * @res: Where to write the result. >> + * >> + * @retptr: (output) Optional pointer to the next char after parse completes. >> + * >> + * Return 0 if any valid numberic string can be parsed, and @retptr updated. >> + * Return -INVALID if no valid number string can be found. >> + * Return -ERANGE if the number overflows. >> + * For minus return values, @retptr would not be updated. > > * Returns: > * * %0 if any valid numeric string can be parsed, and @retptr is updated. > * * %-EINVAL if no valid number string can be found. > * * %-ERANGE if the number overflows. > * * For negative return values, @retptr is not updated. > > > For *ALL* of the comments, I request/suggest that you change the "would be" or > "would not be" to "is" or "is not" or whatever present tense words make the > most sense. No problem. Thanks, Qu > > >> + */ >> +noinline int memparse_safe(const char *s, enum memparse_suffix suffixes, >> + unsigned long long *res, char **retptr) >> +{ >> + unsigned long long value; >> + unsigned int rv; >> + int shift = 0; >> + int base = 0; >> + >> + s = _parse_integer_fixup_radix(s, &base); >> + rv = _parse_integer(s, base, &value); >> + if (rv & KSTRTOX_OVERFLOW) >> + return -ERANGE; >> + if (rv == 0) >> + return -EINVAL; >> + >> + s += rv; >> + switch (*s) { >> + case 'K': >> + case 'k': >> + if (!(suffixes & MEMPARSE_SUFFIX_K)) >> + break; >> + shift = 10; >> + break; >> + case 'M': >> + case 'm': >> + if (!(suffixes & MEMPARSE_SUFFIX_M)) >> + break; >> + shift = 20; >> + break; >> + case 'G': >> + case 'g': >> + if (!(suffixes & MEMPARSE_SUFFIX_G)) >> + break; >> + shift = 30; >> + break; >> + case 'T': >> + case 't': >> + if (!(suffixes & MEMPARSE_SUFFIX_T)) >> + break; >> + shift = 40; >> + break; >> + case 'P': >> + case 'p': >> + if (!(suffixes & MEMPARSE_SUFFIX_P)) >> + break; >> + shift = 50; >> + break; >> + case 'E': >> + case 'e': >> + if (!(suffixes & MEMPARSE_SUFFIX_E)) >> + break; >> + shift = 60; >> + break; >> + } >> + if (shift) { >> + s++; >> + if (value >> (64 - shift)) >> + return -ERANGE; >> + value <<= shift; >> + } >> + *res = value; >> + if (retptr) >> + *retptr = (char *)s; >> + return 0; >> +} >> +EXPORT_SYMBOL(memparse_safe); >> + >> /** >> * kstrtoull - convert a string to an unsigned long long >> * @s: The start of the string. The string must be null-terminated, and may also > > Thanks.
On 1/3/24 22:42, Qu Wenruo wrote: > > > On 2024/1/4 17:00, Randy Dunlap wrote: >> Hi, >> > [...] >>> + * parameter. >>> + * >>> + * @suffixes: The suffixes which should be parsed. Use logical ORed >>> + * memparse_suffix enum to indicate the supported suffixes. >>> + * The suffixes are case-insensive, all 2 ^ 10 based. >> >> case-insensitive >> >>> + * Supported ones are "KMGPTE". >>> + * NOTE: If one suffix out of the supported one is hit, it would >> >> ones >> >>> + * end the parse normally, with @retptr pointed to the unsupported >>> + * suffix. >> >> Could you explain (or give an example) of "to the unsupported suffix"? >> This isn't clear IMO. > > Oh, my bad, that sentence itself is not correct. > > What I really want to say is: > > If one suffix (one of "KMGPTE") is hit but that suffix is not > specified in the @suffxies parameter, it would end the parse normally, > with @retptr pointed to the (unsupported) suffix. (corrected ^^^) > The example would be the "68k " case in the ok cases in the next patch. > We have two different cases for the same "68k" string, with different > @suffixes and different results: > > "68k ", KMGPTE -> 68 * 1024, @retptr at " ". > "68k ", M -> 68, @retptr at 'k'. > > I don't have a better expression here unfortunately, maybe the special > case is not even worthy explaining? > No, the corrected paragraph above is good. (s/would end/ends/) Except for one thing: the examples here terminate on a space character, but the function kernel-doc says "null-terminated": +/** + * memparse_safe - convert a string to an unsigned long long, safer version of + * memparse() + * + * @s: The start of the string. Must be null-terminated. >> >>> + * >>> + * @res: Where to write the result. >>> + * >>> + * @retptr: (output) Optional pointer to the next char after parse completes. >>> + * >>> + * Return 0 if any valid numberic string can be parsed, and @retptr updated. >>> + * Return -INVALID if no valid number string can be found. >>> + * Return -ERANGE if the number overflows. >>> + * For minus return values, @retptr would not be updated. >> >> * Returns: >> * * %0 if any valid numeric string can be parsed, and @retptr is updated. >> * * %-EINVAL if no valid number string can be found. >> * * %-ERANGE if the number overflows. >> * * For negative return values, @retptr is not updated. >> >> >> For *ALL* of the comments, I request/suggest that you change the "would be" or >> "would not be" to "is" or "is not" or whatever present tense words make the >> most sense. > > No problem.
On 2024/1/4 17:20, Randy Dunlap wrote: > > > On 1/3/24 22:42, Qu Wenruo wrote: >> >> >> On 2024/1/4 17:00, Randy Dunlap wrote: >>> Hi, >>> >> [...] >>>> + * parameter. >>>> + * >>>> + * @suffixes: The suffixes which should be parsed. Use logical ORed >>>> + * memparse_suffix enum to indicate the supported suffixes. >>>> + * The suffixes are case-insensive, all 2 ^ 10 based. >>> >>> case-insensitive >>> >>>> + * Supported ones are "KMGPTE". >>>> + * NOTE: If one suffix out of the supported one is hit, it would >>> >>> ones >>> >>>> + * end the parse normally, with @retptr pointed to the unsupported >>>> + * suffix. >>> >>> Could you explain (or give an example) of "to the unsupported suffix"? >>> This isn't clear IMO. >> >> Oh, my bad, that sentence itself is not correct. >> >> What I really want to say is: >> >> If one suffix (one of "KMGPTE") is hit but that suffix is not >> specified in the @suffxies parameter, it would end the parse normally, >> with @retptr pointed to the (unsupported) suffix. > > (corrected ^^^) > >> The example would be the "68k " case in the ok cases in the next patch. >> We have two different cases for the same "68k" string, with different >> @suffixes and different results: >> >> "68k ", KMGPTE -> 68 * 1024, @retptr at " ". >> "68k ", M -> 68, @retptr at 'k'. >> >> I don't have a better expression here unfortunately, maybe the special >> case is not even worthy explaining? >> > > No, the corrected paragraph above is good. (s/would end/ends/) > > Except for one thing: the examples here terminate on a space character, > but the function kernel-doc says "null-terminated": Oh, all the strings used here and in the test cases are all "\0" ended. Just to make it a little more readable (and easier to type), the tailing "\0" is not explicitly showed. That extra ' ' in the "68k \0" case is just to make sure the @retptr is properly updated. For all the other existing ok cases for memparse_safe() (those without extra tailing chars), the @retptr points to the tailing NUL '\0'. Thanks, Qu > > +/** > + * memparse_safe - convert a string to an unsigned long long, safer version of > + * memparse() > + * > + * @s: The start of the string. Must be null-terminated. > > > > >>> >>>> + * >>>> + * @res: Where to write the result. >>>> + * >>>> + * @retptr: (output) Optional pointer to the next char after parse completes. >>>> + * >>>> + * Return 0 if any valid numberic string can be parsed, and @retptr updated. >>>> + * Return -INVALID if no valid number string can be found. >>>> + * Return -ERANGE if the number overflows. >>>> + * For minus return values, @retptr would not be updated. >>> >>> * Returns: >>> * * %0 if any valid numeric string can be parsed, and @retptr is updated. >>> * * %-EINVAL if no valid number string can be found. >>> * * %-ERANGE if the number overflows. >>> * * For negative return values, @retptr is not updated. >>> >>> >>> For *ALL* of the comments, I request/suggest that you change the "would be" or >>> "would not be" to "is" or "is not" or whatever present tense words make the >>> most sense. >> >> No problem. > >
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d9ad21058eed..b1b6da60ea43 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -201,7 +201,13 @@ void do_exit(long error_code) __noreturn; extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(const char *ptr, char **retptr); + +/* + * DEPRECATED, lack of any kind of error handling. + * + * Use memparse_safe() from lib/kstrtox.c instead. + */ +extern __deprecated unsigned long long memparse(const char *ptr, char **retptr); extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 7fcf29a4e0de..53a1e059dd31 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -9,6 +9,21 @@ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); +enum memparse_suffix { + MEMPARSE_SUFFIX_K = 1 << 0, + MEMPARSE_SUFFIX_M = 1 << 1, + MEMPARSE_SUFFIX_G = 1 << 2, + MEMPARSE_SUFFIX_T = 1 << 3, + MEMPARSE_SUFFIX_P = 1 << 4, + MEMPARSE_SUFFIX_E = 1 << 5, +}; + +#define MEMPARSE_SUFFIXES_DEFAULT (MEMPARSE_SUFFIX_K | MEMPARSE_SUFFIX_M |\ + MEMPARSE_SUFFIX_G | MEMPARSE_SUFFIX_T |\ + MEMPARSE_SUFFIX_P) + +int __must_check memparse_safe(const char *s, enum memparse_suffix suffixes, + unsigned long long *res, char **retptr); int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); int __must_check kstrtoll(const char *s, unsigned int base, long long *res); diff --git a/lib/cmdline.c b/lib/cmdline.c index 90ed997d9570..d379157de349 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -139,12 +139,15 @@ char *get_options(const char *str, int nints, int *ints) EXPORT_SYMBOL(get_options); /** - * memparse - parse a string with mem suffixes into a number + * memparse - DEPRECATED, parse a string with mem suffixes into a number * @ptr: Where parse begins * @retptr: (output) Optional pointer to next char after parse completes * + * There is no way to handle errors, and no overflown detection and string + * sanity checks. * Parses a string into a number. The number stored at @ptr is * potentially suffixed with K, M, G, T, P, E. + * */ unsigned long long memparse(const char *ptr, char **retptr) diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 41c9a499bbf3..a1e4279f52b3 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -113,6 +113,102 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) return 0; } +/** + * memparse_safe - convert a string to an unsigned long long, safer version of + * memparse() + * + * @s: The start of the string. Must be null-terminated. + * The base would be determined automatically, if it starts with + * "0x" the base would be 16, if it starts with "0" the base + * would be 8, otherwise the base would be 10. + * After a valid number string, there can be at most one + * case-insensive suffix character, specified by the @suffixes + * parameter. + * + * @suffixes: The suffixes which should be parsed. Use logical ORed + * memparse_suffix enum to indicate the supported suffixes. + * The suffixes are case-insensive, all 2 ^ 10 based. + * Supported ones are "KMGPTE". + * NOTE: If one suffix out of the supported one is hit, it would + * end the parse normally, with @retptr pointed to the unsupported + * suffix. + * + * @res: Where to write the result. + * + * @retptr: (output) Optional pointer to the next char after parse completes. + * + * Return 0 if any valid numberic string can be parsed, and @retptr updated. + * Return -INVALID if no valid number string can be found. + * Return -ERANGE if the number overflows. + * For minus return values, @retptr would not be updated. + */ +noinline int memparse_safe(const char *s, enum memparse_suffix suffixes, + unsigned long long *res, char **retptr) +{ + unsigned long long value; + unsigned int rv; + int shift = 0; + int base = 0; + + s = _parse_integer_fixup_radix(s, &base); + rv = _parse_integer(s, base, &value); + if (rv & KSTRTOX_OVERFLOW) + return -ERANGE; + if (rv == 0) + return -EINVAL; + + s += rv; + switch (*s) { + case 'K': + case 'k': + if (!(suffixes & MEMPARSE_SUFFIX_K)) + break; + shift = 10; + break; + case 'M': + case 'm': + if (!(suffixes & MEMPARSE_SUFFIX_M)) + break; + shift = 20; + break; + case 'G': + case 'g': + if (!(suffixes & MEMPARSE_SUFFIX_G)) + break; + shift = 30; + break; + case 'T': + case 't': + if (!(suffixes & MEMPARSE_SUFFIX_T)) + break; + shift = 40; + break; + case 'P': + case 'p': + if (!(suffixes & MEMPARSE_SUFFIX_P)) + break; + shift = 50; + break; + case 'E': + case 'e': + if (!(suffixes & MEMPARSE_SUFFIX_E)) + break; + shift = 60; + break; + } + if (shift) { + s++; + if (value >> (64 - shift)) + return -ERANGE; + value <<= shift; + } + *res = value; + if (retptr) + *retptr = (char *)s; + return 0; +} +EXPORT_SYMBOL(memparse_safe); + /** * kstrtoull - convert a string to an unsigned long long * @s: The start of the string. The string must be null-terminated, and may also