diff mbox series

[09/13] parse-options API: don't restrict OPT_SUBCOMMAND() to one *_fn type

Message ID patch-09.13-2cb3807aa17-20221104T132117Z-avarab@gmail.com (mailing list archive)
State New, archived
Headers show
Series bisect: v2.30.0 "run" regressions + make it built-in | expand

Commit Message

Ævar Arnfjörð Bjarmason Nov. 4, 2022, 1:22 p.m. UTC
When the OPT_SUBCOMMAND() API was implemented in [1] it did so by
adding a new "subcommand_fn" member to "struct option", rather than
allowing the user of the API to pick the type of the function.

An advantage of mandating that "parse_opt_subcommand_fn" must be used
is that we'll get type checking for the function we're passing in, a
disadvantage is that we can't convert e.g. "builtin/bisect--helper.c"
easily to it, as its callbacks need their own argument.

Let's generalize this interface, while leaving in place a small hack
to give the existing API users their type safety. We assign to
"typecheck_subcommand_fn", but don't subsequently use it for
anything. Instead we use the "defval" and "value" members.

A subsequent commit will add a OPT_SUBCOMMAND() variant where the
"callback" isn't our default "parse_options_pick_subcommand" (and that
caller won't be able to use the type checking).

1. fa83cc834da (parse-options: add support for parsing subcommands,
   2022-08-19)

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 parse-options.c |  9 ++++++---
 parse-options.h | 25 +++++++++++++++++++++----
 2 files changed, 27 insertions(+), 7 deletions(-)

Comments

René Scharfe Nov. 5, 2022, 8:32 a.m. UTC | #1
Am 04.11.22 um 14:22 schrieb Ævar Arnfjörð Bjarmason:
> When the OPT_SUBCOMMAND() API was implemented in [1] it did so by
> adding a new "subcommand_fn" member to "struct option", rather than
> allowing the user of the API to pick the type of the function.
>
> An advantage of mandating that "parse_opt_subcommand_fn" must be used
> is that we'll get type checking for the function we're passing in, a
> disadvantage is that we can't convert e.g. "builtin/bisect--helper.c"
> easily to it, as its callbacks need their own argument.
>
> Let's generalize this interface, while leaving in place a small hack
> to give the existing API users their type safety. We assign to
> "typecheck_subcommand_fn", but don't subsequently use it for
> anything. Instead we use the "defval" and "value" members.
>
> A subsequent commit will add a OPT_SUBCOMMAND() variant where the
> "callback" isn't our default "parse_options_pick_subcommand" (and that
> caller won't be able to use the type checking).
>
> 1. fa83cc834da (parse-options: add support for parsing subcommands,
>    2022-08-19)
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
>  parse-options.c |  9 ++++++---
>  parse-options.h | 25 +++++++++++++++++++++----
>  2 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/parse-options.c b/parse-options.c
> index a1ec932f0f9..1d9e46c9dc7 100644
> --- a/parse-options.c
> +++ b/parse-options.c
> @@ -427,7 +427,8 @@ static enum parse_opt_result parse_subcommand(const char *arg,
>  	for (; options->type != OPTION_END; options++)
>  		if (options->type == OPTION_SUBCOMMAND &&
>  		    !strcmp(options->long_name, arg)) {
> -			*(parse_opt_subcommand_fn **)options->value = options->subcommand_fn;
> +			if (options->callback(options, arg, 0))
> +				BUG("OPT_SUBCOMMAND callback returning non-zero");
>  			return PARSE_OPT_SUBCOMMAND;
>  		}
>
> @@ -506,8 +507,10 @@ static void parse_options_check(const struct option *opts)
>  			       "That case is not supported yet.");
>  			break;
>  		case OPTION_SUBCOMMAND:
> -			if (!opts->value || !opts->subcommand_fn)
> -				optbug(opts, "OPTION_SUBCOMMAND needs a value and a subcommand function");
> +			if (!opts->value || !opts->callback)
> +				optbug(opts, "OPTION_SUBCOMMAND needs a value and a callback function");
> +			if (opts->ll_callback)
> +				optbug(opts, "OPTION_SUBCOMMAND uses callback, not ll_callback");
>  			if (!subcommand_value)
>  				subcommand_value = opts->value;
>  			else if (subcommand_value != opts->value)
> diff --git a/parse-options.h b/parse-options.h
> index b6ef86e0d15..61e3016c3fc 100644
> --- a/parse-options.h
> +++ b/parse-options.h
> @@ -128,19 +128,24 @@ typedef int parse_opt_subcommand_fn(int argc, const char **argv,
>   *			 the option takes optional argument.
>   *
>   * `callback`::
> - *   pointer to the callback to use for OPTION_CALLBACK
> + *   pointer to the callback to use for OPTION_CALLBACK and OPTION_SUBCOMMAND.
>   *
>   * `defval`::
>   *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
>   *   OPTION_{BIT,SET_INT} store the {mask,integer} to put in the value when met.
> + *   OPTION_SUBCOMMAND stores the pointer the function selected for
> + *   the subcommand.
> + *
>   *   CALLBACKS can use it like they want.
>   *
>   * `ll_callback`::
>   *   pointer to the callback to use for OPTION_LOWLEVEL_CALLBACK
>   *
>   * `subcommand_fn`::
> - *   pointer to a function to use for OPTION_SUBCOMMAND.
> - *   It will be put in value when the subcommand is given on the command line.
> + *   pointer to the callback used with OPT_SUBCOMMAND() and
> + *   OPT_SUBCOMMAND_F(). Internally we store the same value in
> + *   `defval`. This is only here to give the OPT_SUBCOMMAND{,_F}()
> + *   common case type safety.
>   */
>  struct option {
>  	enum parse_opt_type type;
> @@ -217,12 +222,24 @@ struct option {
>  #define OPT_ALIAS(s, l, source_long_name) \
>  	{ OPTION_ALIAS, (s), (l), (source_long_name) }
>
> +static inline int parse_options_pick_subcommand_cb(const struct option *option,
> +						   const char *arg UNUSED,
> +						   int unset UNUSED)
> +{
> +	parse_opt_subcommand_fn *fn = (parse_opt_subcommand_fn *)option->defval;
> +	*(parse_opt_subcommand_fn **)option->value = fn;

->defval is of type intptr_t and ->value is a void pointer.  The result
of converting a void pointer value to an intptr_t and back is a void
pointer equal to the original pointer if I read 6.3.2.3 (Pointers,
paragraphs 5 and 6) and 7.18.1.4 (Integer types capable of holding
object pointers) in C99 correctly.

6.3.2.3 paragraph 8 says that casting between function pointers of
different type is OK and you can get your original function pointer
back and use it in a call if you convert it back to the right type.

Casting between a function pointer and an object pointer is undefined,
though.  They don't have to be of the same size, so a function pointer
doesn't have to fit into an intptr_t.  I wouldn't be surprised if CHERI
(https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/) was an actual
example of that.

Why is this trickery needed?  Above you write that callbacks in
builtin/bisect--helper.c can't use subcommand_fn because they need
their own argument.  Can we extend subcommand_fn or use a global
variable to pass that extra thing instead?  The latter may be ugly, but
at least it's valid C..

> +	return 0;
> +}
> +
>  #define OPT_SUBCOMMAND_F(l, v, fn, f) { \
>  	.type = OPTION_SUBCOMMAND, \
>  	.long_name = (l), \
>  	.value = (v), \
>  	.flags = (f), \
> -	.subcommand_fn = (fn) }
> +	.defval = (intptr_t)(fn), \
> +	.subcommand_fn = (fn), \
> +	.callback = parse_options_pick_subcommand_cb, \

Getting the address of an inline function feels weird, but the compiler
is free to emit to ignore that keyword and will provide an addressable
function object here.

> +}
>  #define OPT_SUBCOMMAND(l, v, fn)    OPT_SUBCOMMAND_F((l), (v), (fn), 0)
>
>  /*
Đoàn Trần Công Danh Nov. 5, 2022, 11:34 a.m. UTC | #2
On 2022-11-05 09:32:44+0100, René Scharfe <l.s.r@web.de> wrote:
> > diff --git a/parse-options.h b/parse-options.h
> > index b6ef86e0d15..61e3016c3fc 100644
> > --- a/parse-options.h
> > +++ b/parse-options.h
> > @@ -128,19 +128,24 @@ typedef int parse_opt_subcommand_fn(int argc, const char **argv,
> >   *			 the option takes optional argument.
> >   *
> >   * `callback`::
> > - *   pointer to the callback to use for OPTION_CALLBACK
> > + *   pointer to the callback to use for OPTION_CALLBACK and OPTION_SUBCOMMAND.
> >   *
> >   * `defval`::
> >   *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
> >   *   OPTION_{BIT,SET_INT} store the {mask,integer} to put in the value when met.
> > + *   OPTION_SUBCOMMAND stores the pointer the function selected for
> > + *   the subcommand.
> > + *
> >   *   CALLBACKS can use it like they want.
> >   *
> >   * `ll_callback`::
> >   *   pointer to the callback to use for OPTION_LOWLEVEL_CALLBACK
> >   *
> >   * `subcommand_fn`::
> > - *   pointer to a function to use for OPTION_SUBCOMMAND.
> > - *   It will be put in value when the subcommand is given on the command line.
> > + *   pointer to the callback used with OPT_SUBCOMMAND() and
> > + *   OPT_SUBCOMMAND_F(). Internally we store the same value in
> > + *   `defval`. This is only here to give the OPT_SUBCOMMAND{,_F}()
> > + *   common case type safety.
> >   */
> >  struct option {
> >  	enum parse_opt_type type;
> > @@ -217,12 +222,24 @@ struct option {
> >  #define OPT_ALIAS(s, l, source_long_name) \
> >  	{ OPTION_ALIAS, (s), (l), (source_long_name) }
> >
> > +static inline int parse_options_pick_subcommand_cb(const struct option *option,
> > +						   const char *arg UNUSED,
> > +						   int unset UNUSED)
> > +{
> > +	parse_opt_subcommand_fn *fn = (parse_opt_subcommand_fn *)option->defval;
> > +	*(parse_opt_subcommand_fn **)option->value = fn;
> 
> ->defval is of type intptr_t and ->value is a void pointer.  The result
> of converting a void pointer value to an intptr_t and back is a void
> pointer equal to the original pointer if I read 6.3.2.3 (Pointers,
> paragraphs 5 and 6) and 7.18.1.4 (Integer types capable of holding
> object pointers) in C99 correctly.
> 
> 6.3.2.3 paragraph 8 says that casting between function pointers of
> different type is OK and you can get your original function pointer
> back and use it in a call if you convert it back to the right type.
> 
> Casting between a function pointer and an object pointer is undefined,
> though.  They don't have to be of the same size, so a function pointer
> doesn't have to fit into an intptr_t.  I wouldn't be surprised if CHERI
> (https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/) was an actual
> example of that.
> 
> Why is this trickery needed?  Above you write that callbacks in
> builtin/bisect--helper.c can't use subcommand_fn because they need
> their own argument.  Can we extend subcommand_fn or use a global
> variable to pass that extra thing instead?  The latter may be ugly, but
> at least it's valid C..

Not the author, but I fully agree with you, I think instead of adding new API
for some arbitrary subcommand_fn, I would change the subcommand_fn to
type:

	int (*)(int argc, const char **argv, const char *prefix, void *context)

The last argument would be an object pointer, which will be casted to
the correct type inside the callback.

Let me cherry-picking this series on top of mine to see how things
would progress.

> > +	return 0;
> > +}
> > +
> >  #define OPT_SUBCOMMAND_F(l, v, fn, f) { \
> >  	.type = OPTION_SUBCOMMAND, \
> >  	.long_name = (l), \
> >  	.value = (v), \
> >  	.flags = (f), \
> > -	.subcommand_fn = (fn) }
> > +	.defval = (intptr_t)(fn), \
> > +	.subcommand_fn = (fn), \
> > +	.callback = parse_options_pick_subcommand_cb, \
> 
> Getting the address of an inline function feels weird, but the compiler
> is free to emit to ignore that keyword and will provide an addressable
> function object here.
> 
> > +}
> >  #define OPT_SUBCOMMAND(l, v, fn)    OPT_SUBCOMMAND_F((l), (v), (fn), 0)
> >
> >  /*
>
Ævar Arnfjörð Bjarmason Nov. 5, 2022, 1:52 p.m. UTC | #3
On Sat, Nov 05 2022, René Scharfe wrote:

> Am 04.11.22 um 14:22 schrieb Ævar Arnfjörð Bjarmason:
>> When the OPT_SUBCOMMAND() API was implemented in [1] it did so by
>> adding a new "subcommand_fn" member to "struct option", rather than
>> allowing the user of the API to pick the type of the function.
>>
>> An advantage of mandating that "parse_opt_subcommand_fn" must be used
>> is that we'll get type checking for the function we're passing in, a
>> disadvantage is that we can't convert e.g. "builtin/bisect--helper.c"
>> easily to it, as its callbacks need their own argument.
>>
>> Let's generalize this interface, while leaving in place a small hack
>> to give the existing API users their type safety. We assign to
>> "typecheck_subcommand_fn", but don't subsequently use it for
>> anything. Instead we use the "defval" and "value" members.
>>
>> A subsequent commit will add a OPT_SUBCOMMAND() variant where the
>> "callback" isn't our default "parse_options_pick_subcommand" (and that
>> caller won't be able to use the type checking).
>>
>> 1. fa83cc834da (parse-options: add support for parsing subcommands,
>>    2022-08-19)
>>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>>  parse-options.c |  9 ++++++---
>>  parse-options.h | 25 +++++++++++++++++++++----
>>  2 files changed, 27 insertions(+), 7 deletions(-)
>>
>> diff --git a/parse-options.c b/parse-options.c
>> index a1ec932f0f9..1d9e46c9dc7 100644
>> --- a/parse-options.c
>> +++ b/parse-options.c
>> @@ -427,7 +427,8 @@ static enum parse_opt_result parse_subcommand(const char *arg,
>>  	for (; options->type != OPTION_END; options++)
>>  		if (options->type == OPTION_SUBCOMMAND &&
>>  		    !strcmp(options->long_name, arg)) {
>> -			*(parse_opt_subcommand_fn **)options->value = options->subcommand_fn;
>> +			if (options->callback(options, arg, 0))
>> +				BUG("OPT_SUBCOMMAND callback returning non-zero");
>>  			return PARSE_OPT_SUBCOMMAND;
>>  		}
>>
>> @@ -506,8 +507,10 @@ static void parse_options_check(const struct option *opts)
>>  			       "That case is not supported yet.");
>>  			break;
>>  		case OPTION_SUBCOMMAND:
>> -			if (!opts->value || !opts->subcommand_fn)
>> -				optbug(opts, "OPTION_SUBCOMMAND needs a value and a subcommand function");
>> +			if (!opts->value || !opts->callback)
>> +				optbug(opts, "OPTION_SUBCOMMAND needs a value and a callback function");
>> +			if (opts->ll_callback)
>> +				optbug(opts, "OPTION_SUBCOMMAND uses callback, not ll_callback");
>>  			if (!subcommand_value)
>>  				subcommand_value = opts->value;
>>  			else if (subcommand_value != opts->value)
>> diff --git a/parse-options.h b/parse-options.h
>> index b6ef86e0d15..61e3016c3fc 100644
>> --- a/parse-options.h
>> +++ b/parse-options.h
>> @@ -128,19 +128,24 @@ typedef int parse_opt_subcommand_fn(int argc, const char **argv,
>>   *			 the option takes optional argument.
>>   *
>>   * `callback`::
>> - *   pointer to the callback to use for OPTION_CALLBACK
>> + *   pointer to the callback to use for OPTION_CALLBACK and OPTION_SUBCOMMAND.
>>   *
>>   * `defval`::
>>   *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
>>   *   OPTION_{BIT,SET_INT} store the {mask,integer} to put in the value when met.
>> + *   OPTION_SUBCOMMAND stores the pointer the function selected for
>> + *   the subcommand.
>> + *
>>   *   CALLBACKS can use it like they want.
>>   *
>>   * `ll_callback`::
>>   *   pointer to the callback to use for OPTION_LOWLEVEL_CALLBACK
>>   *
>>   * `subcommand_fn`::
>> - *   pointer to a function to use for OPTION_SUBCOMMAND.
>> - *   It will be put in value when the subcommand is given on the command line.
>> + *   pointer to the callback used with OPT_SUBCOMMAND() and
>> + *   OPT_SUBCOMMAND_F(). Internally we store the same value in
>> + *   `defval`. This is only here to give the OPT_SUBCOMMAND{,_F}()
>> + *   common case type safety.
>>   */
>>  struct option {
>>  	enum parse_opt_type type;
>> @@ -217,12 +222,24 @@ struct option {
>>  #define OPT_ALIAS(s, l, source_long_name) \
>>  	{ OPTION_ALIAS, (s), (l), (source_long_name) }
>>
>> +static inline int parse_options_pick_subcommand_cb(const struct option *option,
>> +						   const char *arg UNUSED,
>> +						   int unset UNUSED)
>> +{
>> +	parse_opt_subcommand_fn *fn = (parse_opt_subcommand_fn *)option->defval;
>> +	*(parse_opt_subcommand_fn **)option->value = fn;
>
> ->defval is of type intptr_t and ->value is a void pointer.  The result
> of converting a void pointer value to an intptr_t and back is a void
> pointer equal to the original pointer if I read 6.3.2.3 (Pointers,
> paragraphs 5 and 6) and 7.18.1.4 (Integer types capable of holding
> object pointers) in C99 correctly.
>
> 6.3.2.3 paragraph 8 says that casting between function pointers of
> different type is OK and you can get your original function pointer
> back and use it in a call if you convert it back to the right type.
>
> Casting between a function pointer and an object pointer is undefined,
> though.  They don't have to be of the same size, so a function pointer
> doesn't have to fit into an intptr_t.  I wouldn't be surprised if CHERI
> (https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/) was an actual
> example of that.

I should have called this out explicitly. I think you're right as far as
what you're summarizing goes.

To elaborate on it, paragraph 8 of 6.3.2.3 says:

	A pointer to a function of one type may be converted to a
	pointer to a function of another type and back again; the result
	shall compare equal to the original pointer. If a converted
	pointer is used to call a function whose type is not compatible
	with the pointed-to type, the behavior is undefined.

And 7.18.1.4 says, when discussing (among other things) "intptr_t"
("[such" added for clarity:

	[...]any valid [such] pointer to void can be converted to this
	type, then converted back to pointer to void, and the result
	will compare equal to the original pointer:

But as you point out that doesn't say anything about whether a pointer
to a function is a "valid .. pointer to void".

I think that's an "unportable" extension covered in "J.5 Common
extensions", specifically "J.5.7 Function pointer casts":

	A pointer to an object or to void may be cast to a pointer to a
	function, allowing data to be invoked as a function

Thus, since the standard already establishes that valid "void *" and
"intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
gap between the two saying a function pointer can be converted to
either.

Now, I may be missing something here, but I was under the impression
that "intptr_t" wasn't special in any way here, and that any casting of
a function pointer to either it or a "void *" was what was made portable
by "J.5.7". We're only discussing "intptr_t" here, so it's just a point
of clarification.

Anyway, like ssize_t and a few other things this is extended upon and
made standard by POSIX. I.e. we're basically talking about whether this
passes:

	assert(sizeof(void (*)(void)) == sizeof(void*))

And per POSIX
(https://pubs.opengroup.org/onlinepubs/9699919799/functions/dlsym.html):

	Note that conversion from a void * pointer to a function pointer
	as in:

		fptr = (int (*)(int))dlsym(handle, "my_function");

	is not defined by the ISO C standard. This standard requires
	this conversion to work correctly on conforming implementations.

So I think aside from other concerns this should be safe to use, as
real-world data backing that up we've had a intptr_t converted to a
function pointer since v2.35.0: 5cb28270a1f (pack-objects: lazily set up
"struct rev_info", don't leak, 2022-03-28).

> Why is this trickery needed?  Above you write that callbacks in
> builtin/bisect--helper.c can't use subcommand_fn because they need
> their own argument.  Can we extend subcommand_fn or use a global
> variable to pass that extra thing instead?  The latter may be ugly, but
> at least it's valid C..

Yeah, there's ways around it. Less uglier in this case would probably be
to just have the callback set a function pointer in your own custom
struct (which we'd point to with "defval).

I.e. if our callabck is the one to populate the "fn" even without J.5.7
there's no portability issue, and that's just a convenience.

>> +	return 0;
>> +}
>> +
>>  #define OPT_SUBCOMMAND_F(l, v, fn, f) { \
>>  	.type = OPTION_SUBCOMMAND, \
>>  	.long_name = (l), \
>>  	.value = (v), \
>>  	.flags = (f), \
>> -	.subcommand_fn = (fn) }
>> +	.defval = (intptr_t)(fn), \
>> +	.subcommand_fn = (fn), \
>> +	.callback = parse_options_pick_subcommand_cb, \
>
> Getting the address of an inline function feels weird, but the compiler
> is free to emit to ignore that keyword and will provide an addressable
> function object here.

*nod*, I thought about adding this to parse-options-cb.c, but it seemed
 small enough...
Phillip Wood Nov. 5, 2022, 4:36 p.m. UTC | #4
On 05/11/2022 13:52, Ævar Arnfjörð Bjarmason wrote:
> 
> On Sat, Nov 05 2022, René Scharfe wrote:
> 
>> Am 04.11.22 um 14:22 schrieb Ævar Arnfjörð Bjarmason:
>>> diff --git a/parse-options.h b/parse-options.h
>>> index b6ef86e0d15..61e3016c3fc 100644
>>> --- a/parse-options.h
>>> +++ b/parse-options.h
>>> @@ -128,19 +128,24 @@ typedef int parse_opt_subcommand_fn(int argc, const char **argv,
>>>    *			 the option takes optional argument.
>>>    *
>>>    * `callback`::
>>> - *   pointer to the callback to use for OPTION_CALLBACK
>>> + *   pointer to the callback to use for OPTION_CALLBACK and OPTION_SUBCOMMAND.
>>>    *
>>>    * `defval`::
>>>    *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
>>>    *   OPTION_{BIT,SET_INT} store the {mask,integer} to put in the value when met.
>>> + *   OPTION_SUBCOMMAND stores the pointer the function selected for
>>> + *   the subcommand.
>>> + *
>>>    *   CALLBACKS can use it like they want.
>>>    *
>>>    * `ll_callback`::
>>>    *   pointer to the callback to use for OPTION_LOWLEVEL_CALLBACK
>>>    *
>>>    * `subcommand_fn`::
>>> - *   pointer to a function to use for OPTION_SUBCOMMAND.
>>> - *   It will be put in value when the subcommand is given on the command line.
>>> + *   pointer to the callback used with OPT_SUBCOMMAND() and
>>> + *   OPT_SUBCOMMAND_F(). Internally we store the same value in
>>> + *   `defval`. This is only here to give the OPT_SUBCOMMAND{,_F}()
>>> + *   common case type safety.
>>>    */
>>>   struct option {
>>>   	enum parse_opt_type type;
>>> @@ -217,12 +222,24 @@ struct option {
>>>   #define OPT_ALIAS(s, l, source_long_name) \
>>>   	{ OPTION_ALIAS, (s), (l), (source_long_name) }
>>>
>>> +static inline int parse_options_pick_subcommand_cb(const struct option *option,
>>> +						   const char *arg UNUSED,
>>> +						   int unset UNUSED)
>>> +{
>>> +	parse_opt_subcommand_fn *fn = (parse_opt_subcommand_fn *)option->defval;
>>> +	*(parse_opt_subcommand_fn **)option->value = fn;
>>
>> ->defval is of type intptr_t and ->value is a void pointer.  The result
>> of converting a void pointer value to an intptr_t and back is a void
>> pointer equal to the original pointer if I read 6.3.2.3 (Pointers,
>> paragraphs 5 and 6) and 7.18.1.4 (Integer types capable of holding
>> object pointers) in C99 correctly.
>>
>> 6.3.2.3 paragraph 8 says that casting between function pointers of
>> different type is OK and you can get your original function pointer
>> back and use it in a call if you convert it back to the right type.
>>
>> Casting between a function pointer and an object pointer is undefined,
>> though.  They don't have to be of the same size, so a function pointer
>> doesn't have to fit into an intptr_t.  I wouldn't be surprised if CHERI
>> (https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/) was an actual
>> example of that.
> 
> I should have called this out explicitly. I think you're right as far as
> what you're summarizing goes.
> 
> To elaborate on it, paragraph 8 of 6.3.2.3 says:
> 
> 	A pointer to a function of one type may be converted to a
> 	pointer to a function of another type and back again; the result
> 	shall compare equal to the original pointer. If a converted
> 	pointer is used to call a function whose type is not compatible
> 	with the pointed-to type, the behavior is undefined.
> 
> And 7.18.1.4 says, when discussing (among other things) "intptr_t"
> ("[such" added for clarity:
> 
> 	[...]any valid [such] pointer to void can be converted to this
> 	type, then converted back to pointer to void, and the result
> 	will compare equal to the original pointer:
> 
> But as you point out that doesn't say anything about whether a pointer
> to a function is a "valid .. pointer to void".
> 
> I think that's an "unportable" extension covered in "J.5 Common
> extensions", specifically "J.5.7 Function pointer casts":
> 
> 	A pointer to an object or to void may be cast to a pointer to a
> 	function, allowing data to be invoked as a function

This is a common extension, it is _not_ guaranteed by the standard and 
so still undefined behavior unless your compiler happens to have 
implemented that extension.

> Thus, since the standard already establishes that valid "void *" and
> "intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
> gap between the two saying a function pointer can be converted to
> either.

How does J.5.7 bridge the gap when compilers are not required to 
implement it?

> Now, I may be missing something here, but I was under the impression
> that "intptr_t" wasn't special in any way here, and that any casting of
> a function pointer to either it or a "void *" was what was made portable
> by "J.5.7"

How is it made portable by an "unportable" extension?

> So I think aside from other concerns this should be safe to use, as
> real-world data backing that up we've had a intptr_t converted to a
> function pointer since v2.35.0: 5cb28270a1f (pack-objects: lazily set up
> "struct rev_info", don't leak, 2022-03-28).

Saying "it works so it is fine" is not a convincing argument that it is 
compliant with the standard. If it is undefined then it may work now but 
not in a future compiler.

>> Why is this trickery needed?  Above you write that callbacks in
>> builtin/bisect--helper.c can't use subcommand_fn because they need
>> their own argument.  Can we extend subcommand_fn or use a global
>> variable to pass that extra thing instead?  The latter may be ugly, but
>> at least it's valid C..

Unfortunately the current code relies on being able to cast function 
pointers to void* so it is not just this patch that is relying on 
undefined behavior.

Best Wishes

Phillip
René Scharfe Nov. 5, 2022, 5:26 p.m. UTC | #5
Am 05.11.22 um 14:52 schrieb Ævar Arnfjörð Bjarmason:
>
> I think that's an "unportable" extension covered in "J.5 Common
> extensions", specifically "J.5.7 Function pointer casts":
>
> 	A pointer to an object or to void may be cast to a pointer to a
> 	function, allowing data to be invoked as a function
>
> Thus, since the standard already establishes that valid "void *" and
> "intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
> gap between the two saying a function pointer can be converted to
> either.
>
> Now, I may be missing something here, but I was under the impression
> that "intptr_t" wasn't special in any way here, and that any casting of
> a function pointer to either it or a "void *" was what was made portable
> by "J.5.7".

Do you mean "possible" or "workable" instead of "portable" here?  As you
write above, J.5.7 is an extension, not (fully) portable.

> Anyway, like ssize_t and a few other things this is extended upon and
> made standard by POSIX. I.e. we're basically talking about whether this
> passes:
>
> 	assert(sizeof(void (*)(void)) == sizeof(void*))
>
> And per POSIX
> (https://pubs.opengroup.org/onlinepubs/9699919799/functions/dlsym.html):
>
> 	Note that conversion from a void * pointer to a function pointer
> 	as in:
>
> 		fptr = (int (*)(int))dlsym(handle, "my_function");
>
> 	is not defined by the ISO C standard. This standard requires
> 	this conversion to work correctly on conforming implementations.

Conversion from object pointer to function pointer can still work if
function pointers are wider.

> So I think aside from other concerns this should be safe to use, as
> real-world data backing that up we've had a intptr_t converted to a
> function pointer since v2.35.0: 5cb28270a1f (pack-objects: lazily set up
> "struct rev_info", don't leak, 2022-03-28).

That may not have reached unusual architectures, yet.  Let's replace
that cast with something boring before someone gets hurt.  Something
like this?


diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 573d0b20b7..9e6f1530c6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4154,14 +4154,15 @@ struct po_filter_data {
 	struct rev_info revs;
 };

-static struct list_objects_filter_options *po_filter_revs_init(void *value)
+static int list_objects_filter_cb(const struct option *opt,
+				  const char *arg, int unset)
 {
-	struct po_filter_data *data = value;
+	struct po_filter_data *data = opt->value;

 	repo_init_revisions(the_repository, &data->revs, NULL);
 	data->have_revs = 1;

-	return &data->revs.filter;
+	return opt_parse_list_objects_filter(&data->revs.filter, arg, unset);
 }

 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
@@ -4265,7 +4266,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			      &write_bitmap_index,
 			      N_("write a bitmap index if possible"),
 			      WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
-		OPT_PARSE_LIST_OBJECTS_FILTER_INIT(&pfd, po_filter_revs_init),
+		OPT_PARSE_LIST_OBJECTS_FILTER_F(&pfd, list_objects_filter_cb),
 		OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
 		  N_("handling for missing objects"), PARSE_OPT_NONEG,
 		  option_parse_missing_action),
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 5339660238..2e560c2fdb 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -286,15 +286,9 @@ void parse_list_objects_filter(
 		die("%s", errbuf.buf);
 }

-int opt_parse_list_objects_filter(const struct option *opt,
+int opt_parse_list_objects_filter(struct list_objects_filter_options *filter_options,
 				  const char *arg, int unset)
 {
-	struct list_objects_filter_options *filter_options = opt->value;
-	opt_lof_init init = (opt_lof_init)opt->defval;
-
-	if (init)
-		filter_options = init(opt->value);
-
 	if (unset || !arg)
 		list_objects_filter_set_no_filter(filter_options);
 	else
@@ -302,6 +296,12 @@ int opt_parse_list_objects_filter(const struct option *opt,
 	return 0;
 }

+int opt_parse_list_objects_filter_cb(const struct option *opt,
+				     const char *arg, int unset)
+{
+	return opt_parse_list_objects_filter(opt->value, arg, unset);
+}
+
 const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
 {
 	if (!filter->filter_spec.len)
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 7eeadab2dd..fc6b4da06d 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -107,31 +107,26 @@ void parse_list_objects_filter(
 	struct list_objects_filter_options *filter_options,
 	const char *arg);

+int opt_parse_list_objects_filter(struct list_objects_filter_options *filter_options,
+				  const char *arg, int unset);
+
 /**
  * The opt->value to opt_parse_list_objects_filter() is either a
  * "struct list_objects_filter_option *" when using
  * OPT_PARSE_LIST_OBJECTS_FILTER().
  *
- * Or, if using no "struct option" field is used by the callback,
- * except the "defval" which is expected to be an "opt_lof_init"
- * function, which is called with the "opt->value" and must return a
- * pointer to the ""struct list_objects_filter_option *" to be used.
- *
- * The OPT_PARSE_LIST_OBJECTS_FILTER_INIT() can be used e.g. the
- * "struct list_objects_filter_option" is embedded in a "struct
- * rev_info", which the "defval" could be tasked with lazily
- * initializing. See cmd_pack_objects() for an example.
+ * Or, OPT_PARSE_LIST_OBJECTS_FILTER_F() can be used to specify a
+ * custom callback function that may expect a different type.
  */
-int opt_parse_list_objects_filter(const struct option *opt,
-				  const char *arg, int unset);
+int opt_parse_list_objects_filter_cb(const struct option *opt,
+				     const char *arg, int unset);
 typedef struct list_objects_filter_options *(*opt_lof_init)(void *);
-#define OPT_PARSE_LIST_OBJECTS_FILTER_INIT(fo, init) \
+#define OPT_PARSE_LIST_OBJECTS_FILTER_F(fo, fn) \
 	{ OPTION_CALLBACK, 0, "filter", (fo), N_("args"), \
-	  N_("object filtering"), 0, opt_parse_list_objects_filter, \
-	  (intptr_t)(init) }
+	  N_("object filtering"), 0, (fn) }

 #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
-	OPT_PARSE_LIST_OBJECTS_FILTER_INIT((fo), NULL)
+	OPT_PARSE_LIST_OBJECTS_FILTER_F((fo), opt_parse_list_objects_filter_cb)

 /*
  * Translates abbreviated numbers in the filter's filter_spec into their
Phillip Wood Nov. 5, 2022, 9:32 p.m. UTC | #6
Hi Đoàn

On 05/11/2022 11:34, Đoàn Trần Công Danh wrote:
>   
> On 2022-11-05 09:32:44+0100, René Scharfe <l.s.r@web.de> wrote:
>> Why is this trickery needed?  Above you write that callbacks in
>> builtin/bisect--helper.c can't use subcommand_fn because they need
>> their own argument.  Can we extend subcommand_fn or use a global
>> variable to pass that extra thing instead?  The latter may be ugly, but
>> at least it's valid C..
> 
> Not the author, but I fully agree with you, I think instead of adding new API
> for some arbitrary subcommand_fn, I would change the subcommand_fn to
> type:
> 
> 	int (*)(int argc, const char **argv, const char *prefix, void *context)
> 
> The last argument would be an object pointer, which will be casted to
> the correct type inside the callback.
> 
> Let me cherry-picking this series on top of mine to see how things
> would progress.

Unfortunately the current implementation of OPT_SUBCOMMAND relies on 
returning a function pointer though a void* variable which while widely 
supported is undefined behavior. I wonder if an API like

typedef int (*subcommand_fn)(int, char**, char*);
typedef int (*subcommand_ctk_fn)(int, char**, char*, void*);
struct subcommand; /* opaque */

OPT_SUBCOMMAND(name, value, func);
OPT_SUMCOMMAND_CTX(name, value, func, ctx);

int call_subcommand(struct subcommand* subcommand, int argc, char** 
argv, char* prefix);


which would be used as

int sub1_fn(int argc, char** argv, char* prefix, void* ctx)
{
	struct cmd_ctx cmd_ctx = ctx
	...
}

int sub2_fn(int argc, char** argv, char* prefix)
{
	...
}

int cmd_foo(int argc, char** argv, char* prefix)
{
	struct cmd_ctx ctx = ...
	struct subcommand cmd = { 0 };
	...
	struct option opts[] = {
		OPT_SUBCOMMAND_CTX("sub1", &cmd, sub_fn, &ctx);
		OPT_SUBCOMMAND("sub2", &cmd, sub2_fn);
	};
	argc = parse_options(argc, argv, prefix, &opts, usage, flags);
	return call_subcommand(&cmd, argc, argv, prefix);
}


would be an improvement. One can avoid having to mark the ctx parameter 
as UNUSED() if a subcommand does not need any context by using 
OPT_SUBCOMMAND() rather than OPT_SUBCOMMAND_CTX().


The implementation of call_subcommand() would look something like

struct subcommand {
         void* ctx;
         int has_ctx;
         union {
                 subcommand_fn fn;
                 subcommand_ctx_fn ctx_fn;
         };
};

int call_subcommand(struct subcommand* subcommand, int argc, char 
**argv, char* prefix)
{
         if (subcommand->has_ctx)
                 return subcommand->ctx_fn(argc, argv, prefix, 
subcommand->ctx);
         else
                 return subcommand->fn(argc, argv, prefix);
}


Best Wishes

Phillip
Ævar Arnfjörð Bjarmason Nov. 5, 2022, 9:59 p.m. UTC | #7
On Sat, Nov 05 2022, Phillip Wood wrote:

> On 05/11/2022 13:52, Ævar Arnfjörð Bjarmason wrote:
>> On Sat, Nov 05 2022, René Scharfe wrote:
>> 
>>> Am 04.11.22 um 14:22 schrieb Ævar Arnfjörð Bjarmason:
>>>> diff --git a/parse-options.h b/parse-options.h
>>>> index b6ef86e0d15..61e3016c3fc 100644
>>>> --- a/parse-options.h
>>>> +++ b/parse-options.h
>>>> @@ -128,19 +128,24 @@ typedef int parse_opt_subcommand_fn(int argc, const char **argv,
>>>>    *			 the option takes optional argument.
>>>>    *
>>>>    * `callback`::
>>>> - *   pointer to the callback to use for OPTION_CALLBACK
>>>> + *   pointer to the callback to use for OPTION_CALLBACK and OPTION_SUBCOMMAND.
>>>>    *
>>>>    * `defval`::
>>>>    *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
>>>>    *   OPTION_{BIT,SET_INT} store the {mask,integer} to put in the value when met.
>>>> + *   OPTION_SUBCOMMAND stores the pointer the function selected for
>>>> + *   the subcommand.
>>>> + *
>>>>    *   CALLBACKS can use it like they want.
>>>>    *
>>>>    * `ll_callback`::
>>>>    *   pointer to the callback to use for OPTION_LOWLEVEL_CALLBACK
>>>>    *
>>>>    * `subcommand_fn`::
>>>> - *   pointer to a function to use for OPTION_SUBCOMMAND.
>>>> - *   It will be put in value when the subcommand is given on the command line.
>>>> + *   pointer to the callback used with OPT_SUBCOMMAND() and
>>>> + *   OPT_SUBCOMMAND_F(). Internally we store the same value in
>>>> + *   `defval`. This is only here to give the OPT_SUBCOMMAND{,_F}()
>>>> + *   common case type safety.
>>>>    */
>>>>   struct option {
>>>>   	enum parse_opt_type type;
>>>> @@ -217,12 +222,24 @@ struct option {
>>>>   #define OPT_ALIAS(s, l, source_long_name) \
>>>>   	{ OPTION_ALIAS, (s), (l), (source_long_name) }
>>>>
>>>> +static inline int parse_options_pick_subcommand_cb(const struct option *option,
>>>> +						   const char *arg UNUSED,
>>>> +						   int unset UNUSED)
>>>> +{
>>>> +	parse_opt_subcommand_fn *fn = (parse_opt_subcommand_fn *)option->defval;
>>>> +	*(parse_opt_subcommand_fn **)option->value = fn;
>>>
>>> ->defval is of type intptr_t and ->value is a void pointer.  The result
>>> of converting a void pointer value to an intptr_t and back is a void
>>> pointer equal to the original pointer if I read 6.3.2.3 (Pointers,
>>> paragraphs 5 and 6) and 7.18.1.4 (Integer types capable of holding
>>> object pointers) in C99 correctly.
>>>
>>> 6.3.2.3 paragraph 8 says that casting between function pointers of
>>> different type is OK and you can get your original function pointer
>>> back and use it in a call if you convert it back to the right type.
>>>
>>> Casting between a function pointer and an object pointer is undefined,
>>> though.  They don't have to be of the same size, so a function pointer
>>> doesn't have to fit into an intptr_t.  I wouldn't be surprised if CHERI
>>> (https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/) was an actual
>>> example of that.
>> I should have called this out explicitly. I think you're right as
>> far as
>> what you're summarizing goes.
>> To elaborate on it, paragraph 8 of 6.3.2.3 says:
>> 	A pointer to a function of one type may be converted to a
>> 	pointer to a function of another type and back again; the result
>> 	shall compare equal to the original pointer. If a converted
>> 	pointer is used to call a function whose type is not compatible
>> 	with the pointed-to type, the behavior is undefined.
>> And 7.18.1.4 says, when discussing (among other things) "intptr_t"
>> ("[such" added for clarity:
>> 	[...]any valid [such] pointer to void can be converted to this
>> 	type, then converted back to pointer to void, and the result
>> 	will compare equal to the original pointer:
>> But as you point out that doesn't say anything about whether a
>> pointer
>> to a function is a "valid .. pointer to void".
>> I think that's an "unportable" extension covered in "J.5 Common
>> extensions", specifically "J.5.7 Function pointer casts":
>> 	A pointer to an object or to void may be cast to a pointer to
>> a
>> 	function, allowing data to be invoked as a function
>
> This is a common extension, it is _not_ guaranteed by the standard and
> so still undefined behavior unless your compiler happens to have 
> implemented that extension.

>> Thus, since the standard already establishes that valid "void *" and
>> "intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
>> gap between the two saying a function pointer can be converted to
>> either.
>
> How does J.5.7 bridge the gap when compilers are not required to
> implement it?

I'm saying it bridges the gap in that explanation, i.e. reinforces that
the main standard body isn't referring to function pointer casts to void
* and intptr_t.

>> Now, I may be missing something here, but I was under the impression
>> that "intptr_t" wasn't special in any way here, and that any casting of
>> a function pointer to either it or a "void *" was what was made portable
>> by "J.5.7"
>
> How is it made portable by an "unportable" extension?

I'm just repeating the terminology the standard uses: "The following
extensions are widely used in many systems, but are not portable to all
implementations".

>> So I think aside from other concerns this should be safe to use, as
>> real-world data backing that up we've had a intptr_t converted to a
>> function pointer since v2.35.0: 5cb28270a1f (pack-objects: lazily set up
>> "struct rev_info", don't leak, 2022-03-28).
>
> Saying "it works so it is fine" is not a convincing argument that it
> is compliant with the standard. 

I was saying, among other things, that it's standardized by POSIX, so
it's in the same category as ssize_t, not some hypothetical "happens to
work" undefined behavior.

But we also make use of it already, so that gives us some real-world
data on potential issues.
Ævar Arnfjörð Bjarmason Nov. 5, 2022, 10:33 p.m. UTC | #8
On Sat, Nov 05 2022, René Scharfe wrote:

> Am 05.11.22 um 14:52 schrieb Ævar Arnfjörð Bjarmason:
>>
>> I think that's an "unportable" extension covered in "J.5 Common
>> extensions", specifically "J.5.7 Function pointer casts":
>>
>> 	A pointer to an object or to void may be cast to a pointer to a
>> 	function, allowing data to be invoked as a function
>>
>> Thus, since the standard already establishes that valid "void *" and
>> "intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
>> gap between the two saying a function pointer can be converted to
>> either.
>>
>> Now, I may be missing something here, but I was under the impression
>> that "intptr_t" wasn't special in any way here, and that any casting of
>> a function pointer to either it or a "void *" was what was made portable
>> by "J.5.7".
>
> Do you mean "possible" or "workable" instead of "portable" here?  As you
> write above, J.5.7 is an extension, not (fully) portable.

I think my just-sent in the side-thread should clarify this.

>> Anyway, like ssize_t and a few other things this is extended upon and
>> made standard by POSIX. I.e. we're basically talking about whether this
>> passes:
>>
>> 	assert(sizeof(void (*)(void)) == sizeof(void*))
>>
>> And per POSIX
>> (https://pubs.opengroup.org/onlinepubs/9699919799/functions/dlsym.html):
>>
>> 	Note that conversion from a void * pointer to a function pointer
>> 	as in:
>>
>> 		fptr = (int (*)(int))dlsym(handle, "my_function");
>>
>> 	is not defined by the ISO C standard. This standard requires
>> 	this conversion to work correctly on conforming implementations.
>
> Conversion from object pointer to function pointer can still work if
> function pointers are wider.
>
>> So I think aside from other concerns this should be safe to use, as
>> real-world data backing that up we've had a intptr_t converted to a
>> function pointer since v2.35.0: 5cb28270a1f (pack-objects: lazily set up
>> "struct rev_info", don't leak, 2022-03-28).
>
> That may not have reached unusual architectures, yet.  Let's replace
> that cast with something boring before someone gets hurt.  Something
> like this?
>
>
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 573d0b20b7..9e6f1530c6 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -4154,14 +4154,15 @@ struct po_filter_data {
>  	struct rev_info revs;
>  };
>
> -static struct list_objects_filter_options *po_filter_revs_init(void *value)
> +static int list_objects_filter_cb(const struct option *opt,
> +				  const char *arg, int unset)
>  {
> -	struct po_filter_data *data = value;
> +	struct po_filter_data *data = opt->value;
>
>  	repo_init_revisions(the_repository, &data->revs, NULL);
>  	data->have_revs = 1;
>
> -	return &data->revs.filter;
> +	return opt_parse_list_objects_filter(&data->revs.filter, arg, unset);
>  }
>
>  int cmd_pack_objects(int argc, const char **argv, const char *prefix)
> @@ -4265,7 +4266,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>  			      &write_bitmap_index,
>  			      N_("write a bitmap index if possible"),
>  			      WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
> -		OPT_PARSE_LIST_OBJECTS_FILTER_INIT(&pfd, po_filter_revs_init),
> +		OPT_PARSE_LIST_OBJECTS_FILTER_F(&pfd, list_objects_filter_cb),
>  		OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
>  		  N_("handling for missing objects"), PARSE_OPT_NONEG,
>  		  option_parse_missing_action),
> diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
> index 5339660238..2e560c2fdb 100644
> --- a/list-objects-filter-options.c
> +++ b/list-objects-filter-options.c
> @@ -286,15 +286,9 @@ void parse_list_objects_filter(
>  		die("%s", errbuf.buf);
>  }
>
> -int opt_parse_list_objects_filter(const struct option *opt,
> +int opt_parse_list_objects_filter(struct list_objects_filter_options *filter_options,
>  				  const char *arg, int unset)
>  {
> -	struct list_objects_filter_options *filter_options = opt->value;
> -	opt_lof_init init = (opt_lof_init)opt->defval;
> -
> -	if (init)
> -		filter_options = init(opt->value);
> -
>  	if (unset || !arg)
>  		list_objects_filter_set_no_filter(filter_options);
>  	else
> @@ -302,6 +296,12 @@ int opt_parse_list_objects_filter(const struct option *opt,
>  	return 0;
>  }
>
> +int opt_parse_list_objects_filter_cb(const struct option *opt,
> +				     const char *arg, int unset)
> +{
> +	return opt_parse_list_objects_filter(opt->value, arg, unset);
> +}
> +
>  const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
>  {
>  	if (!filter->filter_spec.len)
> diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
> index 7eeadab2dd..fc6b4da06d 100644
> --- a/list-objects-filter-options.h
> +++ b/list-objects-filter-options.h
> @@ -107,31 +107,26 @@ void parse_list_objects_filter(
>  	struct list_objects_filter_options *filter_options,
>  	const char *arg);
>
> +int opt_parse_list_objects_filter(struct list_objects_filter_options *filter_options,
> +				  const char *arg, int unset);
> +
>  /**
>   * The opt->value to opt_parse_list_objects_filter() is either a
>   * "struct list_objects_filter_option *" when using
>   * OPT_PARSE_LIST_OBJECTS_FILTER().
>   *
> - * Or, if using no "struct option" field is used by the callback,
> - * except the "defval" which is expected to be an "opt_lof_init"
> - * function, which is called with the "opt->value" and must return a
> - * pointer to the ""struct list_objects_filter_option *" to be used.
> - *
> - * The OPT_PARSE_LIST_OBJECTS_FILTER_INIT() can be used e.g. the
> - * "struct list_objects_filter_option" is embedded in a "struct
> - * rev_info", which the "defval" could be tasked with lazily
> - * initializing. See cmd_pack_objects() for an example.
> + * Or, OPT_PARSE_LIST_OBJECTS_FILTER_F() can be used to specify a
> + * custom callback function that may expect a different type.
>   */
> -int opt_parse_list_objects_filter(const struct option *opt,
> -				  const char *arg, int unset);
> +int opt_parse_list_objects_filter_cb(const struct option *opt,
> +				     const char *arg, int unset);
>  typedef struct list_objects_filter_options *(*opt_lof_init)(void *);
> -#define OPT_PARSE_LIST_OBJECTS_FILTER_INIT(fo, init) \
> +#define OPT_PARSE_LIST_OBJECTS_FILTER_F(fo, fn) \
>  	{ OPTION_CALLBACK, 0, "filter", (fo), N_("args"), \
> -	  N_("object filtering"), 0, opt_parse_list_objects_filter, \
> -	  (intptr_t)(init) }
> +	  N_("object filtering"), 0, (fn) }
>
>  #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \
> -	OPT_PARSE_LIST_OBJECTS_FILTER_INIT((fo), NULL)
> +	OPT_PARSE_LIST_OBJECTS_FILTER_F((fo), opt_parse_list_objects_filter_cb)
>
>  /*
>   * Translates abbreviated numbers in the filter's filter_spec into their

I think "just leave it, and see if anyone complains".

If you look over config.mak.uname you can see what we're likely to be
ported to (and some of that's probably dead). The list of potential
targets that:

 1) We know of ports to, or people would plausibly port git to
 2) Are updated so slow that they're on a release that's getting close
    to a year old.

Are small, and it's usually easy to look up their memory model etc. are
you concerned about any specific one?

I think if you're worried enough about it to push for the diff above:
Can we just hide it behind an "#ifdef", then if we find that nobody's
using it, we can consider it safe to use.

I don't think there's any great benefit to the extension in that
specific case, but there might be in the future (e.g. this topic would
be one small user), so since we already have an unintentional test
ballon, why not see if we can keep it safely?
René Scharfe Nov. 6, 2022, 8:25 a.m. UTC | #9
Am 05.11.22 um 23:33 schrieb Ævar Arnfjörð Bjarmason:
>
> On Sat, Nov 05 2022, René Scharfe wrote:
>
>> Am 05.11.22 um 14:52 schrieb Ævar Arnfjörð Bjarmason:
>>>
>>> I think that's an "unportable" extension covered in "J.5 Common
>>> extensions", specifically "J.5.7 Function pointer casts":
>>>
>>> 	A pointer to an object or to void may be cast to a pointer to a
>>> 	function, allowing data to be invoked as a function
>>>
>>> Thus, since the standard already establishes that valid "void *" and
>>> "intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
>>> gap between the two saying a function pointer can be converted to
>>> either.
>>>
>>> Now, I may be missing something here, but I was under the impression
>>> that "intptr_t" wasn't special in any way here, and that any casting of
>>> a function pointer to either it or a "void *" was what was made portable
>>> by "J.5.7".
>>
>> Do you mean "possible" or "workable" instead of "portable" here?  As you
>> write above, J.5.7 is an extension, not (fully) portable.
>
> I think my just-sent in the side-thread should clarify this.

AFAIU you think that J.5.7 plus POSIX make conversions between object
pointers and function pointers portable.

>>> Anyway, like ssize_t and a few other things this is extended upon and
>>> made standard by POSIX. I.e. we're basically talking about whether this
>>> passes:
>>>
>>> 	assert(sizeof(void (*)(void)) == sizeof(void*))
>>>
>>> And per POSIX
>>> (https://pubs.opengroup.org/onlinepubs/9699919799/functions/dlsym.html):
>>>
>>> 	Note that conversion from a void * pointer to a function pointer
>>> 	as in:
>>>
>>> 		fptr = (int (*)(int))dlsym(handle, "my_function");
>>>
>>> 	is not defined by the ISO C standard. This standard requires
>>> 	this conversion to work correctly on conforming implementations.
>>
>> Conversion from object pointer to function pointer can still work if
>> function pointers are wider.

This means that a compliant implementation could lose bits when going
the other way, i.e. converting a function pointer to an object pointer.

>>> So I think aside from other concerns this should be safe to use, as
>>> real-world data backing that up we've had a intptr_t converted to a
>>> function pointer since v2.35.0: 5cb28270a1f (pack-objects: lazily set up
>>> "struct rev_info", don't leak, 2022-03-28).
>>
>> That may not have reached unusual architectures, yet.  Let's replace
>> that cast with something boring before someone gets hurt.  Something
>> like this?

[snip]

> I think "just leave it, and see if anyone complains".
>
> If you look over config.mak.uname you can see what we're likely to be
> ported to (and some of that's probably dead). The list of potential
> targets that:
>
>  1) We know of ports to, or people would plausibly port git to
>  2) Are updated so slow that they're on a release that's getting close
>     to a year old.
>
> Are small, and it's usually easy to look up their memory model etc. are
> you concerned about any specific one?

Using implementation-defined behavior and requiring extensions when
standard code would work just as well makes no sense to me.

> I think if you're worried enough about it to push for the diff above:
> Can we just hide it behind an "#ifdef", then if we find that nobody's
> using it, we can consider it safe to use.
>
> I don't think there's any great benefit to the extension in that
> specific case, but there might be in the future (e.g. this topic would
> be one small user), so since we already have an unintentional test
> ballon, why not see if we can keep it safely?

You can't certify safety with tests.  Undefined behavior may manifest
itself in weird ways and only under certain circumstances.  Future
architectures may add new failure modes.  It's not like a syntax
extension, to which nonsupporting compilers respond with an error,
i.e. a clear signal.

René
Ævar Arnfjörð Bjarmason Nov. 6, 2022, 1:28 p.m. UTC | #10
On Sun, Nov 06 2022, René Scharfe wrote:

> Am 05.11.22 um 23:33 schrieb Ævar Arnfjörð Bjarmason:
>>
>> On Sat, Nov 05 2022, René Scharfe wrote:
>>
>>> Am 05.11.22 um 14:52 schrieb Ævar Arnfjörð Bjarmason:
>>>>
>>>> I think that's an "unportable" extension covered in "J.5 Common
>>>> extensions", specifically "J.5.7 Function pointer casts":
>>>>
>>>> 	A pointer to an object or to void may be cast to a pointer to a
>>>> 	function, allowing data to be invoked as a function
>>>>
>>>> Thus, since the standard already establishes that valid "void *" and
>>>> "intptr_t" pointers can be cast'd back & forth, the J.5.7 bridges the
>>>> gap between the two saying a function pointer can be converted to
>>>> either.
>>>>
>>>> Now, I may be missing something here, but I was under the impression
>>>> that "intptr_t" wasn't special in any way here, and that any casting of
>>>> a function pointer to either it or a "void *" was what was made portable
>>>> by "J.5.7".
>>>
>>> Do you mean "possible" or "workable" instead of "portable" here?  As you
>>> write above, J.5.7 is an extension, not (fully) portable.
>>
>> I think my just-sent in the side-thread should clarify this.
>
> AFAIU you think that J.5.7 plus POSIX make conversions between object
> pointers and function pointers portable.

No, I think that:

1) J.5.7 does that on its own for C99
2) POSIX has orthagonally mandated this, seperate from C99.

In practice I think it's always worked for dlsym(), but there's
interesting changes in wording between v6 and v7 of POSIX:

   - https://pubs.opengroup.org/onlinepubs/009695399/functions/dlsym.html
   - https://pubs.opengroup.org/onlinepubs/9699919799/functions/dlsym.html

v6 claims that conforming C compilers are required to produce a warning
if this isn't supported (I haven't found the part of the standard
they're referencing), and notes that the behavior may be deprecated in
the future.

Whereas v7 says that POSIX "requires this conversion to work correctly
on conforming implementations".

Is your reading of any of that different?

In addition to that: This is widely supported on systems that didn't
aquire such support via optional C99 extensions, or POSIX. E.g. Win32.

I think this is one of those things that C has left deliberately
undefined for the benefit of embedded implementations, and others with
odd memory models.

>> I think "just leave it, and see if anyone complains".
>>
>> If you look over config.mak.uname you can see what we're likely to be
>> ported to (and some of that's probably dead). The list of potential
>> targets that:
>>
>>  1) We know of ports to, or people would plausibly port git to
>>  2) Are updated so slow that they're on a release that's getting close
>>     to a year old.
>>
>> Are small, and it's usually easy to look up their memory model etc. are
>> you concerned about any specific one?
>
> Using implementation-defined behavior and requiring extensions when
> standard code would work just as well makes no sense to me.

I think it's useful in itself to see what subset or superset of C we
actually need to concern ourselves with.

E.g. we have plenty of code that assumes ASCII, instead of catering to
EBCDIC, and assuming NULL is (void *)0, not (void *)123456 or whatever.

Yes, in this case the alternative is trivial, but perhaps we'd find a
use-case in the future.

All I'm saying is let's leave the current one in place, as there's no
indication that it's not supported by our targets.
René Scharfe Nov. 12, 2022, 10:42 a.m. UTC | #11
Am 06.11.22 um 14:28 schrieb Ævar Arnfjörð Bjarmason:
>
> No, I think that:
>
> 1) J.5.7 does that on its own for C99

An extension is not part of the standard, by definition.  Section J is
informative, not normative.  J.5.7 just says that there are systems out
there which allow casts from function pointers to object pointers and/or
back, which is not portable.

> 2) POSIX has orthagonally mandated this, seperate from C99.

It only requires dlsym(3) to work.  That's a narrow case.  A conforming
implementation could have function pointers wider than object pointers,
making conversions in the other direction lossy.  Or have function
pointers narrower than object pointers and let dlopen(3) place symbols
only in the range addressable by those.

> In practice I think it's always worked for dlsym(), but there's
> interesting changes in wording between v6 and v7 of POSIX:
>
>    - https://pubs.opengroup.org/onlinepubs/009695399/functions/dlsym.html
>    - https://pubs.opengroup.org/onlinepubs/9699919799/functions/dlsym.html
>
> v6 claims that conforming C compilers are required to produce a warning
> if this isn't supported (I haven't found the part of the standard
> they're referencing), and notes that the behavior may be deprecated in
> the future.
>
> Whereas v7 says that POSIX "requires this conversion to work correctly
> on conforming implementations".

From the second link: "The dlsym() function is moved from the XSI option
to the Base."  So dynamic libraries are no longer optional on POSIX
systems.  That means you can use dlsym(3) on any compliant system.  You
can also safely store its return value in an intptr_t IIUC.  But that
doesn't generalize to all function pointers.

> I think it's useful in itself to see what subset or superset of C we
> actually need to concern ourselves with.
>
> E.g. we have plenty of code that assumes ASCII, instead of catering to
> EBCDIC, and assuming NULL is (void *)0, not (void *)123456 or whatever.

NULL is defined as "0" or "(void *)0" by C99 6.3.2.3 Pointers paragraph
3 and 7.17 Common definitions <stddef.h> paragraph 3.

> Yes, in this case the alternative is trivial, but perhaps we'd find a
> use-case in the future.
>
> All I'm saying is let's leave the current one in place, as there's no
> indication that it's not supported by our targets.

Leaving undefined behavior in the code in the hope that we may come up
with a compelling use case for it later is a bad idea.  I really hope
we never find one.

René
Jeff King Nov. 12, 2022, 4:34 p.m. UTC | #12
On Sat, Nov 12, 2022 at 11:42:09AM +0100, René Scharfe wrote:

> > E.g. we have plenty of code that assumes ASCII, instead of catering to
> > EBCDIC, and assuming NULL is (void *)0, not (void *)123456 or whatever.
> 
> NULL is defined as "0" or "(void *)0" by C99 6.3.2.3 Pointers paragraph
> 3 and 7.17 Common definitions <stddef.h> paragraph 3.

I think he is alluding to the fact that while the standard requires that
a "0" constant refers to a NULL pointer, the representation does not
have to be all-bits-zero. So:

  char *foo = 0;

is fine, but:

  char *foo;
  memset(foo, 0, sizeof(&foo));

is not. And we absolutely do the latter in our code base anyway, because
it's convenient and unlikely to be a problem on practical platforms. And
I think it has always been our attitude in this community to let
engineering practicality trump strict adherence to the standard. But
"practicality" there should be measuring the tradeoff of how useful
something is versus how likely it is to bite us.

In the case under discussion, my gut feeling agrees with you, though.
I'm skeptical that equivalence of object and function pointers is all
that useful in practice. And your mention of CHERI seems like a
plausible way it could bite us.

-Peff
Ævar Arnfjörð Bjarmason Nov. 12, 2022, 4:55 p.m. UTC | #13
On Sat, Nov 12 2022, Jeff King wrote:

> On Sat, Nov 12, 2022 at 11:42:09AM +0100, René Scharfe wrote:
>
>> > E.g. we have plenty of code that assumes ASCII, instead of catering to
>> > EBCDIC, and assuming NULL is (void *)0, not (void *)123456 or whatever.
>> 
>> NULL is defined as "0" or "(void *)0" by C99 6.3.2.3 Pointers paragraph
>> 3 and 7.17 Common definitions <stddef.h> paragraph 3.
>
> I think he is alluding to the fact that while the standard requires that
> a "0" constant refers to a NULL pointer, the representation does not
> have to be all-bits-zero. So:
>
>   char *foo = 0;
>
> is fine, but:
>
>   char *foo;
>   memset(foo, 0, sizeof(&foo));

Yes, to elaborate: the "null pointer constant" referred to in 6.3.2.3
deliberately leaves room for the representation being unequal to the
"all zero bits". And as you point out the former example is portable,
but not the latter.

> is not. And we absolutely do the latter in our code base anyway, because
> it's convenient and unlikely to be a problem on practical platforms. And
> I think it has always been our attitude in this community to let
> engineering practicality trump strict adherence to the standard. But
> "practicality" there should be measuring the tradeoff of how useful
> something is versus how likely it is to bite us.

All I've been trying to get across in this sub-thread is that there's an
interesting empirical question here: Are we in fact targeting an
architecture where J.5.7 isn't implemented, or likely to have one sneak
up on us?

I don't think so, and timing-wise deciding to be paranoid about this
particular thing would leave that question unanswered, when all we have
to do is wait a bit (some of the slower platforms tend to be a few
releases behind).

The argument for the change[1] (further articulated upthread) hasn't
answered the "do we target such an arch?", but seems to just fall back
to general standards paranoia.

Which isn't an invalid argument in itself. But doesn't really address
why we'd be worried about *this* particular thing, but not e.g. those
sort of memsets, assuming ASCII ordering for 'A'..'z' etc.

> In the case under discussion, my gut feeling agrees with you, though.
> I'm skeptical that equivalence of object and function pointers is all
> that useful in practice. And your mention of CHERI seems like a
> plausible way it could bite us.

I think the post-image of [1] looks nicer when reviewed stand-alone, so
I'm not against the change per-se, I actually like it.

And I don't have a use-case for using that feature further, in a way
that isn't easy to do differently.

But e.g. now we're having a parallel discussion about using some 3rd
party bitmap library. We might e.g. want to incorporate some 3rd party
JIT or whatever in the future. If we run into this question again it
would be nice to have it answered already.

And if we didn't have this J.5.7 reliance in that code already I don't
think it would be worth the effort to introduce one as a test
balloon. I'm only saying this in the context that we already have one.

1. https://lore.kernel.org/git/c64e4fa5-62c2-2a93-a4ef-bd84407ea570@web.de/
René Scharfe Nov. 13, 2022, 5:31 p.m. UTC | #14
Am 12.11.22 um 17:55 schrieb Ævar Arnfjörð Bjarmason:
>
> On Sat, Nov 12 2022, Jeff King wrote:
>
>> On Sat, Nov 12, 2022 at 11:42:09AM +0100, René Scharfe wrote:
>>
>>>> E.g. we have plenty of code that assumes ASCII, instead of catering to
>>>> EBCDIC, and assuming NULL is (void *)0, not (void *)123456 or whatever.
>>>
>>> NULL is defined as "0" or "(void *)0" by C99 6.3.2.3 Pointers paragraph
>>> 3 and 7.17 Common definitions <stddef.h> paragraph 3.
>>
>> I think he is alluding to the fact that while the standard requires that
>> a "0" constant refers to a NULL pointer, the representation does not
>> have to be all-bits-zero. So:
>>
>>   char *foo = 0;
>>
>> is fine, but:
>>
>>   char *foo;
>>   memset(foo, 0, sizeof(&foo));
>
> Yes, to elaborate: the "null pointer constant" referred to in 6.3.2.3
> deliberately leaves room for the representation being unequal to the
> "all zero bits". And as you point out the former example is portable,
> but not the latter.
>
>> is not. And we absolutely do the latter in our code base anyway, because
>> it's convenient and unlikely to be a problem on practical platforms. And
>> I think it has always been our attitude in this community to let
>> engineering practicality trump strict adherence to the standard. But
>> "practicality" there should be measuring the tradeoff of how useful
>> something is versus how likely it is to bite us.

For me the usefulness so far is negative: The code is more complicated
than necessary.  Adding a context pointer to the callback function
signature here and keeping the extra code outside the callback function
in builtin/pack-objects.c is simpler.

> All I've been trying to get across in this sub-thread is that there's an
> interesting empirical question here: Are we in fact targeting an
> architecture where J.5.7 isn't implemented, or likely to have one sneak
> up on us?

How would you measure this?  Undefined behavior can manifest itself
differently e.g. based on compiler version and options, or in this
case pointer value and perhaps even function calling convention.  And
of course in the form of the famous nasal demons..

> I don't think so, and timing-wise deciding to be paranoid about this
> particular thing would leave that question unanswered, when all we have
> to do is wait a bit (some of the slower platforms tend to be a few
> releases behind).
>
> The argument for the change[1] (further articulated upthread) hasn't
> answered the "do we target such an arch?", but seems to just fall back
> to general standards paranoia.

I mentioned CHERI (Arm Morello) as a candidate, but can't tell you for
sure.

> Which isn't an invalid argument in itself. But doesn't really address
> why we'd be worried about *this* particular thing, but not e.g. those
> sort of memsets, assuming ASCII ordering for 'A'..'z' etc.

You can keep worrying about them if you like.  Replacing memset calls
with _INIT macros has been going on for while already.  Using isalpha()
instead of character range comparisons etc. is probably a good idea
anyway.

>> In the case under discussion, my gut feeling agrees with you, though.
>> I'm skeptical that equivalence of object and function pointers is all
>> that useful in practice. And your mention of CHERI seems like a
>> plausible way it could bite us.
>
> I think the post-image of [1] looks nicer when reviewed stand-alone, so
> I'm not against the change per-se, I actually like it.
>
> And I don't have a use-case for using that feature further, in a way
> that isn't easy to do differently.
>
> But e.g. now we're having a parallel discussion about using some 3rd
> party bitmap library. We might e.g. want to incorporate some 3rd party
> JIT or whatever in the future. If we run into this question again it
> would be nice to have it answered already.

Why would a bitmap library require function pointer casts?

A JIT library probably comes with a list of supported systems and
requires a fallback for anyone else.  I'd expect the system-specific
parts to be encapsulated in that library.

> And if we didn't have this J.5.7 reliance in that code already I don't
> think it would be worth the effort to introduce one as a test
> balloon. I'm only saying this in the context that we already have one.
>
> 1. https://lore.kernel.org/git/c64e4fa5-62c2-2a93-a4ef-bd84407ea570@web.de/

If it's not worth adding then it's probably not worth keeping.

René
diff mbox series

Patch

diff --git a/parse-options.c b/parse-options.c
index a1ec932f0f9..1d9e46c9dc7 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -427,7 +427,8 @@  static enum parse_opt_result parse_subcommand(const char *arg,
 	for (; options->type != OPTION_END; options++)
 		if (options->type == OPTION_SUBCOMMAND &&
 		    !strcmp(options->long_name, arg)) {
-			*(parse_opt_subcommand_fn **)options->value = options->subcommand_fn;
+			if (options->callback(options, arg, 0))
+				BUG("OPT_SUBCOMMAND callback returning non-zero");
 			return PARSE_OPT_SUBCOMMAND;
 		}
 
@@ -506,8 +507,10 @@  static void parse_options_check(const struct option *opts)
 			       "That case is not supported yet.");
 			break;
 		case OPTION_SUBCOMMAND:
-			if (!opts->value || !opts->subcommand_fn)
-				optbug(opts, "OPTION_SUBCOMMAND needs a value and a subcommand function");
+			if (!opts->value || !opts->callback)
+				optbug(opts, "OPTION_SUBCOMMAND needs a value and a callback function");
+			if (opts->ll_callback)
+				optbug(opts, "OPTION_SUBCOMMAND uses callback, not ll_callback");
 			if (!subcommand_value)
 				subcommand_value = opts->value;
 			else if (subcommand_value != opts->value)
diff --git a/parse-options.h b/parse-options.h
index b6ef86e0d15..61e3016c3fc 100644
--- a/parse-options.h
+++ b/parse-options.h
@@ -128,19 +128,24 @@  typedef int parse_opt_subcommand_fn(int argc, const char **argv,
  *			 the option takes optional argument.
  *
  * `callback`::
- *   pointer to the callback to use for OPTION_CALLBACK
+ *   pointer to the callback to use for OPTION_CALLBACK and OPTION_SUBCOMMAND.
  *
  * `defval`::
  *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
  *   OPTION_{BIT,SET_INT} store the {mask,integer} to put in the value when met.
+ *   OPTION_SUBCOMMAND stores the pointer the function selected for
+ *   the subcommand.
+ *
  *   CALLBACKS can use it like they want.
  *
  * `ll_callback`::
  *   pointer to the callback to use for OPTION_LOWLEVEL_CALLBACK
  *
  * `subcommand_fn`::
- *   pointer to a function to use for OPTION_SUBCOMMAND.
- *   It will be put in value when the subcommand is given on the command line.
+ *   pointer to the callback used with OPT_SUBCOMMAND() and
+ *   OPT_SUBCOMMAND_F(). Internally we store the same value in
+ *   `defval`. This is only here to give the OPT_SUBCOMMAND{,_F}()
+ *   common case type safety.
  */
 struct option {
 	enum parse_opt_type type;
@@ -217,12 +222,24 @@  struct option {
 #define OPT_ALIAS(s, l, source_long_name) \
 	{ OPTION_ALIAS, (s), (l), (source_long_name) }
 
+static inline int parse_options_pick_subcommand_cb(const struct option *option,
+						   const char *arg UNUSED,
+						   int unset UNUSED)
+{
+	parse_opt_subcommand_fn *fn = (parse_opt_subcommand_fn *)option->defval;
+	*(parse_opt_subcommand_fn **)option->value = fn;
+	return 0;
+}
+
 #define OPT_SUBCOMMAND_F(l, v, fn, f) { \
 	.type = OPTION_SUBCOMMAND, \
 	.long_name = (l), \
 	.value = (v), \
 	.flags = (f), \
-	.subcommand_fn = (fn) }
+	.defval = (intptr_t)(fn), \
+	.subcommand_fn = (fn), \
+	.callback = parse_options_pick_subcommand_cb, \
+}
 #define OPT_SUBCOMMAND(l, v, fn)    OPT_SUBCOMMAND_F((l), (v), (fn), 0)
 
 /*