diff mbox series

[v3,23/25] tools/xenstore: merge is_valid_nodename() into canonicalize()

Message ID 20230724110247.10520-24-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series tools/xenstore: drop TDB | expand

Commit Message

Jürgen Groß July 24, 2023, 11:02 a.m. UTC
Today is_valid_nodename() is always called directly after calling
canonicalize(), with the exception of do_unwatch(), where the call
is missing (which is not correct, but results just in a wrong error
reason being returned).

Merge is_valid_nodename() into canonicalize(). While at it merge
valid_chars() into it, too.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- new patch
---
 tools/xenstore/xenstored_core.c  | 89 ++++++++++++++------------------
 tools/xenstore/xenstored_core.h  |  6 +--
 tools/xenstore/xenstored_watch.c | 16 ++----
 3 files changed, 45 insertions(+), 66 deletions(-)

Comments

Julien Grall Aug. 3, 2023, 9:46 p.m. UTC | #1
Hi,

On 24/07/2023 12:02, Juergen Gross wrote:
> Today is_valid_nodename() is always called directly after calling
> canonicalize(), with the exception of do_unwatch(), where the call
> is missing (which is not correct, but results just in a wrong error
> reason being returned).

While this change makes sense...

> 
> Merge is_valid_nodename() into canonicalize(). While at it merge
> valid_chars() into it, too.

... I am not in favor of folding the code is_valid_nodename() and 
valid_chars() into canonicalize() because the code is now more difficult 
to read. Also, the keeping the split would allow to free the 'name' in 
case of an error without adding too much goto in the code.

> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V3:
> - new patch
> ---
>   tools/xenstore/xenstored_core.c  | 89 ++++++++++++++------------------
>   tools/xenstore/xenstored_core.h  |  6 +--
>   tools/xenstore/xenstored_watch.c | 16 ++----
>   3 files changed, 45 insertions(+), 66 deletions(-)
> 
> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
> index ea5a1a9cce..ec20bc042d 100644
> --- a/tools/xenstore/xenstored_core.c
> +++ b/tools/xenstore/xenstored_core.c
> @@ -1210,42 +1210,6 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
>   	send_reply(conn, type, "OK", sizeof("OK"));
>   }
>   
> -static bool valid_chars(const char *node)
> -{
> -	/* Nodes can have lots of crap. */
> -	return (strspn(node,
> -		       "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> -		       "abcdefghijklmnopqrstuvwxyz"
> -		       "0123456789-/_@") == strlen(node));
> -}
> -
> -bool is_valid_nodename(const struct connection *conn, const char *node,
> -		       bool allow_special)
> -{
> -	int local_off = 0;
> -	unsigned int domid;
> -
> -	/* Must start in / or - if special nodes are allowed - in @. */
> -	if (!strstarts(node, "/") && (!allow_special || !strstarts(node, "@")))
> -		return false;
> -
> -	/* Cannot end in / (unless it's just "/"). */
> -	if (strends(node, "/") && !streq(node, "/"))
> -		return false;
> -
> -	/* No double //. */
> -	if (strstr(node, "//"))
> -		return false;
> -
> -	if (sscanf(node, "/local/domain/%5u/%n", &domid, &local_off) != 1)
> -		local_off = 0;
> -
> -	if (domain_max_chk(conn, ACC_PATHLEN, strlen(node) - local_off))
> -		return false;
> -
> -	return valid_chars(node);
> -}
> -
>   /* We expect one arg in the input: return NULL otherwise.
>    * The payload must contain exactly one nul, at the end.
>    */
> @@ -1279,16 +1243,46 @@ static char *perms_to_strings(const void *ctx, const struct node_perms *perms,
>   }
>   
>   const char *canonicalize(struct connection *conn, const void *ctx,
> -			 const char *node)
> +			 const char *node, bool allow_special)
>   {
> -	const char *prefix;
> +	char *name;
> +	int local_off = 0;
> +	unsigned int domid;
>   
> -	if (!node || (node[0] == '/') || (node[0] == '@'))
> -		return node;
> -	prefix = get_implicit_path(conn);
> -	if (prefix)
> -		return talloc_asprintf(ctx, "%s/%s", prefix, node);
> -	return node;
> +	errno = EINVAL;
> +	if (!node)
> +		return NULL;
> +
> +	if (strspn(node, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
> +			 "0123456789-/_@") != strlen(node))
> +		return NULL;
> +
> +	if (node[0] == '@' && !allow_special)
> +		return NULL;
> +
> +	if (node[0] != '/' && node[0] != '@') {
> +		name = talloc_asprintf(ctx, "%s/%s", get_implicit_path(conn),
> +				       node);

This is allocated but not freed on error. I understand this is part of 
the 'ctxt' and therefore will be free later on. But this means temporary 
memory will be allocated for longer. So best to clean-up when you can.

> +		if (!name)
> +			return NULL;
> +	} else
> +		name = (char *)node;

Why does name need to be const?

> +
> +	/* Cannot end in / (unless it's just "/"). */
> +	if (strends(name, "/") && !streq(name, "/"))
> +		return NULL;
> +
> +	/* No double //. */
> +	if (strstr(name, "//"))
> +		return NULL;
> +
> +	if (sscanf(name, "/local/domain/%5u/%n", &domid, &local_off) != 1)
> +		local_off = 0;
> +
> +	if (domain_max_chk(conn, ACC_PATHLEN, strlen(name) - local_off))
> +		return NULL;
> +
> +	return name;
>   }

Cheers,
Jürgen Groß Aug. 4, 2023, 9:35 a.m. UTC | #2
On 03.08.23 23:46, Julien Grall wrote:
> Hi,
> 
> On 24/07/2023 12:02, Juergen Gross wrote:
>> Today is_valid_nodename() is always called directly after calling
>> canonicalize(), with the exception of do_unwatch(), where the call
>> is missing (which is not correct, but results just in a wrong error
>> reason being returned).
> 
> While this change makes sense...
> 
>>
>> Merge is_valid_nodename() into canonicalize(). While at it merge
>> valid_chars() into it, too.
> 
> ... I am not in favor of folding the code is_valid_nodename() and valid_chars() 
> into canonicalize() because the code is now more difficult to read. Also, the 
> keeping the split would allow to free the 'name' in case of an error without 
> adding too much goto in the code.

I don't think we can easily free name in an error case, at that would require
to keep knowledge that name was just allocated in the non-canonical case.

> 
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V3:
>> - new patch
>> ---
>>   tools/xenstore/xenstored_core.c  | 89 ++++++++++++++------------------
>>   tools/xenstore/xenstored_core.h  |  6 +--
>>   tools/xenstore/xenstored_watch.c | 16 ++----
>>   3 files changed, 45 insertions(+), 66 deletions(-)
>>
>> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
>> index ea5a1a9cce..ec20bc042d 100644
>> --- a/tools/xenstore/xenstored_core.c
>> +++ b/tools/xenstore/xenstored_core.c
>> @@ -1210,42 +1210,6 @@ void send_ack(struct connection *conn, enum 
>> xsd_sockmsg_type type)
>>       send_reply(conn, type, "OK", sizeof("OK"));
>>   }
>> -static bool valid_chars(const char *node)
>> -{
>> -    /* Nodes can have lots of crap. */
>> -    return (strspn(node,
>> -               "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
>> -               "abcdefghijklmnopqrstuvwxyz"
>> -               "0123456789-/_@") == strlen(node));
>> -}
>> -
>> -bool is_valid_nodename(const struct connection *conn, const char *node,
>> -               bool allow_special)
>> -{
>> -    int local_off = 0;
>> -    unsigned int domid;
>> -
>> -    /* Must start in / or - if special nodes are allowed - in @. */
>> -    if (!strstarts(node, "/") && (!allow_special || !strstarts(node, "@")))
>> -        return false;
>> -
>> -    /* Cannot end in / (unless it's just "/"). */
>> -    if (strends(node, "/") && !streq(node, "/"))
>> -        return false;
>> -
>> -    /* No double //. */
>> -    if (strstr(node, "//"))
>> -        return false;
>> -
>> -    if (sscanf(node, "/local/domain/%5u/%n", &domid, &local_off) != 1)
>> -        local_off = 0;
>> -
>> -    if (domain_max_chk(conn, ACC_PATHLEN, strlen(node) - local_off))
>> -        return false;
>> -
>> -    return valid_chars(node);
>> -}
>> -
>>   /* We expect one arg in the input: return NULL otherwise.
>>    * The payload must contain exactly one nul, at the end.
>>    */
>> @@ -1279,16 +1243,46 @@ static char *perms_to_strings(const void *ctx, const 
>> struct node_perms *perms,
>>   }
>>   const char *canonicalize(struct connection *conn, const void *ctx,
>> -             const char *node)
>> +             const char *node, bool allow_special)
>>   {
>> -    const char *prefix;
>> +    char *name;
>> +    int local_off = 0;
>> +    unsigned int domid;
>> -    if (!node || (node[0] == '/') || (node[0] == '@'))
>> -        return node;
>> -    prefix = get_implicit_path(conn);
>> -    if (prefix)
>> -        return talloc_asprintf(ctx, "%s/%s", prefix, node);
>> -    return node;
>> +    errno = EINVAL;
>> +    if (!node)
>> +        return NULL;
>> +
>> +    if (strspn(node, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
>> +             "0123456789-/_@") != strlen(node))
>> +        return NULL;
>> +
>> +    if (node[0] == '@' && !allow_special)
>> +        return NULL;
>> +
>> +    if (node[0] != '/' && node[0] != '@') {
>> +        name = talloc_asprintf(ctx, "%s/%s", get_implicit_path(conn),
>> +                       node);
> 
> This is allocated but not freed on error. I understand this is part of the 
> 'ctxt' and therefore will be free later on. But this means temporary memory will 
> be allocated for longer. So best to clean-up when you can.

Really?

It is possible, of course, but it is adding more code churn. Remember that
"name" is allocated only in case of a relative path, so freeing it needs
to be conditional, too (yes, it would be possible via comparing name to node).

In case you want me to go this route, I can rearrange the code in order to
avoid multiple error exits by having only one large if () testing for all
possible violations.

> 
>> +        if (!name)
>> +            return NULL;
>> +    } else
>> +        name = (char *)node;
> 
> Why does name need to be const?

I think the question was posed in a wrong way. :-)

I'll change name to be const char *.


Juergen
Julien Grall Aug. 4, 2023, 10 a.m. UTC | #3
Hi,

On 04/08/2023 10:35, Juergen Gross wrote:
> On 03.08.23 23:46, Julien Grall wrote:
>> Hi,
>>
>> On 24/07/2023 12:02, Juergen Gross wrote:
>>> Today is_valid_nodename() is always called directly after calling
>>> canonicalize(), with the exception of do_unwatch(), where the call
>>> is missing (which is not correct, but results just in a wrong error
>>> reason being returned).
>>
>> While this change makes sense...
>>
>>>
>>> Merge is_valid_nodename() into canonicalize(). While at it merge
>>> valid_chars() into it, too.
>>
>> ... I am not in favor of folding the code is_valid_nodename() and 
>> valid_chars() into canonicalize() because the code is now more 
>> difficult to read. Also, the keeping the split would allow to free the 
>> 'name' in case of an error without adding too much goto in the code.
> 
> I don't think we can easily free name in an error case, at that would 
> require
> to keep knowledge that name was just allocated in the non-canonical case.

How about this:

const char *canonicalize(struct connection *conn, const void *ctx,
                          const char *node, bool allow_special)
{
         const char *prefix;
         const char *name;

         if (!node)
                 return NULL;

         if (node[0] == '@' && !allow_special)
                 return NULL;

         if (!node || (node[0] == '/') || (node[0] == '@'))
                 return node;
         prefix = get_implicit_path(conn);
         if (prefix) {
                 name = talloc_asprintf(ctx, "%s/%s", prefix, node);
                 if (name)
                         return NULL;
         } else
                 name = node;

         if (!is_valid_nodename(conn, node, allow_special)) {
                 /* Release the memory if 'name' was allocated by us */
                 if ( name != node )
                         talloc_free(name);
                 return NULL;
         }

         return name;
}

And before you ask, I don't see the benefits to partially validate the 
name before allocating. Hence why I suggest to keep is_valid_nodename() 
as this keep the function small.

>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>> ---
>>> V3:
>>> - new patch
>>> ---
>>>   tools/xenstore/xenstored_core.c  | 89 ++++++++++++++------------------
>>>   tools/xenstore/xenstored_core.h  |  6 +--
>>>   tools/xenstore/xenstored_watch.c | 16 ++----
>>>   3 files changed, 45 insertions(+), 66 deletions(-)
>>>
>>> diff --git a/tools/xenstore/xenstored_core.c 
>>> b/tools/xenstore/xenstored_core.c
>>> index ea5a1a9cce..ec20bc042d 100644
>>> --- a/tools/xenstore/xenstored_core.c
>>> +++ b/tools/xenstore/xenstored_core.c
>>> @@ -1210,42 +1210,6 @@ void send_ack(struct connection *conn, enum 
>>> xsd_sockmsg_type type)
>>>       send_reply(conn, type, "OK", sizeof("OK"));
>>>   }
>>> -static bool valid_chars(const char *node)
>>> -{
>>> -    /* Nodes can have lots of crap. */
>>> -    return (strspn(node,
>>> -               "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
>>> -               "abcdefghijklmnopqrstuvwxyz"
>>> -               "0123456789-/_@") == strlen(node));
>>> -}
>>> -
>>> -bool is_valid_nodename(const struct connection *conn, const char *node,
>>> -               bool allow_special)
>>> -{
>>> -    int local_off = 0;
>>> -    unsigned int domid;
>>> -
>>> -    /* Must start in / or - if special nodes are allowed - in @. */
>>> -    if (!strstarts(node, "/") && (!allow_special || !strstarts(node, 
>>> "@")))
>>> -        return false;
>>> -
>>> -    /* Cannot end in / (unless it's just "/"). */
>>> -    if (strends(node, "/") && !streq(node, "/"))
>>> -        return false;
>>> -
>>> -    /* No double //. */
>>> -    if (strstr(node, "//"))
>>> -        return false;
>>> -
>>> -    if (sscanf(node, "/local/domain/%5u/%n", &domid, &local_off) != 1)
>>> -        local_off = 0;
>>> -
>>> -    if (domain_max_chk(conn, ACC_PATHLEN, strlen(node) - local_off))
>>> -        return false;
>>> -
>>> -    return valid_chars(node);
>>> -}
>>> -
>>>   /* We expect one arg in the input: return NULL otherwise.
>>>    * The payload must contain exactly one nul, at the end.
>>>    */
>>> @@ -1279,16 +1243,46 @@ static char *perms_to_strings(const void 
>>> *ctx, const struct node_perms *perms,
>>>   }
>>>   const char *canonicalize(struct connection *conn, const void *ctx,
>>> -             const char *node)
>>> +             const char *node, bool allow_special)
>>>   {
>>> -    const char *prefix;
>>> +    char *name;
>>> +    int local_off = 0;
>>> +    unsigned int domid;
>>> -    if (!node || (node[0] == '/') || (node[0] == '@'))
>>> -        return node;
>>> -    prefix = get_implicit_path(conn);
>>> -    if (prefix)
>>> -        return talloc_asprintf(ctx, "%s/%s", prefix, node);
>>> -    return node;
>>> +    errno = EINVAL;
>>> +    if (!node)
>>> +        return NULL;
>>> +
>>> +    if (strspn(node, 
>>> "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
>>> +             "0123456789-/_@") != strlen(node))
>>> +        return NULL;
>>> +
>>> +    if (node[0] == '@' && !allow_special)
>>> +        return NULL;
>>> +
>>> +    if (node[0] != '/' && node[0] != '@') {
>>> +        name = talloc_asprintf(ctx, "%s/%s", get_implicit_path(conn),
>>> +                       node);
>>
>> This is allocated but not freed on error. I understand this is part of 
>> the 'ctxt' and therefore will be free later on. But this means 
>> temporary memory will be allocated for longer. So best to clean-up 
>> when you can.
> 
> Really?

Let me reply with a different question. Why should we keep the memory 
allocated longer than necessary?

> 
> It is possible, of course, but it is adding more code churn. Remember that
> "name" is allocated only in case of a relative path, so freeing it needs
> to be conditional, too (yes, it would be possible via comparing name to 
> node).

See above, a proposal.

> 
> In case you want me to go this route, I can rearrange the code in order to
> avoid multiple error exits by having only one large if () testing for all
> possible violations.
> 
>>
>>> +        if (!name)
>>> +            return NULL;
>>> +    } else
>>> +        name = (char *)node;
>>
>> Why does name need to be const?
> 
> I think the question was posed in a wrong way. :-)

Whoops yes.

> 
> I'll change name to be const char *.

Cheers,
Jürgen Groß Aug. 4, 2023, 10:17 a.m. UTC | #4
On 04.08.23 12:00, Julien Grall wrote:
> Hi,
> 
> On 04/08/2023 10:35, Juergen Gross wrote:
>> On 03.08.23 23:46, Julien Grall wrote:
>>> Hi,
>>>
>>> On 24/07/2023 12:02, Juergen Gross wrote:
>>>> Today is_valid_nodename() is always called directly after calling
>>>> canonicalize(), with the exception of do_unwatch(), where the call
>>>> is missing (which is not correct, but results just in a wrong error
>>>> reason being returned).
>>>
>>> While this change makes sense...
>>>
>>>>
>>>> Merge is_valid_nodename() into canonicalize(). While at it merge
>>>> valid_chars() into it, too.
>>>
>>> ... I am not in favor of folding the code is_valid_nodename() and 
>>> valid_chars() into canonicalize() because the code is now more difficult to 
>>> read. Also, the keeping the split would allow to free the 'name' in case of 
>>> an error without adding too much goto in the code.
>>
>> I don't think we can easily free name in an error case, at that would require
>> to keep knowledge that name was just allocated in the non-canonical case.
> 
> How about this:
> 
> const char *canonicalize(struct connection *conn, const void *ctx,
>                           const char *node, bool allow_special)
> {
>          const char *prefix;
>          const char *name;
> 
>          if (!node)
>                  return NULL;
> 
>          if (node[0] == '@' && !allow_special)
>                  return NULL;
> 
>          if (!node || (node[0] == '/') || (node[0] == '@'))
>                  return node;
>          prefix = get_implicit_path(conn);
>          if (prefix) {
>                  name = talloc_asprintf(ctx, "%s/%s", prefix, node);
>                  if (name)
>                          return NULL;
>          } else
>                  name = node;
> 
>          if (!is_valid_nodename(conn, node, allow_special)) {
>                  /* Release the memory if 'name' was allocated by us */
>                  if ( name != node )
>                          talloc_free(name);
>                  return NULL;
>          }
> 
>          return name;
> }
> 
> And before you ask, I don't see the benefits to partially validate the name 
> before allocating. Hence why I suggest to keep is_valid_nodename() as this keep 
> the function small.

Partially validating before doing the allocation is a benefit as it doesn't
spend cycles on validating the known good prefix.

Additionally your example won't validate an absolute pathname at all.

What about this variant:

const char *canonicalize(struct connection *conn, const void *ctx,
                          const char *node, bool allow_special)
{
         const char *name;
         int local_off = 0;
         unsigned int domid;

         /*
          * Invalid if any of:
          * - no node at all
          * - illegal character in node
          * - starts with '@' but no special node allowed
          */
         errno = EINVAL;
         if (!node ||
             strspn(node, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
                          "0123456789-/_@") != strlen(node) ||
             (node[0] == '@' && !allow_special))
                 return NULL;

         if (node[0] != '/' && node[0] != '@') {
                 name = talloc_asprintf(ctx, "%s/%s", get_implicit_path(conn),
                                        node);
                 if (!name)
                         return NULL;
         } else
                 name = node;

         if (sscanf(name, "/local/domain/%5u/%n", &domid, &local_off) != 1)
                 local_off = 0;

         /*
          * Only valid if:
          * - doesn't end in / (unless it's just "/")
          * - no double //
          * - not violating max allowed path length
          */
         if (!(strends(name, "/") && !streq(name, "/")) &&
             !strstr(name, "//") &&
             !domain_max_chk(conn, ACC_PATHLEN, strlen(name) - local_off))
                 return name;

         /* Release the memory if 'name' was allocated by us. */
         if (name != node)
                 talloc_free(name);
         return NULL;
}


Juergen
Julien Grall Aug. 4, 2023, 10:33 a.m. UTC | #5
Hi Juergen,

On 04/08/2023 11:17, Juergen Gross wrote:
> On 04.08.23 12:00, Julien Grall wrote:
>> Hi,
>>
>> On 04/08/2023 10:35, Juergen Gross wrote:
>>> On 03.08.23 23:46, Julien Grall wrote:
>>>> Hi,
>>>>
>>>> On 24/07/2023 12:02, Juergen Gross wrote:
>>>>> Today is_valid_nodename() is always called directly after calling
>>>>> canonicalize(), with the exception of do_unwatch(), where the call
>>>>> is missing (which is not correct, but results just in a wrong error
>>>>> reason being returned).
>>>>
>>>> While this change makes sense...
>>>>
>>>>>
>>>>> Merge is_valid_nodename() into canonicalize(). While at it merge
>>>>> valid_chars() into it, too.
>>>>
>>>> ... I am not in favor of folding the code is_valid_nodename() and 
>>>> valid_chars() into canonicalize() because the code is now more 
>>>> difficult to read. Also, the keeping the split would allow to free 
>>>> the 'name' in case of an error without adding too much goto in the 
>>>> code.
>>>
>>> I don't think we can easily free name in an error case, at that would 
>>> require
>>> to keep knowledge that name was just allocated in the non-canonical 
>>> case.
>>
>> How about this:
>>
>> const char *canonicalize(struct connection *conn, const void *ctx,
>>                           const char *node, bool allow_special)
>> {
>>          const char *prefix;
>>          const char *name;
>>
>>          if (!node)
>>                  return NULL;
>>
>>          if (node[0] == '@' && !allow_special)
>>                  return NULL;
>>
>>          if (!node || (node[0] == '/') || (node[0] == '@'))
>>                  return node;
>>          prefix = get_implicit_path(conn);
>>          if (prefix) {
>>                  name = talloc_asprintf(ctx, "%s/%s", prefix, node);
>>                  if (name)
>>                          return NULL;
>>          } else
>>                  name = node;
>>
>>          if (!is_valid_nodename(conn, node, allow_special)) {
>>                  /* Release the memory if 'name' was allocated by us */
>>                  if ( name != node )
>>                          talloc_free(name);
>>                  return NULL;
>>          }
>>
>>          return name;
>> }
>>
>> And before you ask, I don't see the benefits to partially validate the 
>> name before allocating. Hence why I suggest to keep 
>> is_valid_nodename() as this keep the function small.
> 
> Partially validating before doing the allocation is a benefit as it doesn't
> spend cycles on validating the known good prefix.

Which is pretty much a drop in the ocean in the context of Xenstored :). 
In reality most of the validation can be done before the allocation with 
a bit of work.

> 
> Additionally your example won't validate an absolute pathname at all.

That's an error in the logic. This can be sorted out.

> 
> What about this variant:

I still don't see the value of folding is_valid_node_name(). But if the 
other agrees with you then...

> 
> const char *canonicalize(struct connection *conn, const void *ctx,
>                           const char *node, bool allow_special)
> {
>          const char *name;
>          int local_off = 0;
>          unsigned int domid;
> 
>          /*
>           * Invalid if any of:
>           * - no node at all
>           * - illegal character in node
>           * - starts with '@' but no special node allowed
>           */
>          errno = EINVAL;
>          if (!node ||
>              strspn(node, 
> "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
>                           "0123456789-/_@") != strlen(node) ||

... I would rather keep calling valid_chars() here. The rest looks fine 
even though this is definitely not my preference.

>              (node[0] == '@' && !allow_special))
>                  return NULL;
> 
>          if (node[0] != '/' && node[0] != '@') {
>                  name = talloc_asprintf(ctx, "%s/%s", 
> get_implicit_path(conn),
>                                         node);
>                  if (!name)
>                          return NULL;
>          } else
>                  name = node;
> 
>          if (sscanf(name, "/local/domain/%5u/%n", &domid, &local_off) != 1)
>                  local_off = 0;
> 
>          /*
>           * Only valid if:
>           * - doesn't end in / (unless it's just "/")
>           * - no double //
>           * - not violating max allowed path length
>           */
>          if (!(strends(name, "/") && !streq(name, "/")) &&
>              !strstr(name, "//") &&
>              !domain_max_chk(conn, ACC_PATHLEN, strlen(name) - local_off))
>                  return name;
> 
>          /* Release the memory if 'name' was allocated by us. */
>          if (name != node)
>                  talloc_free(name);
>          return NULL;
> }

Cheers,
Jürgen Groß Aug. 4, 2023, 12:05 p.m. UTC | #6
On 04.08.23 12:33, Julien Grall wrote:
> Hi Juergen,
> 
> On 04/08/2023 11:17, Juergen Gross wrote:
>> On 04.08.23 12:00, Julien Grall wrote:
>>> Hi,
>>>
>>> On 04/08/2023 10:35, Juergen Gross wrote:
>>>> On 03.08.23 23:46, Julien Grall wrote:
>>>>> Hi,
>>>>>
>>>>> On 24/07/2023 12:02, Juergen Gross wrote:
>>>>>> Today is_valid_nodename() is always called directly after calling
>>>>>> canonicalize(), with the exception of do_unwatch(), where the call
>>>>>> is missing (which is not correct, but results just in a wrong error
>>>>>> reason being returned).
>>>>>
>>>>> While this change makes sense...
>>>>>
>>>>>>
>>>>>> Merge is_valid_nodename() into canonicalize(). While at it merge
>>>>>> valid_chars() into it, too.
>>>>>
>>>>> ... I am not in favor of folding the code is_valid_nodename() and 
>>>>> valid_chars() into canonicalize() because the code is now more difficult to 
>>>>> read. Also, the keeping the split would allow to free the 'name' in case of 
>>>>> an error without adding too much goto in the code.
>>>>
>>>> I don't think we can easily free name in an error case, at that would require
>>>> to keep knowledge that name was just allocated in the non-canonical case.
>>>
>>> How about this:
>>>
>>> const char *canonicalize(struct connection *conn, const void *ctx,
>>>                           const char *node, bool allow_special)
>>> {
>>>          const char *prefix;
>>>          const char *name;
>>>
>>>          if (!node)
>>>                  return NULL;
>>>
>>>          if (node[0] == '@' && !allow_special)
>>>                  return NULL;
>>>
>>>          if (!node || (node[0] == '/') || (node[0] == '@'))
>>>                  return node;
>>>          prefix = get_implicit_path(conn);
>>>          if (prefix) {
>>>                  name = talloc_asprintf(ctx, "%s/%s", prefix, node);
>>>                  if (name)
>>>                          return NULL;
>>>          } else
>>>                  name = node;
>>>
>>>          if (!is_valid_nodename(conn, node, allow_special)) {
>>>                  /* Release the memory if 'name' was allocated by us */
>>>                  if ( name != node )
>>>                          talloc_free(name);
>>>                  return NULL;
>>>          }
>>>
>>>          return name;
>>> }
>>>
>>> And before you ask, I don't see the benefits to partially validate the name 
>>> before allocating. Hence why I suggest to keep is_valid_nodename() as this 
>>> keep the function small.
>>
>> Partially validating before doing the allocation is a benefit as it doesn't
>> spend cycles on validating the known good prefix.
> 
> Which is pretty much a drop in the ocean in the context of Xenstored :). In 
> reality most of the validation can be done before the allocation with a bit of 
> work.

Yes, but this would need a more complicated logic related to the handling
of local_off. I thought about that, but didn't want to make the patch more
complicated.

And regarding performance - yes, it will be minimal, but it was rather low
hanging fruit.

>> Additionally your example won't validate an absolute pathname at all.
> 
> That's an error in the logic. This can be sorted out.
> 
>>
>> What about this variant:
> 
> I still don't see the value of folding is_valid_node_name(). But if the other 
> agrees with you then...

I didn't see the value of keeping them apart, as the only caller of
is_valid_node_name() would be canonicalize() after this patch. :-)

> 
>>
>> const char *canonicalize(struct connection *conn, const void *ctx,
>>                           const char *node, bool allow_special)
>> {
>>          const char *name;
>>          int local_off = 0;
>>          unsigned int domid;
>>
>>          /*
>>           * Invalid if any of:
>>           * - no node at all
>>           * - illegal character in node
>>           * - starts with '@' but no special node allowed
>>           */
>>          errno = EINVAL;
>>          if (!node ||
>>              strspn(node, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
>>                           "0123456789-/_@") != strlen(node) ||
> 
> ... I would rather keep calling valid_chars() here. The rest looks fine even 
> though this is definitely not my preference.

I can do that, even if I don't see the real value with the comment above the if.


Juergen
Julien Grall Aug. 4, 2023, 12:27 p.m. UTC | #7
Hi Juergen,

On 04/08/2023 13:05, Juergen Gross wrote:
> On 04.08.23 12:33, Julien Grall wrote:
>>> const char *canonicalize(struct connection *conn, const void *ctx,
>>>                           const char *node, bool allow_special)
>>> {
>>>          const char *name;
>>>          int local_off = 0;
>>>          unsigned int domid;
>>>
>>>          /*
>>>           * Invalid if any of:
>>>           * - no node at all
>>>           * - illegal character in node
>>>           * - starts with '@' but no special node allowed
>>>           */
>>>          errno = EINVAL;
>>>          if (!node ||
>>>              strspn(node, 
>>> "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
>>>                           "0123456789-/_@") != strlen(node) ||
>>
>> ... I would rather keep calling valid_chars() here. The rest looks 
>> fine even though this is definitely not my preference.
> 
> I can do that, even if I don't see the real value with the comment above 
> the if.

How about writing Xenstored in a single function then? After all with 
comments it should be easy to read, right? :)

There are a few difficulty with the current approach. There are:
   * a large function call that needs to be split over two lines
   * multiple || which also need to split over multiple lines.
   * No parentheses over strspn(....) != strlen(node)

Maybe you can parse/understand this 'if' very quickly. But I can't and 
this is just slowing down review and increasing the risk of introducing 
bugs.

Cheers,
Jürgen Groß Aug. 4, 2023, 12:43 p.m. UTC | #8
On 04.08.23 14:27, Julien Grall wrote:
> Hi Juergen,
> 
> On 04/08/2023 13:05, Juergen Gross wrote:
>> On 04.08.23 12:33, Julien Grall wrote:
>>>> const char *canonicalize(struct connection *conn, const void *ctx,
>>>>                           const char *node, bool allow_special)
>>>> {
>>>>          const char *name;
>>>>          int local_off = 0;
>>>>          unsigned int domid;
>>>>
>>>>          /*
>>>>           * Invalid if any of:
>>>>           * - no node at all
>>>>           * - illegal character in node
>>>>           * - starts with '@' but no special node allowed
>>>>           */
>>>>          errno = EINVAL;
>>>>          if (!node ||
>>>>              strspn(node, 
>>>> "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
>>>>                           "0123456789-/_@") != strlen(node) ||
>>>
>>> ... I would rather keep calling valid_chars() here. The rest looks fine even 
>>> though this is definitely not my preference.
>>
>> I can do that, even if I don't see the real value with the comment above the if.
> 
> How about writing Xenstored in a single function then? After all with comments 
> it should be easy to read, right? :)

Yeah, right.

Might come with the downside of a little bit of code duplication. ;-)

> 
> There are a few difficulty with the current approach. There are:
>    * a large function call that needs to be split over two lines
>    * multiple || which also need to split over multiple lines.
>    * No parentheses over strspn(....) != strlen(node)
> 
> Maybe you can parse/understand this 'if' very quickly. But I can't and this is 
> just slowing down review and increasing the risk of introducing bugs.

Okay, as said above: I can do that.


Juergen
diff mbox series

Patch

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index ea5a1a9cce..ec20bc042d 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -1210,42 +1210,6 @@  void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
 	send_reply(conn, type, "OK", sizeof("OK"));
 }
 
-static bool valid_chars(const char *node)
-{
-	/* Nodes can have lots of crap. */
-	return (strspn(node, 
-		       "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-		       "abcdefghijklmnopqrstuvwxyz"
-		       "0123456789-/_@") == strlen(node));
-}
-
-bool is_valid_nodename(const struct connection *conn, const char *node,
-		       bool allow_special)
-{
-	int local_off = 0;
-	unsigned int domid;
-
-	/* Must start in / or - if special nodes are allowed - in @. */
-	if (!strstarts(node, "/") && (!allow_special || !strstarts(node, "@")))
-		return false;
-
-	/* Cannot end in / (unless it's just "/"). */
-	if (strends(node, "/") && !streq(node, "/"))
-		return false;
-
-	/* No double //. */
-	if (strstr(node, "//"))
-		return false;
-
-	if (sscanf(node, "/local/domain/%5u/%n", &domid, &local_off) != 1)
-		local_off = 0;
-
-	if (domain_max_chk(conn, ACC_PATHLEN, strlen(node) - local_off))
-		return false;
-
-	return valid_chars(node);
-}
-
 /* We expect one arg in the input: return NULL otherwise.
  * The payload must contain exactly one nul, at the end.
  */
@@ -1279,16 +1243,46 @@  static char *perms_to_strings(const void *ctx, const struct node_perms *perms,
 }
 
 const char *canonicalize(struct connection *conn, const void *ctx,
-			 const char *node)
+			 const char *node, bool allow_special)
 {
-	const char *prefix;
+	char *name;
+	int local_off = 0;
+	unsigned int domid;
 
-	if (!node || (node[0] == '/') || (node[0] == '@'))
-		return node;
-	prefix = get_implicit_path(conn);
-	if (prefix)
-		return talloc_asprintf(ctx, "%s/%s", prefix, node);
-	return node;
+	errno = EINVAL;
+	if (!node)
+		return NULL;
+
+	if (strspn(node, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+			 "0123456789-/_@") != strlen(node))
+		return NULL;
+
+	if (node[0] == '@' && !allow_special)
+		return NULL;
+
+	if (node[0] != '/' && node[0] != '@') {
+		name = talloc_asprintf(ctx, "%s/%s", get_implicit_path(conn),
+				       node);
+		if (!name)
+			return NULL;
+	} else
+		name = (char *)node;
+
+	/* Cannot end in / (unless it's just "/"). */
+	if (strends(name, "/") && !streq(name, "/"))
+		return NULL;
+
+	/* No double //. */
+	if (strstr(name, "//"))
+		return NULL;
+
+	if (sscanf(name, "/local/domain/%5u/%n", &domid, &local_off) != 1)
+		local_off = 0;
+
+	if (domain_max_chk(conn, ACC_PATHLEN, strlen(name) - local_off))
+		return NULL;
+
+	return name;
 }
 
 static struct node *get_node_canonicalized(struct connection *conn,
@@ -1302,13 +1296,10 @@  static struct node *get_node_canonicalized(struct connection *conn,
 
 	if (!canonical_name)
 		canonical_name = &tmp_name;
-	*canonical_name = canonicalize(conn, ctx, name);
+	*canonical_name = canonicalize(conn, ctx, name, allow_special);
 	if (!*canonical_name)
 		return NULL;
-	if (!is_valid_nodename(conn, *canonical_name, allow_special)) {
-		errno = EINVAL;
-		return NULL;
-	}
+
 	return get_node(conn, ctx, *canonical_name, perm);
 }
 
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index f3a83efce8..ec1d6aac27 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -241,7 +241,7 @@  void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
 
 /* Canonicalize this path if possible. */
 const char *canonicalize(struct connection *conn, const void *ctx,
-			 const char *node);
+			 const char *node, bool allow_special);
 
 /* Get access permissions. */
 unsigned int perm_for_conn(struct connection *conn,
@@ -294,10 +294,6 @@  struct connection *get_connection_by_id(unsigned int conn_id);
 void check_store(void);
 void corrupt(struct connection *conn, const char *fmt, ...);
 
-/* Is this a valid node name? */
-bool is_valid_nodename(const struct connection *conn, const char *node,
-		       bool allow_special);
-
 /* Get name of parent node. */
 char *get_parent(const void *ctx, const char *node);
 
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index 2662a3fa49..247d37e80f 100644
--- a/tools/xenstore/xenstored_watch.c
+++ b/tools/xenstore/xenstored_watch.c
@@ -167,17 +167,9 @@  static int check_watch_path(struct connection *conn, const void *ctx,
 			    const char **path, bool *relative)
 {
 	*relative = !strstarts(*path, "/") && !strstarts(*path, "@");
-	*path = canonicalize(conn, ctx, *path);
-	if (!*path)
-		return errno;
-	if (!is_valid_nodename(conn, *path, true))
-		goto inval;
-
-	return 0;
+	*path = canonicalize(conn, ctx, *path, true);
 
- inval:
-	errno = EINVAL;
-	return errno;
+	return *path ? 0 : errno;
 }
 
 static struct watch *add_watch(struct connection *conn, const char *path,
@@ -261,9 +253,9 @@  int do_unwatch(const void *ctx, struct connection *conn,
 	if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
 		return EINVAL;
 
-	node = canonicalize(conn, ctx, vec[0]);
+	node = canonicalize(conn, ctx, vec[0], true);
 	if (!node)
-		return ENOMEM;
+		return errno;
 	list_for_each_entry(watch, &conn->watches, list) {
 		if (streq(watch->node, node) && streq(watch->token, vec[1])) {
 			list_del(&watch->list);