diff mbox series

[v5,05/14] tools/xenstore: use accounting buffering for node accounting

Message ID 20230508114754.31514-6-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series tools/xenstore: rework internal accounting | expand

Commit Message

Jürgen Groß May 8, 2023, 11:47 a.m. UTC
Add the node accounting to the accounting information buffering in
order to avoid having to undo it in case of failure.

This requires to call domain_nbentry_dec() before any changes to the
data base, as it can return an error now.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V5:
- add error handling after domain_nbentry_dec() calls (Julien Grall)
---
 tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
 tools/xenstore/xenstored_domain.h |  4 ++--
 2 files changed, 9 insertions(+), 24 deletions(-)

Comments

Julien Grall May 9, 2023, 6:46 p.m. UTC | #1
Hi Juergen,

On 08/05/2023 12:47, Juergen Gross wrote:
> Add the node accounting to the accounting information buffering in
> order to avoid having to undo it in case of failure.
> 
> This requires to call domain_nbentry_dec() before any changes to the
> data base, as it can return an error now.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V5:
> - add error handling after domain_nbentry_dec() calls (Julien Grall)
> ---
>   tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
>   tools/xenstore/xenstored_domain.h |  4 ++--
>   2 files changed, 9 insertions(+), 24 deletions(-)
> 
> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
> index 8392bdec9b..22da434e2a 100644
> --- a/tools/xenstore/xenstored_core.c
> +++ b/tools/xenstore/xenstored_core.c
> @@ -1454,7 +1454,6 @@ static void destroy_node_rm(struct connection *conn, struct node *node)
>   static int destroy_node(struct connection *conn, struct node *node)
>   {
>   	destroy_node_rm(conn, node);
> -	domain_nbentry_dec(conn, get_node_owner(node));
>   
>   	/*
>   	 * It is not possible to easily revert the changes in a transaction.
> @@ -1645,6 +1644,9 @@ static int delnode_sub(const void *ctx, struct connection *conn,
>   	if (ret > 0)
>   		return WALK_TREE_SUCCESS_STOP;
>   
> +	if (domain_nbentry_dec(conn, get_node_owner(node)))
> +		return WALK_TREE_ERROR_STOP;

I think there is a potential issue with the buffering here. In case of 
failure, the node could have been removed, but the quota would not be 
properly accounted.

Also, I think a comment would be warrant to explain why we are returning 
WALK_TREE_ERROR_STOP here when...

> +
>   	/* In case of error stop the walk. */
>   	if (!ret && do_tdb_delete(conn, &key, &node->acc))
>   		return WALK_TREE_SUCCESS_STOP;

... this is not the case when do_tdb_delete() fails for some reasons.

> @@ -1657,8 +1659,6 @@ static int delnode_sub(const void *ctx, struct connection *conn,
>   	watch_exact = strcmp(root, node->name);
>   	fire_watches(conn, ctx, node->name, node, watch_exact, NULL);
>   
> -	domain_nbentry_dec(conn, get_node_owner(node));
> -
>   	return WALK_TREE_RM_CHILDENTRY;
>   }
>   
> @@ -1797,29 +1797,14 @@ static int do_set_perms(const void *ctx, struct connection *conn,
>   		return EPERM;
>   
>   	old_perms = node->perms;
> -	domain_nbentry_dec(conn, get_node_owner(node));
> +	if (domain_nbentry_dec(conn, get_node_owner(node)))
> +		return ENOMEM;
>   	node->perms = perms;
> -	if (domain_nbentry_inc(conn, get_node_owner(node))) {
> -		node->perms = old_perms;
> -		/*
> -		 * This should never fail because we had a reference on the
> -		 * domain before and Xenstored is single-threaded.
> -		 */
> -		domain_nbentry_inc(conn, get_node_owner(node));
> +	if (domain_nbentry_inc(conn, get_node_owner(node)))
>   		return ENOMEM;
> -	}
> -
> -	if (write_node(conn, node, false)) {
> -		int saved_errno = errno;
>   
> -		domain_nbentry_dec(conn, get_node_owner(node));
> -		node->perms = old_perms;
> -		/* No failure possible as above. */
> -		domain_nbentry_inc(conn, get_node_owner(node));
> -
> -		errno = saved_errno;
> +	if (write_node(conn, node, false))
>   		return errno;
> -	}
>   
>   	fire_watches(conn, ctx, name, node, false, &old_perms);
>   	send_ack(conn, XS_SET_PERMS);
> diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
> index e40657216b..466549709f 100644
> --- a/tools/xenstore/xenstored_domain.h
> +++ b/tools/xenstore/xenstored_domain.h
> @@ -25,9 +25,9 @@
>    * a per transaction array.
>    */
>   enum accitem {
> +	ACC_NODES,
>   	ACC_REQ_N,		/* Number of elements per request. */
> -	ACC_NODES = ACC_REQ_N,
> -	ACC_TR_N,		/* Number of elements per transaction. */
> +	ACC_TR_N = ACC_REQ_N,	/* Number of elements per transaction. */
>   	ACC_CHD_N = ACC_TR_N,	/* max(ACC_REQ_N, ACC_TR_N), for changed dom. */
>   	ACC_N = ACC_TR_N,	/* Number of elements per domain. */
>   };


Cheers,
Jürgen Groß May 10, 2023, 12:54 p.m. UTC | #2
On 09.05.23 20:46, Julien Grall wrote:
> Hi Juergen,
> 
> On 08/05/2023 12:47, Juergen Gross wrote:
>> Add the node accounting to the accounting information buffering in
>> order to avoid having to undo it in case of failure.
>>
>> This requires to call domain_nbentry_dec() before any changes to the
>> data base, as it can return an error now.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V5:
>> - add error handling after domain_nbentry_dec() calls (Julien Grall)
>> ---
>>   tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
>>   tools/xenstore/xenstored_domain.h |  4 ++--
>>   2 files changed, 9 insertions(+), 24 deletions(-)
>>
>> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
>> index 8392bdec9b..22da434e2a 100644
>> --- a/tools/xenstore/xenstored_core.c
>> +++ b/tools/xenstore/xenstored_core.c
>> @@ -1454,7 +1454,6 @@ static void destroy_node_rm(struct connection *conn, 
>> struct node *node)
>>   static int destroy_node(struct connection *conn, struct node *node)
>>   {
>>       destroy_node_rm(conn, node);
>> -    domain_nbentry_dec(conn, get_node_owner(node));
>>       /*
>>        * It is not possible to easily revert the changes in a transaction.
>> @@ -1645,6 +1644,9 @@ static int delnode_sub(const void *ctx, struct 
>> connection *conn,
>>       if (ret > 0)
>>           return WALK_TREE_SUCCESS_STOP;
>> +    if (domain_nbentry_dec(conn, get_node_owner(node)))
>> +        return WALK_TREE_ERROR_STOP;
> 
> I think there is a potential issue with the buffering here. In case of failure, 
> the node could have been removed, but the quota would not be properly accounted.

You mean the case where another node has been deleted and due to accounting
buffering the corrected accounting data wouldn't be committed?

This is no problem, as an error returned by delnode_sub() will result in
corrupt() being called, which in turn will correct the accounting data.

> Also, I think a comment would be warrant to explain why we are returning 
> WALK_TREE_ERROR_STOP here when...
> 
>> +
>>       /* In case of error stop the walk. */
>>       if (!ret && do_tdb_delete(conn, &key, &node->acc))
>>           return WALK_TREE_SUCCESS_STOP;
> 
> ... this is not the case when do_tdb_delete() fails for some reasons.

The main idea was that the remove is working from the leafs towards the root.
In case one entry can't be removed, we should just stop.

OTOH returning WALK_TREE_ERROR_STOP might be cleaner, as this would make sure
that accounting data is repaired afterwards. Even if do_tdb_delete() can't
really fail in normal configurations, as the only failure reasons are:

- the node isn't found (quite unlikely, as we just read it before entering
   delnode_sub()), or
- writing the updated data base failed (in normal configurations it is in
   already allocated memory, so no way to fail that)

I think I'll switch to return WALK_TREE_ERROR_STOP here.


Juergen
Julien Grall May 10, 2023, 9:31 p.m. UTC | #3
Hi,

On 10/05/2023 13:54, Juergen Gross wrote:
> On 09.05.23 20:46, Julien Grall wrote:
>> Hi Juergen,
>>
>> On 08/05/2023 12:47, Juergen Gross wrote:
>>> Add the node accounting to the accounting information buffering in
>>> order to avoid having to undo it in case of failure.
>>>
>>> This requires to call domain_nbentry_dec() before any changes to the
>>> data base, as it can return an error now.
>>>
>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>> ---
>>> V5:
>>> - add error handling after domain_nbentry_dec() calls (Julien Grall)
>>> ---
>>>   tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
>>>   tools/xenstore/xenstored_domain.h |  4 ++--
>>>   2 files changed, 9 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/tools/xenstore/xenstored_core.c 
>>> b/tools/xenstore/xenstored_core.c
>>> index 8392bdec9b..22da434e2a 100644
>>> --- a/tools/xenstore/xenstored_core.c
>>> +++ b/tools/xenstore/xenstored_core.c
>>> @@ -1454,7 +1454,6 @@ static void destroy_node_rm(struct connection 
>>> *conn, struct node *node)
>>>   static int destroy_node(struct connection *conn, struct node *node)
>>>   {
>>>       destroy_node_rm(conn, node);
>>> -    domain_nbentry_dec(conn, get_node_owner(node));
>>>       /*
>>>        * It is not possible to easily revert the changes in a 
>>> transaction.
>>> @@ -1645,6 +1644,9 @@ static int delnode_sub(const void *ctx, struct 
>>> connection *conn,
>>>       if (ret > 0)
>>>           return WALK_TREE_SUCCESS_STOP;
>>> +    if (domain_nbentry_dec(conn, get_node_owner(node)))
>>> +        return WALK_TREE_ERROR_STOP;
>>
>> I think there is a potential issue with the buffering here. In case of 
>> failure, the node could have been removed, but the quota would not be 
>> properly accounted.
> 
> You mean the case where another node has been deleted and due to accounting
> buffering the corrected accounting data wouldn't be committed?
> 
> This is no problem, as an error returned by delnode_sub() will result in
> corrupt() being called, which in turn will correct the accounting data.

To me corrupt() is a big hammer and it feels wrong to call it when I 
think we have easier/faster way to deal with the issue. Could we instead 
call acc_commit() before returning?

> 
>> Also, I think a comment would be warrant to explain why we are 
>> returning WALK_TREE_ERROR_STOP here when...
>>
>>> +
>>>       /* In case of error stop the walk. */
>>>       if (!ret && do_tdb_delete(conn, &key, &node->acc))
>>>           return WALK_TREE_SUCCESS_STOP;
>>
>> ... this is not the case when do_tdb_delete() fails for some reasons.
> 
> The main idea was that the remove is working from the leafs towards the 
> root.
> In case one entry can't be removed, we should just stop.
> 
> OTOH returning WALK_TREE_ERROR_STOP might be cleaner, as this would make 
> sure
> that accounting data is repaired afterwards. Even if do_tdb_delete() can't
> really fail in normal configurations, as the only failure reasons are:
> 
> - the node isn't found (quite unlikely, as we just read it before entering
>    delnode_sub()), or
> - writing the updated data base failed (in normal configurations it is in
>    already allocated memory, so no way to fail that)
> 
> I think I'll switch to return WALK_TREE_ERROR_STOP here.

See above for a different proposal.

Cheers,
Jürgen Groß May 11, 2023, 5:25 a.m. UTC | #4
On 10.05.23 23:31, Julien Grall wrote:
> Hi,
> 
> On 10/05/2023 13:54, Juergen Gross wrote:
>> On 09.05.23 20:46, Julien Grall wrote:
>>> Hi Juergen,
>>>
>>> On 08/05/2023 12:47, Juergen Gross wrote:
>>>> Add the node accounting to the accounting information buffering in
>>>> order to avoid having to undo it in case of failure.
>>>>
>>>> This requires to call domain_nbentry_dec() before any changes to the
>>>> data base, as it can return an error now.
>>>>
>>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>>> ---
>>>> V5:
>>>> - add error handling after domain_nbentry_dec() calls (Julien Grall)
>>>> ---
>>>>   tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
>>>>   tools/xenstore/xenstored_domain.h |  4 ++--
>>>>   2 files changed, 9 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
>>>> index 8392bdec9b..22da434e2a 100644
>>>> --- a/tools/xenstore/xenstored_core.c
>>>> +++ b/tools/xenstore/xenstored_core.c
>>>> @@ -1454,7 +1454,6 @@ static void destroy_node_rm(struct connection *conn, 
>>>> struct node *node)
>>>>   static int destroy_node(struct connection *conn, struct node *node)
>>>>   {
>>>>       destroy_node_rm(conn, node);
>>>> -    domain_nbentry_dec(conn, get_node_owner(node));
>>>>       /*
>>>>        * It is not possible to easily revert the changes in a transaction.
>>>> @@ -1645,6 +1644,9 @@ static int delnode_sub(const void *ctx, struct 
>>>> connection *conn,
>>>>       if (ret > 0)
>>>>           return WALK_TREE_SUCCESS_STOP;
>>>> +    if (domain_nbentry_dec(conn, get_node_owner(node)))
>>>> +        return WALK_TREE_ERROR_STOP;
>>>
>>> I think there is a potential issue with the buffering here. In case of 
>>> failure, the node could have been removed, but the quota would not be 
>>> properly accounted.
>>
>> You mean the case where another node has been deleted and due to accounting
>> buffering the corrected accounting data wouldn't be committed?
>>
>> This is no problem, as an error returned by delnode_sub() will result in
>> corrupt() being called, which in turn will correct the accounting data.
> 
> To me corrupt() is a big hammer and it feels wrong to call it when I think we 
> have easier/faster way to deal with the issue. Could we instead call 
> acc_commit() before returning?

You are aware that this is a very problematic situation we are in?

We couldn't allocate a small amount of memory (around 64 bytes)! Xenstored
will probably die within milliseconds. Using the big hammer in such a
situation is fine IMO. It will maybe result in solving the problem by
freeing of memory (quite unlikely, though), but it won't leave xenstored
in a worse state than with your suggestion.

And calling acc_commit() here wouldn't really help, as accounting data
couldn't be recorded, so there are missing updates anyway due to the failed
call of domain_nbentry_dec().

>>> Also, I think a comment would be warrant to explain why we are returning 
>>> WALK_TREE_ERROR_STOP here when...
>>>
>>>> +
>>>>       /* In case of error stop the walk. */
>>>>       if (!ret && do_tdb_delete(conn, &key, &node->acc))
>>>>           return WALK_TREE_SUCCESS_STOP;
>>>
>>> ... this is not the case when do_tdb_delete() fails for some reasons.
>>
>> The main idea was that the remove is working from the leafs towards the root.
>> In case one entry can't be removed, we should just stop.
>>
>> OTOH returning WALK_TREE_ERROR_STOP might be cleaner, as this would make sure
>> that accounting data is repaired afterwards. Even if do_tdb_delete() can't
>> really fail in normal configurations, as the only failure reasons are:
>>
>> - the node isn't found (quite unlikely, as we just read it before entering
>>    delnode_sub()), or
>> - writing the updated data base failed (in normal configurations it is in
>>    already allocated memory, so no way to fail that)
>>
>> I think I'll switch to return WALK_TREE_ERROR_STOP here.
> 
> See above for a different proposal.

Without deleting the node in the data base this would be another accounting
data inconsistency, so calling corrupt() is the correct cleanup measure.


Juergen
Julien Grall May 11, 2023, 12:07 p.m. UTC | #5
Hi Juergen,

On 11/05/2023 06:25, Juergen Gross wrote:
> On 10.05.23 23:31, Julien Grall wrote:
>> On 10/05/2023 13:54, Juergen Gross wrote:
>>> On 09.05.23 20:46, Julien Grall wrote:
>>>> Hi Juergen,
>>>>
>>>> On 08/05/2023 12:47, Juergen Gross wrote:
>>>>> Add the node accounting to the accounting information buffering in
>>>>> order to avoid having to undo it in case of failure.
>>>>>
>>>>> This requires to call domain_nbentry_dec() before any changes to the
>>>>> data base, as it can return an error now.
>>>>>
>>>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>>>> ---
>>>>> V5:
>>>>> - add error handling after domain_nbentry_dec() calls (Julien Grall)
>>>>> ---
>>>>>   tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
>>>>>   tools/xenstore/xenstored_domain.h |  4 ++--
>>>>>   2 files changed, 9 insertions(+), 24 deletions(-)
>>>>>
>>>>> diff --git a/tools/xenstore/xenstored_core.c 
>>>>> b/tools/xenstore/xenstored_core.c
>>>>> index 8392bdec9b..22da434e2a 100644
>>>>> --- a/tools/xenstore/xenstored_core.c
>>>>> +++ b/tools/xenstore/xenstored_core.c
>>>>> @@ -1454,7 +1454,6 @@ static void destroy_node_rm(struct connection 
>>>>> *conn, struct node *node)
>>>>>   static int destroy_node(struct connection *conn, struct node *node)
>>>>>   {
>>>>>       destroy_node_rm(conn, node);
>>>>> -    domain_nbentry_dec(conn, get_node_owner(node));
>>>>>       /*
>>>>>        * It is not possible to easily revert the changes in a 
>>>>> transaction.
>>>>> @@ -1645,6 +1644,9 @@ static int delnode_sub(const void *ctx, 
>>>>> struct connection *conn,
>>>>>       if (ret > 0)
>>>>>           return WALK_TREE_SUCCESS_STOP;
>>>>> +    if (domain_nbentry_dec(conn, get_node_owner(node)))
>>>>> +        return WALK_TREE_ERROR_STOP;
>>>>
>>>> I think there is a potential issue with the buffering here. In case 
>>>> of failure, the node could have been removed, but the quota would 
>>>> not be properly accounted.
>>>
>>> You mean the case where another node has been deleted and due to 
>>> accounting
>>> buffering the corrected accounting data wouldn't be committed?
>>>
>>> This is no problem, as an error returned by delnode_sub() will result in
>>> corrupt() being called, which in turn will correct the accounting data.
>>
>> To me corrupt() is a big hammer and it feels wrong to call it when I 
>> think we have easier/faster way to deal with the issue. Could we 
>> instead call acc_commit() before returning?
> 
> You are aware that this is a very problematic situation we are in?

It is not very clear from the code. And that's why comments are always 
useful to clarify why corrupt() is the right call.

> 
> We couldn't allocate a small amount of memory (around 64 bytes)! 

So long this is the only reason then...

Xenstored
> will probably die within milliseconds. Using the big hammer in such a
> situation is fine IMO. It will maybe result in solving the problem by
> freeing of memory (quite unlikely, though), but it won't leave xenstored
> in a worse state than with your suggestion.

... this might be OK. But in the past, we had a place where corrupt() 
could be reliably triggered by a guest. If you think that's not 
possible, then it should be properly documented.

> 
> And calling acc_commit() here wouldn't really help, as accounting data
> couldn't be recorded, so there are missing updates anyway due to the failed
> call of domain_nbentry_dec().

We are removing the node after the accounting is updated. So if the 
accounting fail, then it should still be correct for anything that was 
removed before.

> 
>>>> Also, I think a comment would be warrant to explain why we are 
>>>> returning WALK_TREE_ERROR_STOP here when...
>>>>
>>>>> +
>>>>>       /* In case of error stop the walk. */
>>>>>       if (!ret && do_tdb_delete(conn, &key, &node->acc))
>>>>>           return WALK_TREE_SUCCESS_STOP;
>>>>
>>>> ... this is not the case when do_tdb_delete() fails for some reasons.
>>>
>>> The main idea was that the remove is working from the leafs towards 
>>> the root.
>>> In case one entry can't be removed, we should just stop.
>>>
>>> OTOH returning WALK_TREE_ERROR_STOP might be cleaner, as this would 
>>> make sure
>>> that accounting data is repaired afterwards. Even if do_tdb_delete() 
>>> can't
>>> really fail in normal configurations, as the only failure reasons are:
>>>
>>> - the node isn't found (quite unlikely, as we just read it before 
>>> entering
>>>    delnode_sub()), or
>>> - writing the updated data base failed (in normal configurations it 
>>> is in
>>>    already allocated memory, so no way to fail that)
>>>
>>> I think I'll switch to return WALK_TREE_ERROR_STOP here.
>>
>> See above for a different proposal.
> 
> Without deleting the node in the data base this would be another accounting
> data inconsistency, so calling corrupt() is the correct cleanup measure.

Hmmm... I read this as this is already a bug rather than one introduced 
by this patch. IIUC, then this should be done in a new commit.

Cheers,
Jürgen Groß May 25, 2023, 12:45 p.m. UTC | #6
On 11.05.23 14:07, Julien Grall wrote:
> Hi Juergen,
> 
> On 11/05/2023 06:25, Juergen Gross wrote:
>> On 10.05.23 23:31, Julien Grall wrote:
>>> On 10/05/2023 13:54, Juergen Gross wrote:
>>>> On 09.05.23 20:46, Julien Grall wrote:
>>>>> Hi Juergen,
>>>>>
>>>>> On 08/05/2023 12:47, Juergen Gross wrote:
>>>>>> Add the node accounting to the accounting information buffering in
>>>>>> order to avoid having to undo it in case of failure.
>>>>>>
>>>>>> This requires to call domain_nbentry_dec() before any changes to the
>>>>>> data base, as it can return an error now.
>>>>>>
>>>>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>>>>> ---
>>>>>> V5:
>>>>>> - add error handling after domain_nbentry_dec() calls (Julien Grall)
>>>>>> ---
>>>>>>   tools/xenstore/xenstored_core.c   | 29 +++++++----------------------
>>>>>>   tools/xenstore/xenstored_domain.h |  4 ++--
>>>>>>   2 files changed, 9 insertions(+), 24 deletions(-)
>>>>>>
>>>>>> diff --git a/tools/xenstore/xenstored_core.c 
>>>>>> b/tools/xenstore/xenstored_core.c
>>>>>> index 8392bdec9b..22da434e2a 100644
>>>>>> --- a/tools/xenstore/xenstored_core.c
>>>>>> +++ b/tools/xenstore/xenstored_core.c
>>>>>> @@ -1454,7 +1454,6 @@ static void destroy_node_rm(struct connection *conn, 
>>>>>> struct node *node)
>>>>>>   static int destroy_node(struct connection *conn, struct node *node)
>>>>>>   {
>>>>>>       destroy_node_rm(conn, node);
>>>>>> -    domain_nbentry_dec(conn, get_node_owner(node));
>>>>>>       /*
>>>>>>        * It is not possible to easily revert the changes in a transaction.
>>>>>> @@ -1645,6 +1644,9 @@ static int delnode_sub(const void *ctx, struct 
>>>>>> connection *conn,
>>>>>>       if (ret > 0)
>>>>>>           return WALK_TREE_SUCCESS_STOP;
>>>>>> +    if (domain_nbentry_dec(conn, get_node_owner(node)))
>>>>>> +        return WALK_TREE_ERROR_STOP;
>>>>>
>>>>> I think there is a potential issue with the buffering here. In case of 
>>>>> failure, the node could have been removed, but the quota would not be 
>>>>> properly accounted.
>>>>
>>>> You mean the case where another node has been deleted and due to accounting
>>>> buffering the corrected accounting data wouldn't be committed?
>>>>
>>>> This is no problem, as an error returned by delnode_sub() will result in
>>>> corrupt() being called, which in turn will correct the accounting data.
>>>
>>> To me corrupt() is a big hammer and it feels wrong to call it when I think we 
>>> have easier/faster way to deal with the issue. Could we instead call 
>>> acc_commit() before returning?
>>
>> You are aware that this is a very problematic situation we are in?
> 
> It is not very clear from the code. And that's why comments are always useful to 
> clarify why corrupt() is the right call.

I agree. I'll add a comment.

> 
>>
>> We couldn't allocate a small amount of memory (around 64 bytes)! 
> 
> So long this is the only reason then...
> 
> Xenstored
>> will probably die within milliseconds. Using the big hammer in such a
>> situation is fine IMO. It will maybe result in solving the problem by
>> freeing of memory (quite unlikely, though), but it won't leave xenstored
>> in a worse state than with your suggestion.
> 
> ... this might be OK. But in the past, we had a place where corrupt() could be 
> reliably triggered by a guest. If you think that's not possible, then it should 
> be properly documented.

Okay, will do so.

> 
>>
>> And calling acc_commit() here wouldn't really help, as accounting data
>> couldn't be recorded, so there are missing updates anyway due to the failed
>> call of domain_nbentry_dec().
> 
> We are removing the node after the accounting is updated. So if the accounting 
> fail, then it should still be correct for anything that was removed before.

Oh, right.

> 
>>
>>>>> Also, I think a comment would be warrant to explain why we are returning 
>>>>> WALK_TREE_ERROR_STOP here when...
>>>>>
>>>>>> +
>>>>>>       /* In case of error stop the walk. */
>>>>>>       if (!ret && do_tdb_delete(conn, &key, &node->acc))
>>>>>>           return WALK_TREE_SUCCESS_STOP;
>>>>>
>>>>> ... this is not the case when do_tdb_delete() fails for some reasons.
>>>>
>>>> The main idea was that the remove is working from the leafs towards the root.
>>>> In case one entry can't be removed, we should just stop.
>>>>
>>>> OTOH returning WALK_TREE_ERROR_STOP might be cleaner, as this would make sure
>>>> that accounting data is repaired afterwards. Even if do_tdb_delete() can't
>>>> really fail in normal configurations, as the only failure reasons are:
>>>>
>>>> - the node isn't found (quite unlikely, as we just read it before entering
>>>>    delnode_sub()), or
>>>> - writing the updated data base failed (in normal configurations it is in
>>>>    already allocated memory, so no way to fail that)
>>>>
>>>> I think I'll switch to return WALK_TREE_ERROR_STOP here.
>>>
>>> See above for a different proposal.
>>
>> Without deleting the node in the data base this would be another accounting
>> data inconsistency, so calling corrupt() is the correct cleanup measure.
> 
> Hmmm... I read this as this is already a bug rather than one introduced by this 
> patch. IIUC, then this should be done in a new commit.

No, previously domain_nbentry_dec() couldn't fail, so this is a new situation.
Or did I misunderstand what you mean?


Juergen
diff mbox series

Patch

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 8392bdec9b..22da434e2a 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -1454,7 +1454,6 @@  static void destroy_node_rm(struct connection *conn, struct node *node)
 static int destroy_node(struct connection *conn, struct node *node)
 {
 	destroy_node_rm(conn, node);
-	domain_nbentry_dec(conn, get_node_owner(node));
 
 	/*
 	 * It is not possible to easily revert the changes in a transaction.
@@ -1645,6 +1644,9 @@  static int delnode_sub(const void *ctx, struct connection *conn,
 	if (ret > 0)
 		return WALK_TREE_SUCCESS_STOP;
 
+	if (domain_nbentry_dec(conn, get_node_owner(node)))
+		return WALK_TREE_ERROR_STOP;
+
 	/* In case of error stop the walk. */
 	if (!ret && do_tdb_delete(conn, &key, &node->acc))
 		return WALK_TREE_SUCCESS_STOP;
@@ -1657,8 +1659,6 @@  static int delnode_sub(const void *ctx, struct connection *conn,
 	watch_exact = strcmp(root, node->name);
 	fire_watches(conn, ctx, node->name, node, watch_exact, NULL);
 
-	domain_nbentry_dec(conn, get_node_owner(node));
-
 	return WALK_TREE_RM_CHILDENTRY;
 }
 
@@ -1797,29 +1797,14 @@  static int do_set_perms(const void *ctx, struct connection *conn,
 		return EPERM;
 
 	old_perms = node->perms;
-	domain_nbentry_dec(conn, get_node_owner(node));
+	if (domain_nbentry_dec(conn, get_node_owner(node)))
+		return ENOMEM;
 	node->perms = perms;
-	if (domain_nbentry_inc(conn, get_node_owner(node))) {
-		node->perms = old_perms;
-		/*
-		 * This should never fail because we had a reference on the
-		 * domain before and Xenstored is single-threaded.
-		 */
-		domain_nbentry_inc(conn, get_node_owner(node));
+	if (domain_nbentry_inc(conn, get_node_owner(node)))
 		return ENOMEM;
-	}
-
-	if (write_node(conn, node, false)) {
-		int saved_errno = errno;
 
-		domain_nbentry_dec(conn, get_node_owner(node));
-		node->perms = old_perms;
-		/* No failure possible as above. */
-		domain_nbentry_inc(conn, get_node_owner(node));
-
-		errno = saved_errno;
+	if (write_node(conn, node, false))
 		return errno;
-	}
 
 	fire_watches(conn, ctx, name, node, false, &old_perms);
 	send_ack(conn, XS_SET_PERMS);
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
index e40657216b..466549709f 100644
--- a/tools/xenstore/xenstored_domain.h
+++ b/tools/xenstore/xenstored_domain.h
@@ -25,9 +25,9 @@ 
  * a per transaction array.
  */
 enum accitem {
+	ACC_NODES,
 	ACC_REQ_N,		/* Number of elements per request. */
-	ACC_NODES = ACC_REQ_N,
-	ACC_TR_N,		/* Number of elements per transaction. */
+	ACC_TR_N = ACC_REQ_N,	/* Number of elements per transaction. */
 	ACC_CHD_N = ACC_TR_N,	/* max(ACC_REQ_N, ACC_TR_N), for changed dom. */
 	ACC_N = ACC_TR_N,	/* Number of elements per domain. */
 };