diff mbox series

mdmon: imsm: fix metadata corruption when managing new array

Message ID 20250210212225.10633-1-junxiao.bi@oracle.com (mailing list archive)
State New
Headers show
Series mdmon: imsm: fix metadata corruption when managing new array | expand

Commit Message

Junxiao Bi Feb. 10, 2025, 9:22 p.m. UTC
When manager thread detects new array, it will invoke manage_new().
For imsm array, it will further invoke imsm_open_new(). Since
commit bbab0940fa75("imsm: write bad block log on metadata sync"),
it preallocates bad block log when opening the array, that requires
increasing the mpb buffer size.
To do that, imsm_open_new() invokes imsm_update_metadata_locally(),
which first uses imsm_prepare_update() to allocate a larger mpb buffer
and store it at "mpb->next_buf", and then invoke imsm_process_update()
to copy the content from current mpb buffer "mpb->buf" to "mpb->next_buf",
and then free the current mpb buffer and set the new buffer as current.

There is a small race window, when monitor thread is syncing metadata,
it grabs current buffer pointer in imsm_sync_metadata()->write_super_imsm(),
but before flushing the buffer to disk, manager thread does above switching
buffer which frees current buffer, then monitor thread will run into
use-after-free issue and could cause on-disk metadata corruption.
If system keeps running, further metadata update could fix the corruption,
because after switching buffer, the new buffer will contain good metadata,
but if panic/power cycle happens while disk metadata is corrupted,
the system will run into bootup failure if array is used as root,
otherwise the array can not be assembled after boot if not used as root.

This issue will not happen for imsm array with only one member array,
because the memory array has not be opened yet, monitor thread will not
do any metadata updates.
This can happen for imsm array with at lease two member array, in the
following two scenarios:
1. Restarting mdmon process with at least two member array
This will happen during system boot up or user restart mdmon after mdadm
upgrade
2. Adding new member array to exist imsm array with at least one member array.

To fix this, delay the switching buffer operation to monitor thread.

Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync")
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
---
 managemon.c   |  6 ++++++
 super-intel.c | 14 +++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

Comments

Mariusz Tkaczyk Feb. 12, 2025, 10:07 a.m. UTC | #1
Hello Junxiao,
Thanks for solid and complete explanation!

On Mon, 10 Feb 2025 13:22:25 -0800
Junxiao Bi <junxiao.bi@oracle.com> wrote:

> When manager thread detects new array, it will invoke manage_new().
> For imsm array, it will further invoke imsm_open_new(). Since
> commit bbab0940fa75("imsm: write bad block log on metadata sync"),
> it preallocates bad block log when opening the array, that requires
> increasing the mpb buffer size.
> To do that, imsm_open_new() invokes imsm_update_metadata_locally(),
> which first uses imsm_prepare_update() to allocate a larger mpb buffer
> and store it at "mpb->next_buf", and then invoke imsm_process_update()
> to copy the content from current mpb buffer "mpb->buf" to
> "mpb->next_buf", and then free the current mpb buffer and set the new
> buffer as current.
> 
> There is a small race window, when monitor thread is syncing metadata,
> it grabs current buffer pointer in
> imsm_sync_metadata()->write_super_imsm(), but before flushing the
> buffer to disk, manager thread does above switching buffer which
> frees current buffer, then monitor thread will run into
> use-after-free issue and could cause on-disk metadata corruption. If
> system keeps running, further metadata update could fix the
> corruption, because after switching buffer, the new buffer will
> contain good metadata, but if panic/power cycle happens while disk
> metadata is corrupted, the system will run into bootup failure if
> array is used as root, otherwise the array can not be assembled after
> boot if not used as root.
> 
> This issue will not happen for imsm array with only one member array,
> because the memory array has not be opened yet, monitor thread will
> not do any metadata updates.
> This can happen for imsm array with at lease two member array, in the
> following two scenarios:
> 1. Restarting mdmon process with at least two member array
> This will happen during system boot up or user restart mdmon after
> mdadm upgrade
> 2. Adding new member array to exist imsm array with at least one
> member array.
> 
> To fix this, delay the switching buffer operation to monitor thread.
> 
> Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync")
> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> ---
>  managemon.c   |  6 ++++++
>  super-intel.c | 14 +++++++++++---
>  2 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/managemon.c b/managemon.c
> index d79813282457..855c85c3da92 100644
> --- a/managemon.c
> +++ b/managemon.c
> @@ -726,6 +726,7 @@ static void manage_new(struct mdstat_ent *mdstat,
>  	int i, inst;
>  	int failed = 0;
>  	char buf[SYSFS_MAX_BUF_SIZE];
> +	struct metadata_update *update = NULL;

If you are adding something new here, please follow reversed Christmas
tree convention.

>  
>  	/* check if array is ready to be monitored */
>  	if (!mdstat->active || !mdstat->level)
> @@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent *mdstat,
>  	/* if everything checks out tell the metadata handler we
> want to
>  	 * manage this instance
>  	 */
> +	container->update_tail = &update;
>  	if (!aa_ready(new) || container->ss->open_new(container,
> new, inst) < 0) {
> +		container->update_tail = NULL;
>  		goto error;
>  	} else {
> +		if (update)
> +			queue_metadata_update(update);
> +		container->update_tail = NULL;
>  		replace_array(container, victim, new);
>  		if (failed) {
>  			new->check_degraded = 1;
> diff --git a/super-intel.c b/super-intel.c
> index cab841980830..4988eef191da 100644
> --- a/super-intel.c
> +++ b/super-intel.c
> @@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct
> intel_super *super, struct imsm_dev *dev, return failed;
>  }
>  
> +static int imsm_prepare_update(struct supertype *st,
> +			       struct metadata_update *update);
>  static int imsm_open_new(struct supertype *c, struct active_array *a,
>  			 int inst)
>  {
>  	struct intel_super *super = c->sb;
>  	struct imsm_super *mpb = super->anchor;
> -	struct imsm_update_prealloc_bb_mem u;
> +	struct imsm_update_prealloc_bb_mem *u;
> +	struct metadata_update mu;
>  
>  	if (inst >= mpb->num_raid_devs) {
>  		pr_err("subarry index %d, out of range\n", inst);
> @@ -8482,8 +8485,13 @@ static int imsm_open_new(struct supertype *c,
> struct active_array *a, dprintf("imsm: open_new %d\n", inst);
>  	a->info.container_member = inst;
>  
> -	u.type = update_prealloc_badblocks_mem;
> -	imsm_update_metadata_locally(c, &u, sizeof(u));
> +	u = xmalloc(sizeof(*u));
> +	u->type = update_prealloc_badblocks_mem;
> +	mu.len = sizeof(*u);
> +	mu.buf = (char *)u;
> +	imsm_prepare_update(c, &mu);
> +	if (c->update_tail)
> +		append_metadata_update(c, u, sizeof(*u));
>  
>  	return 0;
>  }

I don't see issues, so you have my approve but it is Intel owned code
and I need Intel to approve.
.
Blazej, Could you please create Github PR with a patch if Intel is good
with the change? I would like to see test results before merge.

Thanks,
Mariusz
Blazej Kucman Feb. 12, 2025, 9:51 p.m. UTC | #2
On Wed, 12 Feb 2025 11:07:13 +0100
Mariusz Tkaczyk <mtkaczyk@kernel.org> wrote:

> Hello Junxiao,
> Thanks for solid and complete explanation!
> 
> On Mon, 10 Feb 2025 13:22:25 -0800
> Junxiao Bi <junxiao.bi@oracle.com> wrote:
> 
> > When manager thread detects new array, it will invoke manage_new().
> > For imsm array, it will further invoke imsm_open_new(). Since
> > commit bbab0940fa75("imsm: write bad block log on metadata sync"),
> > it preallocates bad block log when opening the array, that requires
> > increasing the mpb buffer size.
> > To do that, imsm_open_new() invokes imsm_update_metadata_locally(),
> > which first uses imsm_prepare_update() to allocate a larger mpb
> > buffer and store it at "mpb->next_buf", and then invoke
> > imsm_process_update() to copy the content from current mpb buffer
> > "mpb->buf" to "mpb->next_buf", and then free the current mpb buffer
> > and set the new buffer as current.
> > 
> > There is a small race window, when monitor thread is syncing
> > metadata, it grabs current buffer pointer in
> > imsm_sync_metadata()->write_super_imsm(), but before flushing the
> > buffer to disk, manager thread does above switching buffer which
> > frees current buffer, then monitor thread will run into
> > use-after-free issue and could cause on-disk metadata corruption. If
> > system keeps running, further metadata update could fix the
> > corruption, because after switching buffer, the new buffer will
> > contain good metadata, but if panic/power cycle happens while disk
> > metadata is corrupted, the system will run into bootup failure if
> > array is used as root, otherwise the array can not be assembled
> > after boot if not used as root.
> > 
> > This issue will not happen for imsm array with only one member
> > array, because the memory array has not be opened yet, monitor
> > thread will not do any metadata updates.
> > This can happen for imsm array with at lease two member array, in
> > the following two scenarios:
> > 1. Restarting mdmon process with at least two member array
> > This will happen during system boot up or user restart mdmon after
> > mdadm upgrade
> > 2. Adding new member array to exist imsm array with at least one
> > member array.
> > 
> > To fix this, delay the switching buffer operation to monitor thread.
> > 
> > Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync")
> > Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> > ---
> >  managemon.c   |  6 ++++++
> >  super-intel.c | 14 +++++++++++---
> >  2 files changed, 17 insertions(+), 3 deletions(-)
> > 
> > diff --git a/managemon.c b/managemon.c
> > index d79813282457..855c85c3da92 100644
> > --- a/managemon.c
> > +++ b/managemon.c
> > @@ -726,6 +726,7 @@ static void manage_new(struct mdstat_ent
> > *mdstat, int i, inst;
> >  	int failed = 0;
> >  	char buf[SYSFS_MAX_BUF_SIZE];
> > +	struct metadata_update *update = NULL;  
> 
> If you are adding something new here, please follow reversed Christmas
> tree convention.
> 
> >  
> >  	/* check if array is ready to be monitored */
> >  	if (!mdstat->active || !mdstat->level)
> > @@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent
> > *mdstat, /* if everything checks out tell the metadata handler we
> > want to
> >  	 * manage this instance
> >  	 */
> > +	container->update_tail = &update;
> >  	if (!aa_ready(new) || container->ss->open_new(container,
> > new, inst) < 0) {
> > +		container->update_tail = NULL;
> >  		goto error;
> >  	} else {
> > +		if (update)
> > +			queue_metadata_update(update);
> > +		container->update_tail = NULL;
> >  		replace_array(container, victim, new);
> >  		if (failed) {
> >  			new->check_degraded = 1;
> > diff --git a/super-intel.c b/super-intel.c
> > index cab841980830..4988eef191da 100644
> > --- a/super-intel.c
> > +++ b/super-intel.c
> > @@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct
> > intel_super *super, struct imsm_dev *dev, return failed;
> >  }
> >  
> > +static int imsm_prepare_update(struct supertype *st,
> > +			       struct metadata_update *update);
> >  static int imsm_open_new(struct supertype *c, struct active_array
> > *a, int inst)
> >  {
> >  	struct intel_super *super = c->sb;
> >  	struct imsm_super *mpb = super->anchor;
> > -	struct imsm_update_prealloc_bb_mem u;
> > +	struct imsm_update_prealloc_bb_mem *u;
> > +	struct metadata_update mu;
> >  
> >  	if (inst >= mpb->num_raid_devs) {
> >  		pr_err("subarry index %d, out of range\n", inst);
> > @@ -8482,8 +8485,13 @@ static int imsm_open_new(struct supertype *c,
> > struct active_array *a, dprintf("imsm: open_new %d\n", inst);
> >  	a->info.container_member = inst;
> >  
> > -	u.type = update_prealloc_badblocks_mem;
> > -	imsm_update_metadata_locally(c, &u, sizeof(u));
> > +	u = xmalloc(sizeof(*u));
> > +	u->type = update_prealloc_badblocks_mem;
> > +	mu.len = sizeof(*u);
> > +	mu.buf = (char *)u;
> > +	imsm_prepare_update(c, &mu);
> > +	if (c->update_tail)
> > +		append_metadata_update(c, u, sizeof(*u));
> >  
> >  	return 0;
> >  }  
> 
> I don't see issues, so you have my approve but it is Intel owned code
> and I need Intel to approve.
> .
> Blazej, Could you please create Github PR with a patch if Intel is
> good with the change? I would like to see test results before merge.

Hi
I've added a PR on github, I'll review this change by the end of the
week.

PR: https://github.com/md-raid-utilities/mdadm/pull/152

Thanks,
Blazej

> 
> Thanks,
> Mariusz
Junxiao Bi Feb. 13, 2025, 6:38 a.m. UTC | #3
Hi Mariusz & Blazej,

Thanks for the review and file PR.

Please check other comments below.

On 2/12/25 1:51 PM, Blazej Kucman wrote:
> On Wed, 12 Feb 2025 11:07:13 +0100
> Mariusz Tkaczyk <mtkaczyk@kernel.org> wrote:
>
>> Hello Junxiao,
>> Thanks for solid and complete explanation!
>>
>> On Mon, 10 Feb 2025 13:22:25 -0800
>> Junxiao Bi <junxiao.bi@oracle.com> wrote:
>>
>>> When manager thread detects new array, it will invoke manage_new().
>>> For imsm array, it will further invoke imsm_open_new(). Since
>>> commit bbab0940fa75("imsm: write bad block log on metadata sync"),
>>> it preallocates bad block log when opening the array, that requires
>>> increasing the mpb buffer size.
>>> To do that, imsm_open_new() invokes imsm_update_metadata_locally(),
>>> which first uses imsm_prepare_update() to allocate a larger mpb
>>> buffer and store it at "mpb->next_buf", and then invoke
>>> imsm_process_update() to copy the content from current mpb buffer
>>> "mpb->buf" to "mpb->next_buf", and then free the current mpb buffer
>>> and set the new buffer as current.
>>>
>>> There is a small race window, when monitor thread is syncing
>>> metadata, it grabs current buffer pointer in
>>> imsm_sync_metadata()->write_super_imsm(), but before flushing the
>>> buffer to disk, manager thread does above switching buffer which
>>> frees current buffer, then monitor thread will run into
>>> use-after-free issue and could cause on-disk metadata corruption. If
>>> system keeps running, further metadata update could fix the
>>> corruption, because after switching buffer, the new buffer will
>>> contain good metadata, but if panic/power cycle happens while disk
>>> metadata is corrupted, the system will run into bootup failure if
>>> array is used as root, otherwise the array can not be assembled
>>> after boot if not used as root.
>>>
>>> This issue will not happen for imsm array with only one member
>>> array, because the memory array has not be opened yet, monitor
>>> thread will not do any metadata updates.
>>> This can happen for imsm array with at lease two member array, in
>>> the following two scenarios:
>>> 1. Restarting mdmon process with at least two member array
>>> This will happen during system boot up or user restart mdmon after
>>> mdadm upgrade
>>> 2. Adding new member array to exist imsm array with at least one
>>> member array.
>>>
>>> To fix this, delay the switching buffer operation to monitor thread.
>>>
>>> Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync")
>>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>>> ---
>>>   managemon.c   |  6 ++++++
>>>   super-intel.c | 14 +++++++++++---
>>>   2 files changed, 17 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/managemon.c b/managemon.c
>>> index d79813282457..855c85c3da92 100644
>>> --- a/managemon.c
>>> +++ b/managemon.c
>>> @@ -726,6 +726,7 @@ static void manage_new(struct mdstat_ent
>>> *mdstat, int i, inst;
>>>   	int failed = 0;
>>>   	char buf[SYSFS_MAX_BUF_SIZE];
>>> +	struct metadata_update *update = NULL;
>> If you are adding something new here, please follow reversed Christmas
>> tree convention.

got it, i will move this new variable to the top of the function in v2. 
Should i move variable "buf" to proper location as well?


>>
>>>   
>>>   	/* check if array is ready to be monitored */
>>>   	if (!mdstat->active || !mdstat->level)
>>> @@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent
>>> *mdstat, /* if everything checks out tell the metadata handler we
>>> want to
>>>   	 * manage this instance
>>>   	 */
>>> +	container->update_tail = &update;
>>>   	if (!aa_ready(new) || container->ss->open_new(container,
>>> new, inst) < 0) {
>>> +		container->update_tail = NULL;
>>>   		goto error;
>>>   	} else {
>>> +		if (update)
>>> +			queue_metadata_update(update);
>>> +		container->update_tail = NULL;
>>>   		replace_array(container, victim, new);
>>>   		if (failed) {
>>>   			new->check_degraded = 1;
>>> diff --git a/super-intel.c b/super-intel.c
>>> index cab841980830..4988eef191da 100644
>>> --- a/super-intel.c
>>> +++ b/super-intel.c
>>> @@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct
>>> intel_super *super, struct imsm_dev *dev, return failed;
>>>   }
>>>   
>>> +static int imsm_prepare_update(struct supertype *st,
>>> +			       struct metadata_update *update);
>>>   static int imsm_open_new(struct supertype *c, struct active_array
>>> *a, int inst)
>>>   {
>>>   	struct intel_super *super = c->sb;
>>>   	struct imsm_super *mpb = super->anchor;
>>> -	struct imsm_update_prealloc_bb_mem u;
>>> +	struct imsm_update_prealloc_bb_mem *u;
>>> +	struct metadata_update mu;
>>>   
>>>   	if (inst >= mpb->num_raid_devs) {
>>>   		pr_err("subarry index %d, out of range\n", inst);
>>> @@ -8482,8 +8485,13 @@ static int imsm_open_new(struct supertype *c,
>>> struct active_array *a, dprintf("imsm: open_new %d\n", inst);
>>>   	a->info.container_member = inst;
>>>   
>>> -	u.type = update_prealloc_badblocks_mem;
>>> -	imsm_update_metadata_locally(c, &u, sizeof(u));
>>> +	u = xmalloc(sizeof(*u));
>>> +	u->type = update_prealloc_badblocks_mem;
>>> +	mu.len = sizeof(*u);
>>> +	mu.buf = (char *)u;
>>> +	imsm_prepare_update(c, &mu);
>>> +	if (c->update_tail)
>>> +		append_metadata_update(c, u, sizeof(*u));
>>>   
>>>   	return 0;
>>>   }
>> I don't see issues, so you have my approve but it is Intel owned code
>> and I need Intel to approve.
>> .
>> Blazej, Could you please create Github PR with a patch if Intel is
>> good with the change? I would like to see test results before merge.
> Hi
> I've added a PR on github, I'll review this change by the end of the
> week.
>
> PR: https://github.com/md-raid-utilities/mdadm/pull/152

I see this error reported from PR test, may i know how to access these 
two logs?  This fix is for imsm, and ->open_new for ddf not even touch 
"update_tail", not sure how this could cause ddf test failure.

     /home/vagrant/host/mdadm/tests/10ddf-create-fail-rebuild... 
Execution time (seconds): 46 FAILED - see 
/var/tmp/10ddf-create-fail-rebuild.log and 
/var/tmp/fail10ddf-create-fail-rebuild.log for details
     /home/vagrant/host/mdadm/tests/10ddf-fail-readd... Execution time 
(seconds): 27 FAILED - see /var/tmp/10ddf-fail-readd.log and 
/var/tmp/fail10ddf-fail-readd.log for details

Thanks,

Junxiao.

>
> Thanks,
> Blazej
>
>> Thanks,
>> Mariusz
Mariusz Tkaczyk Feb. 18, 2025, 6:22 p.m. UTC | #4
Hello,
Sorry for a delay.

On Wed, 12 Feb 2025 22:38:13 -0800
junxiao.bi@oracle.com wrote:

> Hi Mariusz & Blazej,
> 
> Thanks for the review and file PR.
> 
> Please check other comments below.
> 
> On 2/12/25 1:51 PM, Blazej Kucman wrote:
> > On Wed, 12 Feb 2025 11:07:13 +0100
> > Mariusz Tkaczyk <mtkaczyk@kernel.org> wrote:
> >  
> >> Hello Junxiao,
> >> Thanks for solid and complete explanation!
> >>
> >> On Mon, 10 Feb 2025 13:22:25 -0800
> >> Junxiao Bi <junxiao.bi@oracle.com> wrote:
> >>  
> >>> When manager thread detects new array, it will invoke
> >>> manage_new(). For imsm array, it will further invoke
> >>> imsm_open_new(). Since commit bbab0940fa75("imsm: write bad block
> >>> log on metadata sync"), it preallocates bad block log when
> >>> opening the array, that requires increasing the mpb buffer size.
> >>> To do that, imsm_open_new() invokes
> >>> imsm_update_metadata_locally(), which first uses
> >>> imsm_prepare_update() to allocate a larger mpb buffer and store
> >>> it at "mpb->next_buf", and then invoke imsm_process_update() to
> >>> copy the content from current mpb buffer "mpb->buf" to
> >>> "mpb->next_buf", and then free the current mpb buffer and set the
> >>> new buffer as current.
> >>>
> >>> There is a small race window, when monitor thread is syncing
> >>> metadata, it grabs current buffer pointer in
> >>> imsm_sync_metadata()->write_super_imsm(), but before flushing the
> >>> buffer to disk, manager thread does above switching buffer which
> >>> frees current buffer, then monitor thread will run into
> >>> use-after-free issue and could cause on-disk metadata corruption.
> >>> If system keeps running, further metadata update could fix the
> >>> corruption, because after switching buffer, the new buffer will
> >>> contain good metadata, but if panic/power cycle happens while disk
> >>> metadata is corrupted, the system will run into bootup failure if
> >>> array is used as root, otherwise the array can not be assembled
> >>> after boot if not used as root.
> >>>
> >>> This issue will not happen for imsm array with only one member
> >>> array, because the memory array has not be opened yet, monitor
> >>> thread will not do any metadata updates.
> >>> This can happen for imsm array with at lease two member array, in
> >>> the following two scenarios:
> >>> 1. Restarting mdmon process with at least two member array
> >>> This will happen during system boot up or user restart mdmon after
> >>> mdadm upgrade
> >>> 2. Adding new member array to exist imsm array with at least one
> >>> member array.
> >>>
> >>> To fix this, delay the switching buffer operation to monitor
> >>> thread.
> >>>
> >>> Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync")
> >>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> >>> ---
> >>>   managemon.c   |  6 ++++++
> >>>   super-intel.c | 14 +++++++++++---
> >>>   2 files changed, 17 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/managemon.c b/managemon.c
> >>> index d79813282457..855c85c3da92 100644
> >>> --- a/managemon.c
> >>> +++ b/managemon.c
> >>> @@ -726,6 +726,7 @@ static void manage_new(struct mdstat_ent
> >>> *mdstat, int i, inst;
> >>>   	int failed = 0;
> >>>   	char buf[SYSFS_MAX_BUF_SIZE];
> >>> +	struct metadata_update *update = NULL;  
> >> If you are adding something new here, please follow reversed
> >> Christmas tree convention.  
> 
> got it, i will move this new variable to the top of the function in
> v2. Should i move variable "buf" to proper location as well?
> 
> 

Either way is fine. I have no objections to do this.

> >>  
> >>>   
> >>>   	/* check if array is ready to be monitored */
> >>>   	if (!mdstat->active || !mdstat->level)
> >>> @@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent
> >>> *mdstat, /* if everything checks out tell the metadata handler we
> >>> want to
> >>>   	 * manage this instance
> >>>   	 */
> >>> +	container->update_tail = &update;
> >>>   	if (!aa_ready(new) || container->ss->open_new(container,
> >>> new, inst) < 0) {
> >>> +		container->update_tail = NULL;
> >>>   		goto error;
> >>>   	} else {
> >>> +		if (update)
> >>> +			queue_metadata_update(update);
> >>> +		container->update_tail = NULL;
> >>>   		replace_array(container, victim, new);
> >>>   		if (failed) {
> >>>   			new->check_degraded = 1;
> >>> diff --git a/super-intel.c b/super-intel.c
> >>> index cab841980830..4988eef191da 100644
> >>> --- a/super-intel.c
> >>> +++ b/super-intel.c
> >>> @@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct
> >>> intel_super *super, struct imsm_dev *dev, return failed;
> >>>   }
> >>>   
> >>> +static int imsm_prepare_update(struct supertype *st,
> >>> +			       struct metadata_update *update);
> >>>   static int imsm_open_new(struct supertype *c, struct
> >>> active_array *a, int inst)
> >>>   {
> >>>   	struct intel_super *super = c->sb;
> >>>   	struct imsm_super *mpb = super->anchor;
> >>> -	struct imsm_update_prealloc_bb_mem u;
> >>> +	struct imsm_update_prealloc_bb_mem *u;
> >>> +	struct metadata_update mu;
> >>>   
> >>>   	if (inst >= mpb->num_raid_devs) {
> >>>   		pr_err("subarry index %d, out of range\n",
> >>> inst); @@ -8482,8 +8485,13 @@ static int imsm_open_new(struct
> >>> supertype *c, struct active_array *a, dprintf("imsm: open_new
> >>> %d\n", inst); a->info.container_member = inst;
> >>>   
> >>> -	u.type = update_prealloc_badblocks_mem;
> >>> -	imsm_update_metadata_locally(c, &u, sizeof(u));
> >>> +	u = xmalloc(sizeof(*u));
> >>> +	u->type = update_prealloc_badblocks_mem;
> >>> +	mu.len = sizeof(*u);
> >>> +	mu.buf = (char *)u;
> >>> +	imsm_prepare_update(c, &mu);
> >>> +	if (c->update_tail)
> >>> +		append_metadata_update(c, u, sizeof(*u));
> >>>   
> >>>   	return 0;
> >>>   }  
> >> I don't see issues, so you have my approve but it is Intel owned
> >> code and I need Intel to approve.
> >> .
> >> Blazej, Could you please create Github PR with a patch if Intel is
> >> good with the change? I would like to see test results before
> >> merge.  
> > Hi
> > I've added a PR on github, I'll review this change by the end of the
> > week.
> >
> > PR: https://github.com/md-raid-utilities/mdadm/pull/152  
> 
> I see this error reported from PR test, may i know how to access
> these two logs?  This fix is for imsm, and ->open_new for ddf not
> even touch "update_tail", not sure how this could cause ddf test
> failure.
> 
>      /home/vagrant/host/mdadm/tests/10ddf-create-fail-rebuild... 
> Execution time (seconds): 46 FAILED - see 
> /var/tmp/10ddf-create-fail-rebuild.log and 
> /var/tmp/fail10ddf-create-fail-rebuild.log for details
>      /home/vagrant/host/mdadm/tests/10ddf-fail-readd... Execution
> time (seconds): 27 FAILED - see /var/tmp/10ddf-fail-readd.log and 
> /var/tmp/fail10ddf-fail-readd.log for details

It is known problem, so far I know Nigel is looking at it. There is no
fix for now. Execution timed out and logs has not been copied for user
as expected.

Here you can see the Nightly report that is executed on top regularly
to at least determine if fails are persistent (probably not caused by
your change).

Sorry, it is not yet perfect but at least something :)

Thanks,
Mariusz
Junxiao Bi Feb. 18, 2025, 6:50 p.m. UTC | #5
Thanks Mariusz for the review and share the test details.

I have sent a v2 to address the code/log style issue.

Hi Blazej, can you help review the v2 version?

Thanks,

Junxiao.

On 2/18/25 10:22 AM, Mariusz Tkaczyk wrote:
> Hello,
> Sorry for a delay.
>
> On Wed, 12 Feb 2025 22:38:13 -0800
> junxiao.bi@oracle.com wrote:
>
>> Hi Mariusz & Blazej,
>>
>> Thanks for the review and file PR.
>>
>> Please check other comments below.
>>
>> On 2/12/25 1:51 PM, Blazej Kucman wrote:
>>> On Wed, 12 Feb 2025 11:07:13 +0100
>>> Mariusz Tkaczyk <mtkaczyk@kernel.org> wrote:
>>>   
>>>> Hello Junxiao,
>>>> Thanks for solid and complete explanation!
>>>>
>>>> On Mon, 10 Feb 2025 13:22:25 -0800
>>>> Junxiao Bi <junxiao.bi@oracle.com> wrote:
>>>>   
>>>>> When manager thread detects new array, it will invoke
>>>>> manage_new(). For imsm array, it will further invoke
>>>>> imsm_open_new(). Since commit bbab0940fa75("imsm: write bad block
>>>>> log on metadata sync"), it preallocates bad block log when
>>>>> opening the array, that requires increasing the mpb buffer size.
>>>>> To do that, imsm_open_new() invokes
>>>>> imsm_update_metadata_locally(), which first uses
>>>>> imsm_prepare_update() to allocate a larger mpb buffer and store
>>>>> it at "mpb->next_buf", and then invoke imsm_process_update() to
>>>>> copy the content from current mpb buffer "mpb->buf" to
>>>>> "mpb->next_buf", and then free the current mpb buffer and set the
>>>>> new buffer as current.
>>>>>
>>>>> There is a small race window, when monitor thread is syncing
>>>>> metadata, it grabs current buffer pointer in
>>>>> imsm_sync_metadata()->write_super_imsm(), but before flushing the
>>>>> buffer to disk, manager thread does above switching buffer which
>>>>> frees current buffer, then monitor thread will run into
>>>>> use-after-free issue and could cause on-disk metadata corruption.
>>>>> If system keeps running, further metadata update could fix the
>>>>> corruption, because after switching buffer, the new buffer will
>>>>> contain good metadata, but if panic/power cycle happens while disk
>>>>> metadata is corrupted, the system will run into bootup failure if
>>>>> array is used as root, otherwise the array can not be assembled
>>>>> after boot if not used as root.
>>>>>
>>>>> This issue will not happen for imsm array with only one member
>>>>> array, because the memory array has not be opened yet, monitor
>>>>> thread will not do any metadata updates.
>>>>> This can happen for imsm array with at lease two member array, in
>>>>> the following two scenarios:
>>>>> 1. Restarting mdmon process with at least two member array
>>>>> This will happen during system boot up or user restart mdmon after
>>>>> mdadm upgrade
>>>>> 2. Adding new member array to exist imsm array with at least one
>>>>> member array.
>>>>>
>>>>> To fix this, delay the switching buffer operation to monitor
>>>>> thread.
>>>>>
>>>>> Fixes: bbab0940fa75 ("imsm: write bad block log on metadata sync")
>>>>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>>>>> ---
>>>>>    managemon.c   |  6 ++++++
>>>>>    super-intel.c | 14 +++++++++++---
>>>>>    2 files changed, 17 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/managemon.c b/managemon.c
>>>>> index d79813282457..855c85c3da92 100644
>>>>> --- a/managemon.c
>>>>> +++ b/managemon.c
>>>>> @@ -726,6 +726,7 @@ static void manage_new(struct mdstat_ent
>>>>> *mdstat, int i, inst;
>>>>>    	int failed = 0;
>>>>>    	char buf[SYSFS_MAX_BUF_SIZE];
>>>>> +	struct metadata_update *update = NULL;
>>>> If you are adding something new here, please follow reversed
>>>> Christmas tree convention.
>> got it, i will move this new variable to the top of the function in
>> v2. Should i move variable "buf" to proper location as well?
>>
>>
> Either way is fine. I have no objections to do this.
>
>>>>   
>>>>>    
>>>>>    	/* check if array is ready to be monitored */
>>>>>    	if (!mdstat->active || !mdstat->level)
>>>>> @@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent
>>>>> *mdstat, /* if everything checks out tell the metadata handler we
>>>>> want to
>>>>>    	 * manage this instance
>>>>>    	 */
>>>>> +	container->update_tail = &update;
>>>>>    	if (!aa_ready(new) || container->ss->open_new(container,
>>>>> new, inst) < 0) {
>>>>> +		container->update_tail = NULL;
>>>>>    		goto error;
>>>>>    	} else {
>>>>> +		if (update)
>>>>> +			queue_metadata_update(update);
>>>>> +		container->update_tail = NULL;
>>>>>    		replace_array(container, victim, new);
>>>>>    		if (failed) {
>>>>>    			new->check_degraded = 1;
>>>>> diff --git a/super-intel.c b/super-intel.c
>>>>> index cab841980830..4988eef191da 100644
>>>>> --- a/super-intel.c
>>>>> +++ b/super-intel.c
>>>>> @@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct
>>>>> intel_super *super, struct imsm_dev *dev, return failed;
>>>>>    }
>>>>>    
>>>>> +static int imsm_prepare_update(struct supertype *st,
>>>>> +			       struct metadata_update *update);
>>>>>    static int imsm_open_new(struct supertype *c, struct
>>>>> active_array *a, int inst)
>>>>>    {
>>>>>    	struct intel_super *super = c->sb;
>>>>>    	struct imsm_super *mpb = super->anchor;
>>>>> -	struct imsm_update_prealloc_bb_mem u;
>>>>> +	struct imsm_update_prealloc_bb_mem *u;
>>>>> +	struct metadata_update mu;
>>>>>    
>>>>>    	if (inst >= mpb->num_raid_devs) {
>>>>>    		pr_err("subarry index %d, out of range\n",
>>>>> inst); @@ -8482,8 +8485,13 @@ static int imsm_open_new(struct
>>>>> supertype *c, struct active_array *a, dprintf("imsm: open_new
>>>>> %d\n", inst); a->info.container_member = inst;
>>>>>    
>>>>> -	u.type = update_prealloc_badblocks_mem;
>>>>> -	imsm_update_metadata_locally(c, &u, sizeof(u));
>>>>> +	u = xmalloc(sizeof(*u));
>>>>> +	u->type = update_prealloc_badblocks_mem;
>>>>> +	mu.len = sizeof(*u);
>>>>> +	mu.buf = (char *)u;
>>>>> +	imsm_prepare_update(c, &mu);
>>>>> +	if (c->update_tail)
>>>>> +		append_metadata_update(c, u, sizeof(*u));
>>>>>    
>>>>>    	return 0;
>>>>>    }
>>>> I don't see issues, so you have my approve but it is Intel owned
>>>> code and I need Intel to approve.
>>>> .
>>>> Blazej, Could you please create Github PR with a patch if Intel is
>>>> good with the change? I would like to see test results before
>>>> merge.
>>> Hi
>>> I've added a PR on github, I'll review this change by the end of the
>>> week.
>>>
>>> PR: https://github.com/md-raid-utilities/mdadm/pull/152
>> I see this error reported from PR test, may i know how to access
>> these two logs?  This fix is for imsm, and ->open_new for ddf not
>> even touch "update_tail", not sure how this could cause ddf test
>> failure.
>>
>>       /home/vagrant/host/mdadm/tests/10ddf-create-fail-rebuild...
>> Execution time (seconds): 46 FAILED - see
>> /var/tmp/10ddf-create-fail-rebuild.log and
>> /var/tmp/fail10ddf-create-fail-rebuild.log for details
>>       /home/vagrant/host/mdadm/tests/10ddf-fail-readd... Execution
>> time (seconds): 27 FAILED - see /var/tmp/10ddf-fail-readd.log and
>> /var/tmp/fail10ddf-fail-readd.log for details
> It is known problem, so far I know Nigel is looking at it. There is no
> fix for now. Execution timed out and logs has not been copied for user
> as expected.
>
> Here you can see the Nightly report that is executed on top regularly
> to at least determine if fails are persistent (probably not caused by
> your change).
>
> Sorry, it is not yet perfect but at least something :)
>
> Thanks,
> Mariusz
Blazej Kucman Feb. 19, 2025, 3:54 p.m. UTC | #6
On Tue, 18 Feb 2025 10:50:46 -0800
junxiao.bi@oracle.com wrote:

> Thanks Mariusz for the review and share the test details.
> 
> I have sent a v2 to address the code/log style issue.
> 
> Hi Blazej, can you help review the v2 version?

Hi Junxiao,

Sorry for the delay, I need some more time, I want to run this through
our internal functional testing, but unfortunately I have a CI failure.

Regards,
Blazej

> 
> Thanks,
> 
> Junxiao.
> 
> On 2/18/25 10:22 AM, Mariusz Tkaczyk wrote:
> > Hello,
> > Sorry for a delay.
> >
> > On Wed, 12 Feb 2025 22:38:13 -0800
> > junxiao.bi@oracle.com wrote:
> >  
> >> Hi Mariusz & Blazej,
> >>
> >> Thanks for the review and file PR.
> >>
> >> Please check other comments below.
> >>
> >> On 2/12/25 1:51 PM, Blazej Kucman wrote:  
> >>> On Wed, 12 Feb 2025 11:07:13 +0100
> >>> Mariusz Tkaczyk <mtkaczyk@kernel.org> wrote:
> >>>     
> >>>> Hello Junxiao,
> >>>> Thanks for solid and complete explanation!
> >>>>
> >>>> On Mon, 10 Feb 2025 13:22:25 -0800
> >>>> Junxiao Bi <junxiao.bi@oracle.com> wrote:
> >>>>     
> >>>>> When manager thread detects new array, it will invoke
> >>>>> manage_new(). For imsm array, it will further invoke
> >>>>> imsm_open_new(). Since commit bbab0940fa75("imsm: write bad
> >>>>> block log on metadata sync"), it preallocates bad block log when
> >>>>> opening the array, that requires increasing the mpb buffer size.
> >>>>> To do that, imsm_open_new() invokes
> >>>>> imsm_update_metadata_locally(), which first uses
> >>>>> imsm_prepare_update() to allocate a larger mpb buffer and store
> >>>>> it at "mpb->next_buf", and then invoke imsm_process_update() to
> >>>>> copy the content from current mpb buffer "mpb->buf" to
> >>>>> "mpb->next_buf", and then free the current mpb buffer and set
> >>>>> the new buffer as current.
> >>>>>
> >>>>> There is a small race window, when monitor thread is syncing
> >>>>> metadata, it grabs current buffer pointer in
> >>>>> imsm_sync_metadata()->write_super_imsm(), but before flushing
> >>>>> the buffer to disk, manager thread does above switching buffer
> >>>>> which frees current buffer, then monitor thread will run into
> >>>>> use-after-free issue and could cause on-disk metadata
> >>>>> corruption. If system keeps running, further metadata update
> >>>>> could fix the corruption, because after switching buffer, the
> >>>>> new buffer will contain good metadata, but if panic/power cycle
> >>>>> happens while disk metadata is corrupted, the system will run
> >>>>> into bootup failure if array is used as root, otherwise the
> >>>>> array can not be assembled after boot if not used as root.
> >>>>>
> >>>>> This issue will not happen for imsm array with only one member
> >>>>> array, because the memory array has not be opened yet, monitor
> >>>>> thread will not do any metadata updates.
> >>>>> This can happen for imsm array with at lease two member array,
> >>>>> in the following two scenarios:
> >>>>> 1. Restarting mdmon process with at least two member array
> >>>>> This will happen during system boot up or user restart mdmon
> >>>>> after mdadm upgrade
> >>>>> 2. Adding new member array to exist imsm array with at least one
> >>>>> member array.
> >>>>>
> >>>>> To fix this, delay the switching buffer operation to monitor
> >>>>> thread.
> >>>>>
> >>>>> Fixes: bbab0940fa75 ("imsm: write bad block log on metadata
> >>>>> sync") Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> >>>>> ---
> >>>>>    managemon.c   |  6 ++++++
> >>>>>    super-intel.c | 14 +++++++++++---
> >>>>>    2 files changed, 17 insertions(+), 3 deletions(-)
> >>>>>
> >>>>> diff --git a/managemon.c b/managemon.c
> >>>>> index d79813282457..855c85c3da92 100644
> >>>>> --- a/managemon.c
> >>>>> +++ b/managemon.c
> >>>>> @@ -726,6 +726,7 @@ static void manage_new(struct mdstat_ent
> >>>>> *mdstat, int i, inst;
> >>>>>    	int failed = 0;
> >>>>>    	char buf[SYSFS_MAX_BUF_SIZE];
> >>>>> +	struct metadata_update *update = NULL;  
> >>>> If you are adding something new here, please follow reversed
> >>>> Christmas tree convention.  
> >> got it, i will move this new variable to the top of the function in
> >> v2. Should i move variable "buf" to proper location as well?
> >>
> >>  
> > Either way is fine. I have no objections to do this.
> >  
> >>>>     
> >>>>>    
> >>>>>    	/* check if array is ready to be monitored */
> >>>>>    	if (!mdstat->active || !mdstat->level)
> >>>>> @@ -824,9 +825,14 @@ static void manage_new(struct mdstat_ent
> >>>>> *mdstat, /* if everything checks out tell the metadata handler
> >>>>> we want to
> >>>>>    	 * manage this instance
> >>>>>    	 */
> >>>>> +	container->update_tail = &update;
> >>>>>    	if (!aa_ready(new) ||
> >>>>> container->ss->open_new(container, new, inst) < 0) {
> >>>>> +		container->update_tail = NULL;
> >>>>>    		goto error;
> >>>>>    	} else {
> >>>>> +		if (update)
> >>>>> +			queue_metadata_update(update);
> >>>>> +		container->update_tail = NULL;
> >>>>>    		replace_array(container, victim, new);
> >>>>>    		if (failed) {
> >>>>>    			new->check_degraded = 1;
> >>>>> diff --git a/super-intel.c b/super-intel.c
> >>>>> index cab841980830..4988eef191da 100644
> >>>>> --- a/super-intel.c
> >>>>> +++ b/super-intel.c
> >>>>> @@ -8467,12 +8467,15 @@ static int imsm_count_failed(struct
> >>>>> intel_super *super, struct imsm_dev *dev, return failed;
> >>>>>    }
> >>>>>    
> >>>>> +static int imsm_prepare_update(struct supertype *st,
> >>>>> +			       struct metadata_update *update);
> >>>>>    static int imsm_open_new(struct supertype *c, struct
> >>>>> active_array *a, int inst)
> >>>>>    {
> >>>>>    	struct intel_super *super = c->sb;
> >>>>>    	struct imsm_super *mpb = super->anchor;
> >>>>> -	struct imsm_update_prealloc_bb_mem u;
> >>>>> +	struct imsm_update_prealloc_bb_mem *u;
> >>>>> +	struct metadata_update mu;
> >>>>>    
> >>>>>    	if (inst >= mpb->num_raid_devs) {
> >>>>>    		pr_err("subarry index %d, out of range\n",
> >>>>> inst); @@ -8482,8 +8485,13 @@ static int imsm_open_new(struct
> >>>>> supertype *c, struct active_array *a, dprintf("imsm: open_new
> >>>>> %d\n", inst); a->info.container_member = inst;
> >>>>>    
> >>>>> -	u.type = update_prealloc_badblocks_mem;
> >>>>> -	imsm_update_metadata_locally(c, &u, sizeof(u));
> >>>>> +	u = xmalloc(sizeof(*u));
> >>>>> +	u->type = update_prealloc_badblocks_mem;
> >>>>> +	mu.len = sizeof(*u);
> >>>>> +	mu.buf = (char *)u;
> >>>>> +	imsm_prepare_update(c, &mu);
> >>>>> +	if (c->update_tail)
> >>>>> +		append_metadata_update(c, u, sizeof(*u));
> >>>>>    
> >>>>>    	return 0;
> >>>>>    }  
> >>>> I don't see issues, so you have my approve but it is Intel owned
> >>>> code and I need Intel to approve.
> >>>> .
> >>>> Blazej, Could you please create Github PR with a patch if Intel
> >>>> is good with the change? I would like to see test results before
> >>>> merge.  
> >>> Hi
> >>> I've added a PR on github, I'll review this change by the end of
> >>> the week.
> >>>
> >>> PR: https://github.com/md-raid-utilities/mdadm/pull/152  
> >> I see this error reported from PR test, may i know how to access
> >> these two logs?  This fix is for imsm, and ->open_new for ddf not
> >> even touch "update_tail", not sure how this could cause ddf test
> >> failure.
> >>
> >>       /home/vagrant/host/mdadm/tests/10ddf-create-fail-rebuild...
> >> Execution time (seconds): 46 FAILED - see
> >> /var/tmp/10ddf-create-fail-rebuild.log and
> >> /var/tmp/fail10ddf-create-fail-rebuild.log for details
> >>       /home/vagrant/host/mdadm/tests/10ddf-fail-readd... Execution
> >> time (seconds): 27 FAILED - see /var/tmp/10ddf-fail-readd.log and
> >> /var/tmp/fail10ddf-fail-readd.log for details  
> > It is known problem, so far I know Nigel is looking at it. There is
> > no fix for now. Execution timed out and logs has not been copied
> > for user as expected.
> >
> > Here you can see the Nightly report that is executed on top
> > regularly to at least determine if fails are persistent (probably
> > not caused by your change).
> >
> > Sorry, it is not yet perfect but at least something :)
> >
> > Thanks,
> > Mariusz
Junxiao Bi Feb. 19, 2025, 6:02 p.m. UTC | #7
On 2/19/25 7:54 AM, Blazej Kucman wrote:

> On Tue, 18 Feb 2025 10:50:46 -0800
> junxiao.bi@oracle.com wrote:
>
>> Thanks Mariusz for the review and share the test details.
>>
>> I have sent a v2 to address the code/log style issue.
>>
>> Hi Blazej, can you help review the v2 version?
> Hi Junxiao,
>
> Sorry for the delay, I need some more time, I want to run this through
> our internal functional testing, but unfortunately I have a CI failure.
Understood. Thank you.
diff mbox series

Patch

diff --git a/managemon.c b/managemon.c
index d79813282457..855c85c3da92 100644
--- a/managemon.c
+++ b/managemon.c
@@ -726,6 +726,7 @@  static void manage_new(struct mdstat_ent *mdstat,
 	int i, inst;
 	int failed = 0;
 	char buf[SYSFS_MAX_BUF_SIZE];
+	struct metadata_update *update = NULL;
 
 	/* check if array is ready to be monitored */
 	if (!mdstat->active || !mdstat->level)
@@ -824,9 +825,14 @@  static void manage_new(struct mdstat_ent *mdstat,
 	/* if everything checks out tell the metadata handler we want to
 	 * manage this instance
 	 */
+	container->update_tail = &update;
 	if (!aa_ready(new) || container->ss->open_new(container, new, inst) < 0) {
+		container->update_tail = NULL;
 		goto error;
 	} else {
+		if (update)
+			queue_metadata_update(update);
+		container->update_tail = NULL;
 		replace_array(container, victim, new);
 		if (failed) {
 			new->check_degraded = 1;
diff --git a/super-intel.c b/super-intel.c
index cab841980830..4988eef191da 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8467,12 +8467,15 @@  static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
 	return failed;
 }
 
+static int imsm_prepare_update(struct supertype *st,
+			       struct metadata_update *update);
 static int imsm_open_new(struct supertype *c, struct active_array *a,
 			 int inst)
 {
 	struct intel_super *super = c->sb;
 	struct imsm_super *mpb = super->anchor;
-	struct imsm_update_prealloc_bb_mem u;
+	struct imsm_update_prealloc_bb_mem *u;
+	struct metadata_update mu;
 
 	if (inst >= mpb->num_raid_devs) {
 		pr_err("subarry index %d, out of range\n", inst);
@@ -8482,8 +8485,13 @@  static int imsm_open_new(struct supertype *c, struct active_array *a,
 	dprintf("imsm: open_new %d\n", inst);
 	a->info.container_member = inst;
 
-	u.type = update_prealloc_badblocks_mem;
-	imsm_update_metadata_locally(c, &u, sizeof(u));
+	u = xmalloc(sizeof(*u));
+	u->type = update_prealloc_badblocks_mem;
+	mu.len = sizeof(*u);
+	mu.buf = (char *)u;
+	imsm_prepare_update(c, &mu);
+	if (c->update_tail)
+		append_metadata_update(c, u, sizeof(*u));
 
 	return 0;
 }