diff mbox series

[2/6] btrfs: add cancelable chunk relocation support

Message ID 79a09502c532bc9939645d2711c72ebad5fce2e7.1621526221.git.dsterba@suse.com (mailing list archive)
State New, archived
Headers show
Series Support resize and device delete cancel ops | expand

Commit Message

David Sterba May 21, 2021, 12:06 p.m. UTC
Add support code that will allow canceling relocation on the chunk
granularity. This is different and independent of balance, that also
uses relocation but is a higher level operation and manages it's own
state and pause/cancelation requests.

Relocation is used for resize (shrink) and device deletion so this will
be a common point to implement cancelation for both. The context is
entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
enclosing one chunk relocation. The status bit is set and unset between
the chunks. As relocation can take long, the effects may not be
immediate and the request and actual action can slightly race.

The fs_info::reloc_cancel_req is only supposed to be increased and does
not pair with decrement like fs_info::balance_cancel_req.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h      |  9 +++++++
 fs/btrfs/disk-io.c    |  1 +
 fs/btrfs/relocation.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 69 insertions(+), 1 deletion(-)

Comments

Josef Bacik May 21, 2021, 1:21 p.m. UTC | #1
On 5/21/21 8:06 AM, David Sterba wrote:
> Add support code that will allow canceling relocation on the chunk
> granularity. This is different and independent of balance, that also
> uses relocation but is a higher level operation and manages it's own
> state and pause/cancelation requests.
> 
> Relocation is used for resize (shrink) and device deletion so this will
> be a common point to implement cancelation for both. The context is
> entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
> enclosing one chunk relocation. The status bit is set and unset between
> the chunks. As relocation can take long, the effects may not be
> immediate and the request and actual action can slightly race.
> 
> The fs_info::reloc_cancel_req is only supposed to be increased and does
> not pair with decrement like fs_info::balance_cancel_req.
> 
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>   fs/btrfs/ctree.h      |  9 +++++++
>   fs/btrfs/disk-io.c    |  1 +
>   fs/btrfs/relocation.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 69 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index a142e56b6b9a..3dfc32a3ebab 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -565,6 +565,12 @@ enum {
>   	 */
>   	BTRFS_FS_BALANCE_RUNNING,
>   
> +	/*
> +	 * Indicate that relocation of a chunk has started, it's set per chunk
> +	 * and is toggled between chunks.
> +	 */
> +	BTRFS_FS_RELOC_RUNNING,
> +
>   	/* Indicate that the cleaner thread is awake and doing something. */
>   	BTRFS_FS_CLEANER_RUNNING,
>   
> @@ -871,6 +877,9 @@ struct btrfs_fs_info {
>   	struct btrfs_balance_control *balance_ctl;
>   	wait_queue_head_t balance_wait_q;
>   
> +	/* Cancelation requests for chunk relocation */
> +	atomic_t reloc_cancel_req;
> +
>   	u32 data_chunk_allocations;
>   	u32 metadata_ratio;
>   
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 8c3db9076988..93c994b78d61 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2251,6 +2251,7 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
>   	atomic_set(&fs_info->balance_cancel_req, 0);
>   	fs_info->balance_ctl = NULL;
>   	init_waitqueue_head(&fs_info->balance_wait_q);
> +	atomic_set(&fs_info->reloc_cancel_req, 0);
>   }
>   
>   static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index b70be2ac2e9e..9b84eb86e426 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2876,11 +2876,12 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
>   }
>   
>   /*
> - * Allow error injection to test balance cancellation
> + * Allow error injection to test balance/relocation cancellation
>    */
>   noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
>   {
>   	return atomic_read(&fs_info->balance_cancel_req) ||
> +		atomic_read(&fs_info->reloc_cancel_req) ||
>   		fatal_signal_pending(current);
>   }
>   ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
> @@ -3780,6 +3781,47 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
>   	return inode;
>   }
>   
> +/*
> + * Mark start of chunk relocation that is cancelable. Check if the cancelation
> + * has been requested meanwhile and don't start in that case.
> + *
> + * Return:
> + *   0             success
> + *   -EINPROGRESS  operation is already in progress, that's probably a bug
> + *   -ECANCELED    cancelation request was set before the operation started
> + */
> +static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
> +{
> +	if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
> +		/* This should not happen */
> +		btrfs_err(fs_info, "reloc already running, cannot start");
> +		return -EINPROGRESS;
> +	}
> +
> +	if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
> +		btrfs_info(fs_info, "chunk relocation canceled on start");
> +		/*
> +		 * On cancel, clear all requests but let the caller mark
> +		 * the end after cleanup operations.
> +		 */
> +		atomic_set(&fs_info->reloc_cancel_req, 0);
> +		return -ECANCELED;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Mark end of chunk relocation that is cancelable and wake any waiters.
> + */
> +static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
> +{
> +	/* Requested after start, clear bit first so any waiters can continue */
> +	if (atomic_read(&fs_info->reloc_cancel_req) > 0)
> +		btrfs_info(fs_info, "chunk relocation canceled during operation");
> +	clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
> +	atomic_set(&fs_info->reloc_cancel_req, 0);
> +}
> +
>   static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
>   {
>   	struct reloc_control *rc;
> @@ -3862,6 +3904,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>   		return -ENOMEM;
>   	}
>   
> +	ret = reloc_chunk_start(fs_info);
> +	if (ret < 0) {
> +		err = ret;
> +		goto out_end;
> +	}

This needs a btrfs_put_block_group(bg) so we don't leak the bg.  Thanks,

Josef
David Sterba May 26, 2021, 10:56 p.m. UTC | #2
On Fri, May 21, 2021 at 09:21:29AM -0400, Josef Bacik wrote:
> On 5/21/21 8:06 AM, David Sterba wrote:
> > Add support code that will allow canceling relocation on the chunk
> > granularity. This is different and independent of balance, that also
> > uses relocation but is a higher level operation and manages it's own
> > state and pause/cancelation requests.
> > 
> > Relocation is used for resize (shrink) and device deletion so this will
> > be a common point to implement cancelation for both. The context is
> > entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
> > enclosing one chunk relocation. The status bit is set and unset between
> > the chunks. As relocation can take long, the effects may not be
> > immediate and the request and actual action can slightly race.
> > 
> > The fs_info::reloc_cancel_req is only supposed to be increased and does
> > not pair with decrement like fs_info::balance_cancel_req.
> > 
> > Signed-off-by: David Sterba <dsterba@suse.com>
> > ---
> >   fs/btrfs/ctree.h      |  9 +++++++
> >   fs/btrfs/disk-io.c    |  1 +
> >   fs/btrfs/relocation.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
> >   3 files changed, 69 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > index a142e56b6b9a..3dfc32a3ebab 100644
> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -565,6 +565,12 @@ enum {
> >   	 */
> >   	BTRFS_FS_BALANCE_RUNNING,
> >   
> > +	/*
> > +	 * Indicate that relocation of a chunk has started, it's set per chunk
> > +	 * and is toggled between chunks.
> > +	 */
> > +	BTRFS_FS_RELOC_RUNNING,
> > +
> >   	/* Indicate that the cleaner thread is awake and doing something. */
> >   	BTRFS_FS_CLEANER_RUNNING,
> >   
> > @@ -871,6 +877,9 @@ struct btrfs_fs_info {
> >   	struct btrfs_balance_control *balance_ctl;
> >   	wait_queue_head_t balance_wait_q;
> >   
> > +	/* Cancelation requests for chunk relocation */
> > +	atomic_t reloc_cancel_req;
> > +
> >   	u32 data_chunk_allocations;
> >   	u32 metadata_ratio;
> >   
> > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> > index 8c3db9076988..93c994b78d61 100644
> > --- a/fs/btrfs/disk-io.c
> > +++ b/fs/btrfs/disk-io.c
> > @@ -2251,6 +2251,7 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
> >   	atomic_set(&fs_info->balance_cancel_req, 0);
> >   	fs_info->balance_ctl = NULL;
> >   	init_waitqueue_head(&fs_info->balance_wait_q);
> > +	atomic_set(&fs_info->reloc_cancel_req, 0);
> >   }
> >   
> >   static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
> > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> > index b70be2ac2e9e..9b84eb86e426 100644
> > --- a/fs/btrfs/relocation.c
> > +++ b/fs/btrfs/relocation.c
> > @@ -2876,11 +2876,12 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
> >   }
> >   
> >   /*
> > - * Allow error injection to test balance cancellation
> > + * Allow error injection to test balance/relocation cancellation
> >    */
> >   noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
> >   {
> >   	return atomic_read(&fs_info->balance_cancel_req) ||
> > +		atomic_read(&fs_info->reloc_cancel_req) ||
> >   		fatal_signal_pending(current);
> >   }
> >   ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
> > @@ -3780,6 +3781,47 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
> >   	return inode;
> >   }
> >   
> > +/*
> > + * Mark start of chunk relocation that is cancelable. Check if the cancelation
> > + * has been requested meanwhile and don't start in that case.
> > + *
> > + * Return:
> > + *   0             success
> > + *   -EINPROGRESS  operation is already in progress, that's probably a bug
> > + *   -ECANCELED    cancelation request was set before the operation started
> > + */
> > +static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
> > +{
> > +	if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
> > +		/* This should not happen */
> > +		btrfs_err(fs_info, "reloc already running, cannot start");
> > +		return -EINPROGRESS;
> > +	}
> > +
> > +	if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
> > +		btrfs_info(fs_info, "chunk relocation canceled on start");
> > +		/*
> > +		 * On cancel, clear all requests but let the caller mark
> > +		 * the end after cleanup operations.
> > +		 */
> > +		atomic_set(&fs_info->reloc_cancel_req, 0);
> > +		return -ECANCELED;
> > +	}
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Mark end of chunk relocation that is cancelable and wake any waiters.
> > + */
> > +static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
> > +{
> > +	/* Requested after start, clear bit first so any waiters can continue */
> > +	if (atomic_read(&fs_info->reloc_cancel_req) > 0)
> > +		btrfs_info(fs_info, "chunk relocation canceled during operation");
> > +	clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
> > +	atomic_set(&fs_info->reloc_cancel_req, 0);
> > +}
> > +
> >   static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
> >   {
> >   	struct reloc_control *rc;
> > @@ -3862,6 +3904,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
> >   		return -ENOMEM;
> >   	}
> >   
> > +	ret = reloc_chunk_start(fs_info);
> > +	if (ret < 0) {
> > +		err = ret;
> > +		goto out_end;
> > +	}
> 
> This needs a btrfs_put_block_group(bg) so we don't leak the bg.  Thanks,

Indeed, thanks, As there are no other remaining things to fix I won't
repost the whole series just for this. I've fixed it in misc-next, where
the exit block looks like

@@ -3952,7 +4000,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
        if (err && rw)
                btrfs_dec_block_group_ro(rc->block_group);
        iput(rc->data_inode);
+out_put_bg:
        btrfs_put_block_group(rc->block_group);
+       reloc_chunk_end(fs_info);
        free_reloc_control(rc);
        return err;
 }

and the failure jumps to out_put_bg, ie. restoring the ro count.
Filipe Manana June 16, 2021, 1:54 p.m. UTC | #3
On Fri, May 21, 2021 at 9:15 PM David Sterba <dsterba@suse.com> wrote:
>
> Add support code that will allow canceling relocation on the chunk
> granularity. This is different and independent of balance, that also
> uses relocation but is a higher level operation and manages it's own
> state and pause/cancelation requests.
>
> Relocation is used for resize (shrink) and device deletion so this will
> be a common point to implement cancelation for both. The context is
> entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
> enclosing one chunk relocation. The status bit is set and unset between
> the chunks. As relocation can take long, the effects may not be
> immediate and the request and actual action can slightly race.
>
> The fs_info::reloc_cancel_req is only supposed to be increased and does
> not pair with decrement like fs_info::balance_cancel_req.
>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/ctree.h      |  9 +++++++
>  fs/btrfs/disk-io.c    |  1 +
>  fs/btrfs/relocation.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index a142e56b6b9a..3dfc32a3ebab 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -565,6 +565,12 @@ enum {
>          */
>         BTRFS_FS_BALANCE_RUNNING,
>
> +       /*
> +        * Indicate that relocation of a chunk has started, it's set per chunk
> +        * and is toggled between chunks.
> +        */
> +       BTRFS_FS_RELOC_RUNNING,
> +
>         /* Indicate that the cleaner thread is awake and doing something. */
>         BTRFS_FS_CLEANER_RUNNING,
>
> @@ -871,6 +877,9 @@ struct btrfs_fs_info {
>         struct btrfs_balance_control *balance_ctl;
>         wait_queue_head_t balance_wait_q;
>
> +       /* Cancelation requests for chunk relocation */
> +       atomic_t reloc_cancel_req;
> +
>         u32 data_chunk_allocations;
>         u32 metadata_ratio;
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 8c3db9076988..93c994b78d61 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2251,6 +2251,7 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
>         atomic_set(&fs_info->balance_cancel_req, 0);
>         fs_info->balance_ctl = NULL;
>         init_waitqueue_head(&fs_info->balance_wait_q);
> +       atomic_set(&fs_info->reloc_cancel_req, 0);
>  }
>
>  static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index b70be2ac2e9e..9b84eb86e426 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2876,11 +2876,12 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
>  }
>
>  /*
> - * Allow error injection to test balance cancellation
> + * Allow error injection to test balance/relocation cancellation
>   */
>  noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
>  {
>         return atomic_read(&fs_info->balance_cancel_req) ||
> +               atomic_read(&fs_info->reloc_cancel_req) ||
>                 fatal_signal_pending(current);
>  }
>  ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
> @@ -3780,6 +3781,47 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
>         return inode;
>  }
>
> +/*
> + * Mark start of chunk relocation that is cancelable. Check if the cancelation
> + * has been requested meanwhile and don't start in that case.
> + *
> + * Return:
> + *   0             success
> + *   -EINPROGRESS  operation is already in progress, that's probably a bug
> + *   -ECANCELED    cancelation request was set before the operation started
> + */
> +static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
> +{
> +       if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
> +               /* This should not happen */
> +               btrfs_err(fs_info, "reloc already running, cannot start");
> +               return -EINPROGRESS;
> +       }
> +
> +       if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
> +               btrfs_info(fs_info, "chunk relocation canceled on start");
> +               /*
> +                * On cancel, clear all requests but let the caller mark
> +                * the end after cleanup operations.
> +                */
> +               atomic_set(&fs_info->reloc_cancel_req, 0);
> +               return -ECANCELED;
> +       }
> +       return 0;
> +}
> +
> +/*
> + * Mark end of chunk relocation that is cancelable and wake any waiters.
> + */
> +static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
> +{
> +       /* Requested after start, clear bit first so any waiters can continue */
> +       if (atomic_read(&fs_info->reloc_cancel_req) > 0)
> +               btrfs_info(fs_info, "chunk relocation canceled during operation");
> +       clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
> +       atomic_set(&fs_info->reloc_cancel_req, 0);
> +}
> +
>  static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
>  {
>         struct reloc_control *rc;
> @@ -3862,6 +3904,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>                 return -ENOMEM;
>         }
>
> +       ret = reloc_chunk_start(fs_info);
> +       if (ret < 0) {
> +               err = ret;
> +               goto out_end;

There's a bug here. At out_end we do:

btrfs_put_block_group(rc->block_group);

But rc->block_group was not yet assigned.

Thanks.

> +       }
> +
>         rc->extent_root = extent_root;
>         rc->block_group = bg;
>
> @@ -3953,6 +4001,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>                 btrfs_dec_block_group_ro(rc->block_group);
>         iput(rc->data_inode);
>         btrfs_put_block_group(rc->block_group);
> +out_end:
> +       reloc_chunk_end(fs_info);
>         free_reloc_control(rc);
>         return err;
>  }
> @@ -4073,6 +4123,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
>                 goto out;
>         }
>
> +       ret = reloc_chunk_start(fs_info);
> +       if (ret < 0) {
> +               err = ret;
> +               goto out_end;
> +       }
> +
>         rc->extent_root = fs_info->extent_root;
>
>         set_reloc_control(rc);
> @@ -4137,6 +4193,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
>                 err = ret;
>  out_unset:
>         unset_reloc_control(rc);
> +out_end:
> +       reloc_chunk_end(fs_info);
>         free_reloc_control(rc);
>  out:
>         free_reloc_roots(&reloc_roots);
> --
> 2.29.2
>
Filipe Manana June 16, 2021, 1:55 p.m. UTC | #4
On Wed, Jun 16, 2021 at 2:54 PM Filipe Manana <fdmanana@gmail.com> wrote:
>
> On Fri, May 21, 2021 at 9:15 PM David Sterba <dsterba@suse.com> wrote:
> >
> > Add support code that will allow canceling relocation on the chunk
> > granularity. This is different and independent of balance, that also
> > uses relocation but is a higher level operation and manages it's own
> > state and pause/cancelation requests.
> >
> > Relocation is used for resize (shrink) and device deletion so this will
> > be a common point to implement cancelation for both. The context is
> > entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
> > enclosing one chunk relocation. The status bit is set and unset between
> > the chunks. As relocation can take long, the effects may not be
> > immediate and the request and actual action can slightly race.
> >
> > The fs_info::reloc_cancel_req is only supposed to be increased and does
> > not pair with decrement like fs_info::balance_cancel_req.
> >
> > Signed-off-by: David Sterba <dsterba@suse.com>
> > ---
> >  fs/btrfs/ctree.h      |  9 +++++++
> >  fs/btrfs/disk-io.c    |  1 +
> >  fs/btrfs/relocation.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
> >  3 files changed, 69 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > index a142e56b6b9a..3dfc32a3ebab 100644
> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -565,6 +565,12 @@ enum {
> >          */
> >         BTRFS_FS_BALANCE_RUNNING,
> >
> > +       /*
> > +        * Indicate that relocation of a chunk has started, it's set per chunk
> > +        * and is toggled between chunks.
> > +        */
> > +       BTRFS_FS_RELOC_RUNNING,
> > +
> >         /* Indicate that the cleaner thread is awake and doing something. */
> >         BTRFS_FS_CLEANER_RUNNING,
> >
> > @@ -871,6 +877,9 @@ struct btrfs_fs_info {
> >         struct btrfs_balance_control *balance_ctl;
> >         wait_queue_head_t balance_wait_q;
> >
> > +       /* Cancelation requests for chunk relocation */
> > +       atomic_t reloc_cancel_req;
> > +
> >         u32 data_chunk_allocations;
> >         u32 metadata_ratio;
> >
> > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> > index 8c3db9076988..93c994b78d61 100644
> > --- a/fs/btrfs/disk-io.c
> > +++ b/fs/btrfs/disk-io.c
> > @@ -2251,6 +2251,7 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
> >         atomic_set(&fs_info->balance_cancel_req, 0);
> >         fs_info->balance_ctl = NULL;
> >         init_waitqueue_head(&fs_info->balance_wait_q);
> > +       atomic_set(&fs_info->reloc_cancel_req, 0);
> >  }
> >
> >  static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
> > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> > index b70be2ac2e9e..9b84eb86e426 100644
> > --- a/fs/btrfs/relocation.c
> > +++ b/fs/btrfs/relocation.c
> > @@ -2876,11 +2876,12 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
> >  }
> >
> >  /*
> > - * Allow error injection to test balance cancellation
> > + * Allow error injection to test balance/relocation cancellation
> >   */
> >  noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
> >  {
> >         return atomic_read(&fs_info->balance_cancel_req) ||
> > +               atomic_read(&fs_info->reloc_cancel_req) ||
> >                 fatal_signal_pending(current);
> >  }
> >  ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
> > @@ -3780,6 +3781,47 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
> >         return inode;
> >  }
> >
> > +/*
> > + * Mark start of chunk relocation that is cancelable. Check if the cancelation
> > + * has been requested meanwhile and don't start in that case.
> > + *
> > + * Return:
> > + *   0             success
> > + *   -EINPROGRESS  operation is already in progress, that's probably a bug
> > + *   -ECANCELED    cancelation request was set before the operation started
> > + */
> > +static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
> > +{
> > +       if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
> > +               /* This should not happen */
> > +               btrfs_err(fs_info, "reloc already running, cannot start");
> > +               return -EINPROGRESS;
> > +       }
> > +
> > +       if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
> > +               btrfs_info(fs_info, "chunk relocation canceled on start");
> > +               /*
> > +                * On cancel, clear all requests but let the caller mark
> > +                * the end after cleanup operations.
> > +                */
> > +               atomic_set(&fs_info->reloc_cancel_req, 0);
> > +               return -ECANCELED;
> > +       }
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Mark end of chunk relocation that is cancelable and wake any waiters.
> > + */
> > +static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
> > +{
> > +       /* Requested after start, clear bit first so any waiters can continue */
> > +       if (atomic_read(&fs_info->reloc_cancel_req) > 0)
> > +               btrfs_info(fs_info, "chunk relocation canceled during operation");
> > +       clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
> > +       atomic_set(&fs_info->reloc_cancel_req, 0);
> > +}
> > +
> >  static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
> >  {
> >         struct reloc_control *rc;
> > @@ -3862,6 +3904,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
> >                 return -ENOMEM;
> >         }
> >
> > +       ret = reloc_chunk_start(fs_info);
> > +       if (ret < 0) {
> > +               err = ret;
> > +               goto out_end;
>
> There's a bug here. At out_end we do:
>
> btrfs_put_block_group(rc->block_group);
>
> But rc->block_group was not yet assigned.

On misc-next it's actually label "out_put_bg" and under there we do
the block group put, but not on the version posted here.

>
> Thanks.
>
> > +       }
> > +
> >         rc->extent_root = extent_root;
> >         rc->block_group = bg;
> >
> > @@ -3953,6 +4001,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
> >                 btrfs_dec_block_group_ro(rc->block_group);
> >         iput(rc->data_inode);
> >         btrfs_put_block_group(rc->block_group);
> > +out_end:
> > +       reloc_chunk_end(fs_info);
> >         free_reloc_control(rc);
> >         return err;
> >  }
> > @@ -4073,6 +4123,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
> >                 goto out;
> >         }
> >
> > +       ret = reloc_chunk_start(fs_info);
> > +       if (ret < 0) {
> > +               err = ret;
> > +               goto out_end;
> > +       }
> > +
> >         rc->extent_root = fs_info->extent_root;
> >
> >         set_reloc_control(rc);
> > @@ -4137,6 +4193,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
> >                 err = ret;
> >  out_unset:
> >         unset_reloc_control(rc);
> > +out_end:
> > +       reloc_chunk_end(fs_info);
> >         free_reloc_control(rc);
> >  out:
> >         free_reloc_roots(&reloc_roots);
> > --
> > 2.29.2
> >
>
>
> --
> Filipe David Manana,
>
> “Whether you think you can, or you think you can't — you're right.”
David Sterba June 16, 2021, 3:53 p.m. UTC | #5
On Wed, Jun 16, 2021 at 02:55:46PM +0100, Filipe Manana wrote:
> On Wed, Jun 16, 2021 at 2:54 PM Filipe Manana <fdmanana@gmail.com> wrote:
> >
> > On Fri, May 21, 2021 at 9:15 PM David Sterba <dsterba@suse.com> wrote:
> > >
> > > Add support code that will allow canceling relocation on the chunk
> > > granularity. This is different and independent of balance, that also
> > > uses relocation but is a higher level operation and manages it's own
> > > state and pause/cancelation requests.
> > >
> > > Relocation is used for resize (shrink) and device deletion so this will
> > > be a common point to implement cancelation for both. The context is
> > > entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
> > > enclosing one chunk relocation. The status bit is set and unset between
> > > the chunks. As relocation can take long, the effects may not be
> > > immediate and the request and actual action can slightly race.
> > >
> > > The fs_info::reloc_cancel_req is only supposed to be increased and does
> > > not pair with decrement like fs_info::balance_cancel_req.
> > >
> > > Signed-off-by: David Sterba <dsterba@suse.com>
> > > ---
> > >  fs/btrfs/ctree.h      |  9 +++++++
> > >  fs/btrfs/disk-io.c    |  1 +
> > >  fs/btrfs/relocation.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
> > >  3 files changed, 69 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > > index a142e56b6b9a..3dfc32a3ebab 100644
> > > --- a/fs/btrfs/ctree.h
> > > +++ b/fs/btrfs/ctree.h
> > > @@ -565,6 +565,12 @@ enum {
> > >          */
> > >         BTRFS_FS_BALANCE_RUNNING,
> > >
> > > +       /*
> > > +        * Indicate that relocation of a chunk has started, it's set per chunk
> > > +        * and is toggled between chunks.
> > > +        */
> > > +       BTRFS_FS_RELOC_RUNNING,
> > > +
> > >         /* Indicate that the cleaner thread is awake and doing something. */
> > >         BTRFS_FS_CLEANER_RUNNING,
> > >
> > > @@ -871,6 +877,9 @@ struct btrfs_fs_info {
> > >         struct btrfs_balance_control *balance_ctl;
> > >         wait_queue_head_t balance_wait_q;
> > >
> > > +       /* Cancelation requests for chunk relocation */
> > > +       atomic_t reloc_cancel_req;
> > > +
> > >         u32 data_chunk_allocations;
> > >         u32 metadata_ratio;
> > >
> > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> > > index 8c3db9076988..93c994b78d61 100644
> > > --- a/fs/btrfs/disk-io.c
> > > +++ b/fs/btrfs/disk-io.c
> > > @@ -2251,6 +2251,7 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
> > >         atomic_set(&fs_info->balance_cancel_req, 0);
> > >         fs_info->balance_ctl = NULL;
> > >         init_waitqueue_head(&fs_info->balance_wait_q);
> > > +       atomic_set(&fs_info->reloc_cancel_req, 0);
> > >  }
> > >
> > >  static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
> > > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> > > index b70be2ac2e9e..9b84eb86e426 100644
> > > --- a/fs/btrfs/relocation.c
> > > +++ b/fs/btrfs/relocation.c
> > > @@ -2876,11 +2876,12 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
> > >  }
> > >
> > >  /*
> > > - * Allow error injection to test balance cancellation
> > > + * Allow error injection to test balance/relocation cancellation
> > >   */
> > >  noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
> > >  {
> > >         return atomic_read(&fs_info->balance_cancel_req) ||
> > > +               atomic_read(&fs_info->reloc_cancel_req) ||
> > >                 fatal_signal_pending(current);
> > >  }
> > >  ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
> > > @@ -3780,6 +3781,47 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
> > >         return inode;
> > >  }
> > >
> > > +/*
> > > + * Mark start of chunk relocation that is cancelable. Check if the cancelation
> > > + * has been requested meanwhile and don't start in that case.
> > > + *
> > > + * Return:
> > > + *   0             success
> > > + *   -EINPROGRESS  operation is already in progress, that's probably a bug
> > > + *   -ECANCELED    cancelation request was set before the operation started
> > > + */
> > > +static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
> > > +{
> > > +       if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
> > > +               /* This should not happen */
> > > +               btrfs_err(fs_info, "reloc already running, cannot start");
> > > +               return -EINPROGRESS;
> > > +       }
> > > +
> > > +       if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
> > > +               btrfs_info(fs_info, "chunk relocation canceled on start");
> > > +               /*
> > > +                * On cancel, clear all requests but let the caller mark
> > > +                * the end after cleanup operations.
> > > +                */
> > > +               atomic_set(&fs_info->reloc_cancel_req, 0);
> > > +               return -ECANCELED;
> > > +       }
> > > +       return 0;
> > > +}
> > > +
> > > +/*
> > > + * Mark end of chunk relocation that is cancelable and wake any waiters.
> > > + */
> > > +static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
> > > +{
> > > +       /* Requested after start, clear bit first so any waiters can continue */
> > > +       if (atomic_read(&fs_info->reloc_cancel_req) > 0)
> > > +               btrfs_info(fs_info, "chunk relocation canceled during operation");
> > > +       clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
> > > +       atomic_set(&fs_info->reloc_cancel_req, 0);
> > > +}
> > > +
> > >  static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
> > >  {
> > >         struct reloc_control *rc;
> > > @@ -3862,6 +3904,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
> > >                 return -ENOMEM;
> > >         }
> > >
> > > +       ret = reloc_chunk_start(fs_info);
> > > +       if (ret < 0) {
> > > +               err = ret;
> > > +               goto out_end;
> >
> > There's a bug here. At out_end we do:
> >
> > btrfs_put_block_group(rc->block_group);
> >
> > But rc->block_group was not yet assigned.
> 
> On misc-next it's actually label "out_put_bg" and under there we do
> the block group put, but not on the version posted here.

Right, thanks for catching it. It should have been
btrfs_put_block_group(bg).
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a142e56b6b9a..3dfc32a3ebab 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -565,6 +565,12 @@  enum {
 	 */
 	BTRFS_FS_BALANCE_RUNNING,
 
+	/*
+	 * Indicate that relocation of a chunk has started, it's set per chunk
+	 * and is toggled between chunks.
+	 */
+	BTRFS_FS_RELOC_RUNNING,
+
 	/* Indicate that the cleaner thread is awake and doing something. */
 	BTRFS_FS_CLEANER_RUNNING,
 
@@ -871,6 +877,9 @@  struct btrfs_fs_info {
 	struct btrfs_balance_control *balance_ctl;
 	wait_queue_head_t balance_wait_q;
 
+	/* Cancelation requests for chunk relocation */
+	atomic_t reloc_cancel_req;
+
 	u32 data_chunk_allocations;
 	u32 metadata_ratio;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8c3db9076988..93c994b78d61 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2251,6 +2251,7 @@  static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
 	atomic_set(&fs_info->balance_cancel_req, 0);
 	fs_info->balance_ctl = NULL;
 	init_waitqueue_head(&fs_info->balance_wait_q);
+	atomic_set(&fs_info->reloc_cancel_req, 0);
 }
 
 static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b70be2ac2e9e..9b84eb86e426 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2876,11 +2876,12 @@  int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
 }
 
 /*
- * Allow error injection to test balance cancellation
+ * Allow error injection to test balance/relocation cancellation
  */
 noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
 {
 	return atomic_read(&fs_info->balance_cancel_req) ||
+		atomic_read(&fs_info->reloc_cancel_req) ||
 		fatal_signal_pending(current);
 }
 ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
@@ -3780,6 +3781,47 @@  struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
 	return inode;
 }
 
+/*
+ * Mark start of chunk relocation that is cancelable. Check if the cancelation
+ * has been requested meanwhile and don't start in that case.
+ *
+ * Return:
+ *   0             success
+ *   -EINPROGRESS  operation is already in progress, that's probably a bug
+ *   -ECANCELED    cancelation request was set before the operation started
+ */
+static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
+{
+	if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
+		/* This should not happen */
+		btrfs_err(fs_info, "reloc already running, cannot start");
+		return -EINPROGRESS;
+	}
+
+	if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
+		btrfs_info(fs_info, "chunk relocation canceled on start");
+		/*
+		 * On cancel, clear all requests but let the caller mark
+		 * the end after cleanup operations.
+		 */
+		atomic_set(&fs_info->reloc_cancel_req, 0);
+		return -ECANCELED;
+	}
+	return 0;
+}
+
+/*
+ * Mark end of chunk relocation that is cancelable and wake any waiters.
+ */
+static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
+{
+	/* Requested after start, clear bit first so any waiters can continue */
+	if (atomic_read(&fs_info->reloc_cancel_req) > 0)
+		btrfs_info(fs_info, "chunk relocation canceled during operation");
+	clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
+	atomic_set(&fs_info->reloc_cancel_req, 0);
+}
+
 static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
 {
 	struct reloc_control *rc;
@@ -3862,6 +3904,12 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 		return -ENOMEM;
 	}
 
+	ret = reloc_chunk_start(fs_info);
+	if (ret < 0) {
+		err = ret;
+		goto out_end;
+	}
+
 	rc->extent_root = extent_root;
 	rc->block_group = bg;
 
@@ -3953,6 +4001,8 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 		btrfs_dec_block_group_ro(rc->block_group);
 	iput(rc->data_inode);
 	btrfs_put_block_group(rc->block_group);
+out_end:
+	reloc_chunk_end(fs_info);
 	free_reloc_control(rc);
 	return err;
 }
@@ -4073,6 +4123,12 @@  int btrfs_recover_relocation(struct btrfs_root *root)
 		goto out;
 	}
 
+	ret = reloc_chunk_start(fs_info);
+	if (ret < 0) {
+		err = ret;
+		goto out_end;
+	}
+
 	rc->extent_root = fs_info->extent_root;
 
 	set_reloc_control(rc);
@@ -4137,6 +4193,8 @@  int btrfs_recover_relocation(struct btrfs_root *root)
 		err = ret;
 out_unset:
 	unset_reloc_control(rc);
+out_end:
+	reloc_chunk_end(fs_info);
 	free_reloc_control(rc);
 out:
 	free_reloc_roots(&reloc_roots);