diff mbox series

Btrfs: fix race setting up and completing qgroup rescan workers

Message ID 20190924094954.1304-1-fdmanana@kernel.org (mailing list archive)
State New, archived
Headers show
Series Btrfs: fix race setting up and completing qgroup rescan workers | expand

Commit Message

Filipe Manana Sept. 24, 2019, 9:49 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

There is a race between setting up a qgroup rescan worker and completing
a qgroup rescan worker that can lead to callers of the qgroup rescan wait
ioctl to either not wait for the rescan worker to complete or to hang
forever due to missing wake ups. The following diagram shows a sequence
of steps that illustrates the race.

        CPU 1                                                         CPU 2                                  CPU 3

 btrfs_ioctl_quota_rescan()
  btrfs_qgroup_rescan()
   qgroup_rescan_init()
    mutex_lock(&fs_info->qgroup_rescan_lock)
    spin_lock(&fs_info->qgroup_lock)

    fs_info->qgroup_flags |=
      BTRFS_QGROUP_STATUS_FLAG_RESCAN

    init_completion(
      &fs_info->qgroup_rescan_completion)

    fs_info->qgroup_rescan_running = true

    mutex_unlock(&fs_info->qgroup_rescan_lock)
    spin_unlock(&fs_info->qgroup_lock)

    btrfs_init_work()
     --> starts the worker

                                                        btrfs_qgroup_rescan_worker()
                                                         mutex_lock(&fs_info->qgroup_rescan_lock)

                                                         fs_info->qgroup_flags &=
                                                           ~BTRFS_QGROUP_STATUS_FLAG_RESCAN

                                                         mutex_unlock(&fs_info->qgroup_rescan_lock)

                                                         starts transaction, updates qgroup status
                                                         item, etc

                                                                                                           btrfs_ioctl_quota_rescan()
                                                                                                            btrfs_qgroup_rescan()
                                                                                                             qgroup_rescan_init()
                                                                                                              mutex_lock(&fs_info->qgroup_rescan_lock)
                                                                                                              spin_lock(&fs_info->qgroup_lock)

                                                                                                              fs_info->qgroup_flags |=
                                                                                                                BTRFS_QGROUP_STATUS_FLAG_RESCAN

                                                                                                              init_completion(
                                                                                                                &fs_info->qgroup_rescan_completion)

                                                                                                              fs_info->qgroup_rescan_running = true

                                                                                                              mutex_unlock(&fs_info->qgroup_rescan_lock)
                                                                                                              spin_unlock(&fs_info->qgroup_lock)

                                                                                                              btrfs_init_work()
                                                                                                               --> starts another worker

                                                         mutex_lock(&fs_info->qgroup_rescan_lock)

                                                         fs_info->qgroup_rescan_running = false

                                                         mutex_unlock(&fs_info->qgroup_rescan_lock)

							 complete_all(&fs_info->qgroup_rescan_completion)

Before the rescan worker started by the task at CPU 3 completes, if another
task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
is expected and correct behaviour.

However if other task calls btrfs_ioctl_quota_rescan_wait() before the
rescan worker started by the task at CPU 3 completes, it will return
immediately without waiting for the new rescan worker to complete,
because fs_info->qgroup_rescan_running is set to false by CPU 2.

This race is making test case btrfs/171 (from fstests) to fail often:

  btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
      --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
      +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
      @@ -1,2 +1,3 @@
       QA output created by 171
      +ERROR: quota rescan failed: Operation now in progress
       Silence is golden
      ...
      (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)

That is because the test calls the btrfs-progs commands "qgroup quota
rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
commands 'qgroup assign' and 'qgroup remove' often call the rescan start
ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
since previous waits didn't actually wait for a rescan worker to complete.

Another problem the race can cause is missing wake ups for waiters, since
the call to complete_all() happens outside a critical section and after
clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
above, if we have a waiter for the first rescan task (executed by CPU 2),
then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
calls init_completion() against fs_info->qgroup_rescan_completion which
re-initilizes its wait queue to an empty queue, therefore causing the
rescan worker at CPU 2 to call complete_all() against an empty queue, never
waking up the task waiting for that rescan worker.

Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
fs_info->qgroup_rescan_running to false in the same critical section,
delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing
the call to complete_all() in that same critical section. This gives
the protection needed to avoid rescan wait ioctl callers not waiting
for a running rescan worker and the lost wake ups problem, since
setting that rescan flag and boolean as well as initializing the wait
queue is done already in a critical section delimited by that mutex
(at qgroup_rescan_init()).

Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/qgroup.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

Comments

Josef Bacik Sept. 24, 2019, 1:53 p.m. UTC | #1
On Tue, Sep 24, 2019 at 10:49:54AM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> There is a race between setting up a qgroup rescan worker and completing
> a qgroup rescan worker that can lead to callers of the qgroup rescan wait
> ioctl to either not wait for the rescan worker to complete or to hang
> forever due to missing wake ups. The following diagram shows a sequence
> of steps that illustrates the race.
> 
>         CPU 1                                                         CPU 2                                  CPU 3
> 
>  btrfs_ioctl_quota_rescan()
>   btrfs_qgroup_rescan()
>    qgroup_rescan_init()
>     mutex_lock(&fs_info->qgroup_rescan_lock)
>     spin_lock(&fs_info->qgroup_lock)
> 
>     fs_info->qgroup_flags |=
>       BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>     init_completion(
>       &fs_info->qgroup_rescan_completion)
> 
>     fs_info->qgroup_rescan_running = true
> 
>     mutex_unlock(&fs_info->qgroup_rescan_lock)
>     spin_unlock(&fs_info->qgroup_lock)
> 
>     btrfs_init_work()
>      --> starts the worker
> 
>                                                         btrfs_qgroup_rescan_worker()
>                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> 
>                                                          fs_info->qgroup_flags &=
>                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> 
>                                                          starts transaction, updates qgroup status
>                                                          item, etc
> 
>                                                                                                            btrfs_ioctl_quota_rescan()
>                                                                                                             btrfs_qgroup_rescan()
>                                                                                                              qgroup_rescan_init()
>                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
>                                                                                                               spin_lock(&fs_info->qgroup_lock)
> 
>                                                                                                               fs_info->qgroup_flags |=
>                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>                                                                                                               init_completion(
>                                                                                                                 &fs_info->qgroup_rescan_completion)
> 
>                                                                                                               fs_info->qgroup_rescan_running = true
> 
>                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
>                                                                                                               spin_unlock(&fs_info->qgroup_lock)
> 
>                                                                                                               btrfs_init_work()
>                                                                                                                --> starts another worker
> 
>                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> 
>                                                          fs_info->qgroup_rescan_running = false
> 
>                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> 
> 							 complete_all(&fs_info->qgroup_rescan_completion)
> 
> Before the rescan worker started by the task at CPU 3 completes, if another
> task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
> flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
> is expected and correct behaviour.
> 
> However if other task calls btrfs_ioctl_quota_rescan_wait() before the
> rescan worker started by the task at CPU 3 completes, it will return
> immediately without waiting for the new rescan worker to complete,
> because fs_info->qgroup_rescan_running is set to false by CPU 2.
> 
> This race is making test case btrfs/171 (from fstests) to fail often:
> 
>   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
>       --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
>       +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
>       @@ -1,2 +1,3 @@
>        QA output created by 171
>       +ERROR: quota rescan failed: Operation now in progress
>        Silence is golden
>       ...
>       (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
> 
> That is because the test calls the btrfs-progs commands "qgroup quota
> rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
> calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
> commands 'qgroup assign' and 'qgroup remove' often call the rescan start
> ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
> since previous waits didn't actually wait for a rescan worker to complete.
> 
> Another problem the race can cause is missing wake ups for waiters, since
> the call to complete_all() happens outside a critical section and after
> clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
> above, if we have a waiter for the first rescan task (executed by CPU 2),
> then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
> rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
> complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
> calls init_completion() against fs_info->qgroup_rescan_completion which
> re-initilizes its wait queue to an empty queue, therefore causing the
> rescan worker at CPU 2 to call complete_all() against an empty queue, never
> waking up the task waiting for that rescan worker.
> 
> Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
> fs_info->qgroup_rescan_running to false in the same critical section,
> delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing
> the call to complete_all() in that same critical section. This gives
> the protection needed to avoid rescan wait ioctl callers not waiting
> for a running rescan worker and the lost wake ups problem, since
> setting that rescan flag and boolean as well as initializing the wait
> queue is done already in a critical section delimited by that mutex
> (at qgroup_rescan_init()).
> 
> Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
> Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
>  fs/btrfs/qgroup.c | 33 +++++++++++++++++++--------------
>  1 file changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 8d3bd799ac7d..52701c1be109 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -3166,9 +3166,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
>  	btrfs_free_path(path);
>  
>  	mutex_lock(&fs_info->qgroup_rescan_lock);
> -	if (!btrfs_fs_closing(fs_info))
> -		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> -

Can't we accomplish the same thing by just moving this down into the "done"
section below, and adding the complete_all under the qgruop_rescan_lock?  That
way avoid all this extra code?  Just delete the above and have 

done:
	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	fs_info->qgroup_rescan_running = false;
	complete_all(&fs_info->qgroup_rescan_completion);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

Or am I missing something?  I don't see a reason why update_qgroup_status_item()
needs to be done under the qgroup_rescan_lock.  Thanks,

Josef
Nikolay Borisov Sept. 24, 2019, 2:07 p.m. UTC | #2
On 24.09.19 г. 12:49 ч., fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> There is a race between setting up a qgroup rescan worker and completing
> a qgroup rescan worker that can lead to callers of the qgroup rescan wait
> ioctl to either not wait for the rescan worker to complete or to hang
> forever due to missing wake ups. The following diagram shows a sequence
> of steps that illustrates the race.
> 
>         CPU 1                                                         CPU 2                                  CPU 3
> 
>  btrfs_ioctl_quota_rescan()
>   btrfs_qgroup_rescan()
>    qgroup_rescan_init()
>     mutex_lock(&fs_info->qgroup_rescan_lock)
>     spin_lock(&fs_info->qgroup_lock)
> 
>     fs_info->qgroup_flags |=
>       BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>     init_completion(
>       &fs_info->qgroup_rescan_completion)
> 
>     fs_info->qgroup_rescan_running = true
> 
>     mutex_unlock(&fs_info->qgroup_rescan_lock)
>     spin_unlock(&fs_info->qgroup_lock)
> 
>     btrfs_init_work()
>      --> starts the worker
> 
>                                                         btrfs_qgroup_rescan_worker()
>                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> 
>                                                          fs_info->qgroup_flags &=
>                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> 
>                                                          starts transaction, updates qgroup status
>                                                          item, etc
> 
>                                                                                                            btrfs_ioctl_quota_rescan()
>                                                                                                             btrfs_qgroup_rescan()
>                                                                                                              qgroup_rescan_init()
>                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
>                                                                                                               spin_lock(&fs_info->qgroup_lock)
> 
>                                                                                                               fs_info->qgroup_flags |=
>                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>                                                                                                               init_completion(
>                                                                                                                 &fs_info->qgroup_rescan_completion)
> 
>                                                                                                               fs_info->qgroup_rescan_running = true
> 
>                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
>                                                                                                               spin_unlock(&fs_info->qgroup_lock)
> 
>                                                                                                               btrfs_init_work()
>                                                                                                                --> starts another worker
> 
>                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> 
>                                                          fs_info->qgroup_rescan_running = false
> 
>                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> 
> 							 complete_all(&fs_info->qgroup_rescan_completion)
> 
> Before the rescan worker started by the task at CPU 3 completes, if another
> task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
> flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
> is expected and correct behaviour.
> 
> However if other task calls btrfs_ioctl_quota_rescan_wait() before the
> rescan worker started by the task at CPU 3 completes, it will return
> immediately without waiting for the new rescan worker to complete,
> because fs_info->qgroup_rescan_running is set to false by CPU 2.
> 
> This race is making test case btrfs/171 (from fstests) to fail often:
> 
>   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
>       --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
>       +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
>       @@ -1,2 +1,3 @@
>        QA output created by 171
>       +ERROR: quota rescan failed: Operation now in progress
>        Silence is golden
>       ...
>       (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
> 
> That is because the test calls the btrfs-progs commands "qgroup quota
> rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
> calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
> commands 'qgroup assign' and 'qgroup remove' often call the rescan start
> ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
> since previous waits didn't actually wait for a rescan worker to complete.
> 
> Another problem the race can cause is missing wake ups for waiters, since
> the call to complete_all() happens outside a critical section and after
> clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
> above, if we have a waiter for the first rescan task (executed by CPU 2),
> then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
> rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
> complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
> calls init_completion() against fs_info->qgroup_rescan_completion which
> re-initilizes its wait queue to an empty queue, therefore causing the
> rescan worker at CPU 2 to call complete_all() against an empty queue, never
> waking up the task waiting for that rescan worker.
> 
> Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
> fs_info->qgroup_rescan_running to false in the same critical section,
> delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing

Why do we need both the RESCAN flag and qgroup_rescan_running having
them both and having btrfs_qgroup_wait_for_completion rely on
qgroup[_rescan_running just makes it easier to screw up. IMO it makes
sense to remove qgroup_rescan_running in this patch as well and the code
should only use RESCAN flag.
Filipe Manana Sept. 24, 2019, 2:12 p.m. UTC | #3
On Tue, Sep 24, 2019 at 2:53 PM Josef Bacik <josef@toxicpanda.com> wrote:
>
> On Tue, Sep 24, 2019 at 10:49:54AM +0100, fdmanana@kernel.org wrote:
> > From: Filipe Manana <fdmanana@suse.com>
> >
> > There is a race between setting up a qgroup rescan worker and completing
> > a qgroup rescan worker that can lead to callers of the qgroup rescan wait
> > ioctl to either not wait for the rescan worker to complete or to hang
> > forever due to missing wake ups. The following diagram shows a sequence
> > of steps that illustrates the race.
> >
> >         CPU 1                                                         CPU 2                                  CPU 3
> >
> >  btrfs_ioctl_quota_rescan()
> >   btrfs_qgroup_rescan()
> >    qgroup_rescan_init()
> >     mutex_lock(&fs_info->qgroup_rescan_lock)
> >     spin_lock(&fs_info->qgroup_lock)
> >
> >     fs_info->qgroup_flags |=
> >       BTRFS_QGROUP_STATUS_FLAG_RESCAN
> >
> >     init_completion(
> >       &fs_info->qgroup_rescan_completion)
> >
> >     fs_info->qgroup_rescan_running = true
> >
> >     mutex_unlock(&fs_info->qgroup_rescan_lock)
> >     spin_unlock(&fs_info->qgroup_lock)
> >
> >     btrfs_init_work()
> >      --> starts the worker
> >
> >                                                         btrfs_qgroup_rescan_worker()
> >                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> >
> >                                                          fs_info->qgroup_flags &=
> >                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
> >
> >                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> >
> >                                                          starts transaction, updates qgroup status
> >                                                          item, etc
> >
> >                                                                                                            btrfs_ioctl_quota_rescan()
> >                                                                                                             btrfs_qgroup_rescan()
> >                                                                                                              qgroup_rescan_init()
> >                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
> >                                                                                                               spin_lock(&fs_info->qgroup_lock)
> >
> >                                                                                                               fs_info->qgroup_flags |=
> >                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN
> >
> >                                                                                                               init_completion(
> >                                                                                                                 &fs_info->qgroup_rescan_completion)
> >
> >                                                                                                               fs_info->qgroup_rescan_running = true
> >
> >                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
> >                                                                                                               spin_unlock(&fs_info->qgroup_lock)
> >
> >                                                                                                               btrfs_init_work()
> >                                                                                                                --> starts another worker
> >
> >                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> >
> >                                                          fs_info->qgroup_rescan_running = false
> >
> >                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> >
> >                                                        complete_all(&fs_info->qgroup_rescan_completion)
> >
> > Before the rescan worker started by the task at CPU 3 completes, if another
> > task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
> > flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
> > is expected and correct behaviour.
> >
> > However if other task calls btrfs_ioctl_quota_rescan_wait() before the
> > rescan worker started by the task at CPU 3 completes, it will return
> > immediately without waiting for the new rescan worker to complete,
> > because fs_info->qgroup_rescan_running is set to false by CPU 2.
> >
> > This race is making test case btrfs/171 (from fstests) to fail often:
> >
> >   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
> >       --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
> >       +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
> >       @@ -1,2 +1,3 @@
> >        QA output created by 171
> >       +ERROR: quota rescan failed: Operation now in progress
> >        Silence is golden
> >       ...
> >       (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
> >
> > That is because the test calls the btrfs-progs commands "qgroup quota
> > rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
> > calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
> > commands 'qgroup assign' and 'qgroup remove' often call the rescan start
> > ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
> > since previous waits didn't actually wait for a rescan worker to complete.
> >
> > Another problem the race can cause is missing wake ups for waiters, since
> > the call to complete_all() happens outside a critical section and after
> > clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
> > above, if we have a waiter for the first rescan task (executed by CPU 2),
> > then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
> > rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
> > complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
> > calls init_completion() against fs_info->qgroup_rescan_completion which
> > re-initilizes its wait queue to an empty queue, therefore causing the
> > rescan worker at CPU 2 to call complete_all() against an empty queue, never
> > waking up the task waiting for that rescan worker.
> >
> > Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
> > fs_info->qgroup_rescan_running to false in the same critical section,
> > delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing
> > the call to complete_all() in that same critical section. This gives
> > the protection needed to avoid rescan wait ioctl callers not waiting
> > for a running rescan worker and the lost wake ups problem, since
> > setting that rescan flag and boolean as well as initializing the wait
> > queue is done already in a critical section delimited by that mutex
> > (at qgroup_rescan_init()).
> >
> > Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
> > Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
> > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > ---
> >  fs/btrfs/qgroup.c | 33 +++++++++++++++++++--------------
> >  1 file changed, 19 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> > index 8d3bd799ac7d..52701c1be109 100644
> > --- a/fs/btrfs/qgroup.c
> > +++ b/fs/btrfs/qgroup.c
> > @@ -3166,9 +3166,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> >       btrfs_free_path(path);
> >
> >       mutex_lock(&fs_info->qgroup_rescan_lock);
> > -     if (!btrfs_fs_closing(fs_info))
> > -             fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> > -
>
> Can't we accomplish the same thing by just moving this down into the "done"
> section below, and adding the complete_all under the qgruop_rescan_lock?  That
> way avoid all this extra code?  Just delete the above and have
>
> done:
>         mutex_lock(&fs_info->qgroup_rescan_lock);
>         if (!btrfs_fs_closing(fs_info))
>                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>         fs_info->qgroup_rescan_running = false;
>         complete_all(&fs_info->qgroup_rescan_completion);
>         mutex_unlock(&fs_info->qgroup_rescan_lock);
>
> Or am I missing something?  I don't see a reason why update_qgroup_status_item()
> needs to be done under the qgroup_rescan_lock.  Thanks,

Clearing the flag must be done before update the status item,
otherwise commands like btrfs-progs 'qgroup show' will
think the qgroups may be consistent and print a warning (since they
scan the tree and inspect the status item).

Thanks.

>
> Josef
Filipe Manana Sept. 24, 2019, 2:14 p.m. UTC | #4
On Tue, Sep 24, 2019 at 3:07 PM Nikolay Borisov <nborisov@suse.com> wrote:
>
>
>
> On 24.09.19 г. 12:49 ч., fdmanana@kernel.org wrote:
> > From: Filipe Manana <fdmanana@suse.com>
> >
> > There is a race between setting up a qgroup rescan worker and completing
> > a qgroup rescan worker that can lead to callers of the qgroup rescan wait
> > ioctl to either not wait for the rescan worker to complete or to hang
> > forever due to missing wake ups. The following diagram shows a sequence
> > of steps that illustrates the race.
> >
> >         CPU 1                                                         CPU 2                                  CPU 3
> >
> >  btrfs_ioctl_quota_rescan()
> >   btrfs_qgroup_rescan()
> >    qgroup_rescan_init()
> >     mutex_lock(&fs_info->qgroup_rescan_lock)
> >     spin_lock(&fs_info->qgroup_lock)
> >
> >     fs_info->qgroup_flags |=
> >       BTRFS_QGROUP_STATUS_FLAG_RESCAN
> >
> >     init_completion(
> >       &fs_info->qgroup_rescan_completion)
> >
> >     fs_info->qgroup_rescan_running = true
> >
> >     mutex_unlock(&fs_info->qgroup_rescan_lock)
> >     spin_unlock(&fs_info->qgroup_lock)
> >
> >     btrfs_init_work()
> >      --> starts the worker
> >
> >                                                         btrfs_qgroup_rescan_worker()
> >                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> >
> >                                                          fs_info->qgroup_flags &=
> >                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
> >
> >                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> >
> >                                                          starts transaction, updates qgroup status
> >                                                          item, etc
> >
> >                                                                                                            btrfs_ioctl_quota_rescan()
> >                                                                                                             btrfs_qgroup_rescan()
> >                                                                                                              qgroup_rescan_init()
> >                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
> >                                                                                                               spin_lock(&fs_info->qgroup_lock)
> >
> >                                                                                                               fs_info->qgroup_flags |=
> >                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN
> >
> >                                                                                                               init_completion(
> >                                                                                                                 &fs_info->qgroup_rescan_completion)
> >
> >                                                                                                               fs_info->qgroup_rescan_running = true
> >
> >                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
> >                                                                                                               spin_unlock(&fs_info->qgroup_lock)
> >
> >                                                                                                               btrfs_init_work()
> >                                                                                                                --> starts another worker
> >
> >                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> >
> >                                                          fs_info->qgroup_rescan_running = false
> >
> >                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> >
> >                                                        complete_all(&fs_info->qgroup_rescan_completion)
> >
> > Before the rescan worker started by the task at CPU 3 completes, if another
> > task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
> > flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
> > is expected and correct behaviour.
> >
> > However if other task calls btrfs_ioctl_quota_rescan_wait() before the
> > rescan worker started by the task at CPU 3 completes, it will return
> > immediately without waiting for the new rescan worker to complete,
> > because fs_info->qgroup_rescan_running is set to false by CPU 2.
> >
> > This race is making test case btrfs/171 (from fstests) to fail often:
> >
> >   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
> >       --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
> >       +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
> >       @@ -1,2 +1,3 @@
> >        QA output created by 171
> >       +ERROR: quota rescan failed: Operation now in progress
> >        Silence is golden
> >       ...
> >       (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
> >
> > That is because the test calls the btrfs-progs commands "qgroup quota
> > rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
> > calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
> > commands 'qgroup assign' and 'qgroup remove' often call the rescan start
> > ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
> > since previous waits didn't actually wait for a rescan worker to complete.
> >
> > Another problem the race can cause is missing wake ups for waiters, since
> > the call to complete_all() happens outside a critical section and after
> > clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
> > above, if we have a waiter for the first rescan task (executed by CPU 2),
> > then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
> > rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
> > complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
> > calls init_completion() against fs_info->qgroup_rescan_completion which
> > re-initilizes its wait queue to an empty queue, therefore causing the
> > rescan worker at CPU 2 to call complete_all() against an empty queue, never
> > waking up the task waiting for that rescan worker.
> >
> > Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
> > fs_info->qgroup_rescan_running to false in the same critical section,
> > delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing
>
> Why do we need both the RESCAN flag and qgroup_rescan_running having
> them both and having btrfs_qgroup_wait_for_completion rely on
> qgroup[_rescan_running just makes it easier to screw up. IMO it makes
> sense to remove qgroup_rescan_running in this patch as well and the code
> should only use RESCAN flag.

No, they are both needed. Otherwise the issue fixed by the commit
mentioned in the second "Fixes:" tag is re-introduced.

>
Josef Bacik Sept. 24, 2019, 2:19 p.m. UTC | #5
On Tue, Sep 24, 2019 at 03:12:56PM +0100, Filipe Manana wrote:
> On Tue, Sep 24, 2019 at 2:53 PM Josef Bacik <josef@toxicpanda.com> wrote:
> >
> > On Tue, Sep 24, 2019 at 10:49:54AM +0100, fdmanana@kernel.org wrote:
> > > From: Filipe Manana <fdmanana@suse.com>
> > >
> > > There is a race between setting up a qgroup rescan worker and completing
> > > a qgroup rescan worker that can lead to callers of the qgroup rescan wait
> > > ioctl to either not wait for the rescan worker to complete or to hang
> > > forever due to missing wake ups. The following diagram shows a sequence
> > > of steps that illustrates the race.
> > >
> > >         CPU 1                                                         CPU 2                                  CPU 3
> > >
> > >  btrfs_ioctl_quota_rescan()
> > >   btrfs_qgroup_rescan()
> > >    qgroup_rescan_init()
> > >     mutex_lock(&fs_info->qgroup_rescan_lock)
> > >     spin_lock(&fs_info->qgroup_lock)
> > >
> > >     fs_info->qgroup_flags |=
> > >       BTRFS_QGROUP_STATUS_FLAG_RESCAN
> > >
> > >     init_completion(
> > >       &fs_info->qgroup_rescan_completion)
> > >
> > >     fs_info->qgroup_rescan_running = true
> > >
> > >     mutex_unlock(&fs_info->qgroup_rescan_lock)
> > >     spin_unlock(&fs_info->qgroup_lock)
> > >
> > >     btrfs_init_work()
> > >      --> starts the worker
> > >
> > >                                                         btrfs_qgroup_rescan_worker()
> > >                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> > >
> > >                                                          fs_info->qgroup_flags &=
> > >                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
> > >
> > >                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> > >
> > >                                                          starts transaction, updates qgroup status
> > >                                                          item, etc
> > >
> > >                                                                                                            btrfs_ioctl_quota_rescan()
> > >                                                                                                             btrfs_qgroup_rescan()
> > >                                                                                                              qgroup_rescan_init()
> > >                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
> > >                                                                                                               spin_lock(&fs_info->qgroup_lock)
> > >
> > >                                                                                                               fs_info->qgroup_flags |=
> > >                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN
> > >
> > >                                                                                                               init_completion(
> > >                                                                                                                 &fs_info->qgroup_rescan_completion)
> > >
> > >                                                                                                               fs_info->qgroup_rescan_running = true
> > >
> > >                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
> > >                                                                                                               spin_unlock(&fs_info->qgroup_lock)
> > >
> > >                                                                                                               btrfs_init_work()
> > >                                                                                                                --> starts another worker
> > >
> > >                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> > >
> > >                                                          fs_info->qgroup_rescan_running = false
> > >
> > >                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> > >
> > >                                                        complete_all(&fs_info->qgroup_rescan_completion)
> > >
> > > Before the rescan worker started by the task at CPU 3 completes, if another
> > > task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
> > > flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
> > > is expected and correct behaviour.
> > >
> > > However if other task calls btrfs_ioctl_quota_rescan_wait() before the
> > > rescan worker started by the task at CPU 3 completes, it will return
> > > immediately without waiting for the new rescan worker to complete,
> > > because fs_info->qgroup_rescan_running is set to false by CPU 2.
> > >
> > > This race is making test case btrfs/171 (from fstests) to fail often:
> > >
> > >   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
> > >       --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
> > >       +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
> > >       @@ -1,2 +1,3 @@
> > >        QA output created by 171
> > >       +ERROR: quota rescan failed: Operation now in progress
> > >        Silence is golden
> > >       ...
> > >       (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
> > >
> > > That is because the test calls the btrfs-progs commands "qgroup quota
> > > rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
> > > calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
> > > commands 'qgroup assign' and 'qgroup remove' often call the rescan start
> > > ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
> > > since previous waits didn't actually wait for a rescan worker to complete.
> > >
> > > Another problem the race can cause is missing wake ups for waiters, since
> > > the call to complete_all() happens outside a critical section and after
> > > clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
> > > above, if we have a waiter for the first rescan task (executed by CPU 2),
> > > then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
> > > rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
> > > complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
> > > calls init_completion() against fs_info->qgroup_rescan_completion which
> > > re-initilizes its wait queue to an empty queue, therefore causing the
> > > rescan worker at CPU 2 to call complete_all() against an empty queue, never
> > > waking up the task waiting for that rescan worker.
> > >
> > > Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
> > > fs_info->qgroup_rescan_running to false in the same critical section,
> > > delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing
> > > the call to complete_all() in that same critical section. This gives
> > > the protection needed to avoid rescan wait ioctl callers not waiting
> > > for a running rescan worker and the lost wake ups problem, since
> > > setting that rescan flag and boolean as well as initializing the wait
> > > queue is done already in a critical section delimited by that mutex
> > > (at qgroup_rescan_init()).
> > >
> > > Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
> > > Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
> > > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > > ---
> > >  fs/btrfs/qgroup.c | 33 +++++++++++++++++++--------------
> > >  1 file changed, 19 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> > > index 8d3bd799ac7d..52701c1be109 100644
> > > --- a/fs/btrfs/qgroup.c
> > > +++ b/fs/btrfs/qgroup.c
> > > @@ -3166,9 +3166,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> > >       btrfs_free_path(path);
> > >
> > >       mutex_lock(&fs_info->qgroup_rescan_lock);
> > > -     if (!btrfs_fs_closing(fs_info))
> > > -             fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> > > -
> >
> > Can't we accomplish the same thing by just moving this down into the "done"
> > section below, and adding the complete_all under the qgruop_rescan_lock?  That
> > way avoid all this extra code?  Just delete the above and have
> >
> > done:
> >         mutex_lock(&fs_info->qgroup_rescan_lock);
> >         if (!btrfs_fs_closing(fs_info))
> >                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> >         fs_info->qgroup_rescan_running = false;
> >         complete_all(&fs_info->qgroup_rescan_completion);
> >         mutex_unlock(&fs_info->qgroup_rescan_lock);
> >
> > Or am I missing something?  I don't see a reason why update_qgroup_status_item()
> > needs to be done under the qgroup_rescan_lock.  Thanks,
> 
> Clearing the flag must be done before update the status item,
> otherwise commands like btrfs-progs 'qgroup show' will
> think the qgroups may be consistent and print a warning (since they
> scan the tree and inspect the status item).
> 

Ahhh ok, thanks,

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
David Sterba Sept. 24, 2019, 2:39 p.m. UTC | #6
On Tue, Sep 24, 2019 at 10:49:54AM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> There is a race between setting up a qgroup rescan worker and completing
> a qgroup rescan worker that can lead to callers of the qgroup rescan wait
> ioctl to either not wait for the rescan worker to complete or to hang
> forever due to missing wake ups. The following diagram shows a sequence
> of steps that illustrates the race.
> 
>         CPU 1                                                         CPU 2                                  CPU 3
> 
>  btrfs_ioctl_quota_rescan()
>   btrfs_qgroup_rescan()
>    qgroup_rescan_init()
>     mutex_lock(&fs_info->qgroup_rescan_lock)
>     spin_lock(&fs_info->qgroup_lock)
> 
>     fs_info->qgroup_flags |=
>       BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>     init_completion(
>       &fs_info->qgroup_rescan_completion)
> 
>     fs_info->qgroup_rescan_running = true
> 
>     mutex_unlock(&fs_info->qgroup_rescan_lock)
>     spin_unlock(&fs_info->qgroup_lock)
> 
>     btrfs_init_work()
>      --> starts the worker
> 
>                                                         btrfs_qgroup_rescan_worker()
>                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> 
>                                                          fs_info->qgroup_flags &=
>                                                            ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> 
>                                                          starts transaction, updates qgroup status
>                                                          item, etc
> 
>                                                                                                            btrfs_ioctl_quota_rescan()
>                                                                                                             btrfs_qgroup_rescan()
>                                                                                                              qgroup_rescan_init()
>                                                                                                               mutex_lock(&fs_info->qgroup_rescan_lock)
>                                                                                                               spin_lock(&fs_info->qgroup_lock)
> 
>                                                                                                               fs_info->qgroup_flags |=
>                                                                                                                 BTRFS_QGROUP_STATUS_FLAG_RESCAN
> 
>                                                                                                               init_completion(
>                                                                                                                 &fs_info->qgroup_rescan_completion)
> 
>                                                                                                               fs_info->qgroup_rescan_running = true
> 
>                                                                                                               mutex_unlock(&fs_info->qgroup_rescan_lock)
>                                                                                                               spin_unlock(&fs_info->qgroup_lock)
> 
>                                                                                                               btrfs_init_work()
>                                                                                                                --> starts another worker
> 
>                                                          mutex_lock(&fs_info->qgroup_rescan_lock)
> 
>                                                          fs_info->qgroup_rescan_running = false
> 
>                                                          mutex_unlock(&fs_info->qgroup_rescan_lock)
> 
> 							 complete_all(&fs_info->qgroup_rescan_completion)
> 
> Before the rescan worker started by the task at CPU 3 completes, if another
> task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS because the
> flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at fs_info->qgroup_flags, which
> is expected and correct behaviour.
> 
> However if other task calls btrfs_ioctl_quota_rescan_wait() before the
> rescan worker started by the task at CPU 3 completes, it will return
> immediately without waiting for the new rescan worker to complete,
> because fs_info->qgroup_rescan_running is set to false by CPU 2.
> 
> This race is making test case btrfs/171 (from fstests) to fail often:
> 
>   btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
>       --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
>       +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
>       @@ -1,2 +1,3 @@
>        QA output created by 171
>       +ERROR: quota rescan failed: Operation now in progress
>        Silence is golden
>       ...
>       (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
> 
> That is because the test calls the btrfs-progs commands "qgroup quota
> rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
> calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
> commands 'qgroup assign' and 'qgroup remove' often call the rescan start
> ioctl after calling the qgroup assign ioctl, btrfs_ioctl_qgroup_assign()),
> since previous waits didn't actually wait for a rescan worker to complete.
> 
> Another problem the race can cause is missing wake ups for waiters, since
> the call to complete_all() happens outside a critical section and after
> clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence diagram
> above, if we have a waiter for the first rescan task (executed by CPU 2),
> then fs_info->qgroup_rescan_completion.wait is not empty, and if after the
> rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and before it calls
> complete_all() against fs_info->qgroup_rescan_completion, the task at CPU 3
> calls init_completion() against fs_info->qgroup_rescan_completion which
> re-initilizes its wait queue to an empty queue, therefore causing the
> rescan worker at CPU 2 to call complete_all() against an empty queue, never
> waking up the task waiting for that rescan worker.
> 
> Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
> fs_info->qgroup_rescan_running to false in the same critical section,
> delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing
> the call to complete_all() in that same critical section. This gives
> the protection needed to avoid rescan wait ioctl callers not waiting
> for a running rescan worker and the lost wake ups problem, since
> setting that rescan flag and boolean as well as initializing the wait
> queue is done already in a critical section delimited by that mutex
> (at qgroup_rescan_init()).
> 
> Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
> Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
> Signed-off-by: Filipe Manana <fdmanana@suse.com>

Added to misc-next, thanks.
diff mbox series

Patch

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8d3bd799ac7d..52701c1be109 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3166,9 +3166,6 @@  static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	btrfs_free_path(path);
 
 	mutex_lock(&fs_info->qgroup_rescan_lock);
-	if (!btrfs_fs_closing(fs_info))
-		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
-
 	if (err > 0 &&
 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -3184,16 +3181,30 @@  static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	trans = btrfs_start_transaction(fs_info->quota_root, 1);
 	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
+		trans = NULL;
 		btrfs_err(fs_info,
 			  "fail to start transaction for status update: %d",
 			  err);
-		goto done;
 	}
-	ret = update_qgroup_status_item(trans);
-	if (ret < 0) {
-		err = ret;
-		btrfs_err(fs_info, "fail to update qgroup status: %d", err);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	if (!btrfs_fs_closing(fs_info))
+		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+	if (trans) {
+		ret = update_qgroup_status_item(trans);
+		if (ret < 0) {
+			err = ret;
+			btrfs_err(fs_info, "fail to update qgroup status: %d",
+				  err);
+		}
 	}
+	fs_info->qgroup_rescan_running = false;
+	complete_all(&fs_info->qgroup_rescan_completion);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	if (!trans)
+		return;
+
 	btrfs_end_transaction(trans);
 
 	if (btrfs_fs_closing(fs_info)) {
@@ -3204,12 +3215,6 @@  static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	} else {
 		btrfs_err(fs_info, "qgroup scan failed with %d", err);
 	}
-
-done:
-	mutex_lock(&fs_info->qgroup_rescan_lock);
-	fs_info->qgroup_rescan_running = false;
-	mutex_unlock(&fs_info->qgroup_rescan_lock);
-	complete_all(&fs_info->qgroup_rescan_completion);
 }
 
 /*