diff mbox series

libbtrfsutil: fix race between subvolume iterator and deletion

Message ID 1b0ba76b40a8fc22f8a97124ddcc83d3164f1836.1627429989.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series libbtrfsutil: fix race between subvolume iterator and deletion | expand

Commit Message

Omar Sandoval July 27, 2021, 11:53 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

Subvolume iteration has a window between when we get a root ref (with
BTRFS_IOC_TREE_SEARCH or BTRFS_IOC_GET_SUBVOL_ROOTREF) and when we look
up the path of the parent directory (with BTRFS_IOC_INO_LOOKUP{,_USER}).
If the subvolume is moved or deleted and its old parent directory is
deleted during that window, then BTRFS_IOC_INO_LOOKUP{,_USER} will fail
with ENOENT. The iteration will then fail with ENOENT as well.

We originally encountered this bug with an application that called
`btrfs subvolume show` (which iterates subvolumes to find snapshots) in
parallel with other threads creating and deleting subvolumes. It can be
reproduced almost instantly with the following script:

  import multiprocessing
  import os

  import btrfsutil

  def create_and_delete_subvolume(i):
      dir_name = f"subvol_iter_race{i}"
      subvol_name = dir_name + "/subvol"
      while True:
          os.mkdir(dir_name)
          btrfsutil.create_subvolume(subvol_name)
          btrfsutil.delete_subvolume(subvol_name)
          os.rmdir(dir_name)

  def iterate_subvolumes():
      fd = os.open(".", os.O_RDONLY | os.O_DIRECTORY)
      while True:
          with btrfsutil.SubvolumeIterator(fd, 5) as it:
              for _ in it:
                  pass

  if __name__ == "__main__":
      for i in range(10):
          multiprocessing.Process(target=create_and_delete_subvolume, args=(i,), daemon=True).start()
      iterate_subvolumes()

Subvolume iteration should be robust against concurrent modifications to
subvolumes. So, if a subvolume's parent directory no longer exists, just
skip the subvolume, as it must have been deleted or moved elsewhere.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 libbtrfsutil/subvolume.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

Comments

Neal Gompa July 28, 2021, 1:02 a.m. UTC | #1
On Tue, Jul 27, 2021 at 7:54 PM Omar Sandoval <osandov@osandov.com> wrote:
>
> From: Omar Sandoval <osandov@fb.com>
>
> Subvolume iteration has a window between when we get a root ref (with
> BTRFS_IOC_TREE_SEARCH or BTRFS_IOC_GET_SUBVOL_ROOTREF) and when we look
> up the path of the parent directory (with BTRFS_IOC_INO_LOOKUP{,_USER}).
> If the subvolume is moved or deleted and its old parent directory is
> deleted during that window, then BTRFS_IOC_INO_LOOKUP{,_USER} will fail
> with ENOENT. The iteration will then fail with ENOENT as well.
>
> We originally encountered this bug with an application that called
> `btrfs subvolume show` (which iterates subvolumes to find snapshots) in
> parallel with other threads creating and deleting subvolumes. It can be
> reproduced almost instantly with the following script:
>
>   import multiprocessing
>   import os
>
>   import btrfsutil
>
>   def create_and_delete_subvolume(i):
>       dir_name = f"subvol_iter_race{i}"
>       subvol_name = dir_name + "/subvol"
>       while True:
>           os.mkdir(dir_name)
>           btrfsutil.create_subvolume(subvol_name)
>           btrfsutil.delete_subvolume(subvol_name)
>           os.rmdir(dir_name)
>
>   def iterate_subvolumes():
>       fd = os.open(".", os.O_RDONLY | os.O_DIRECTORY)
>       while True:
>           with btrfsutil.SubvolumeIterator(fd, 5) as it:
>               for _ in it:
>                   pass
>
>   if __name__ == "__main__":
>       for i in range(10):
>           multiprocessing.Process(target=create_and_delete_subvolume, args=(i,), daemon=True).start()
>       iterate_subvolumes()
>
> Subvolume iteration should be robust against concurrent modifications to
> subvolumes. So, if a subvolume's parent directory no longer exists, just
> skip the subvolume, as it must have been deleted or moved elsewhere.
>
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  libbtrfsutil/subvolume.c | 18 +++++++++++++++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
> index e30956b1..32086b7f 100644
> --- a/libbtrfsutil/subvolume.c
> +++ b/libbtrfsutil/subvolume.c
> @@ -1469,8 +1469,16 @@ static enum btrfs_util_error subvolume_iterator_next_tree_search(struct btrfs_ut
>                 name = (const char *)(ref + 1);
>                 err = build_subvol_path_privileged(iter, header, ref, name,
>                                                    &path_len);
> -               if (err)
> +               if (err) {
> +                       /*
> +                        * If the subvolume's parent directory doesn't exist,
> +                        * then the subvolume was either moved or deleted. Skip
> +                        * it.
> +                        */
> +                       if (errno == ENOENT)
> +                               continue;
>                         return err;
> +               }
>
>                 err = append_to_search_stack(iter,
>                                 btrfs_search_header_offset(header), path_len);
> @@ -1539,8 +1547,12 @@ static enum btrfs_util_error subvolume_iterator_next_unprivileged(struct btrfs_u
>                 err = build_subvol_path_unprivileged(iter, treeid, dirid,
>                                                      &path_len);
>                 if (err) {
> -                       /* Skip the subvolume if we can't access it. */
> -                       if (errno == EACCES)
> +                       /*
> +                        * If the subvolume's parent directory doesn't exist,
> +                        * then the subvolume was either moved or deleted. Skip
> +                        * it. Also skip it if we can't access it.
> +                        */
> +                       if (errno == ENOENT || errno == EACCES)
>                                 continue;
>                         return err;
>                 }
> --
> 2.32.0
>

LGTM.

Reviewed-by: Neal Gompa <ngompa13@gmail.com>
David Sterba July 28, 2021, 11:15 a.m. UTC | #2
On Tue, Jul 27, 2021 at 04:53:28PM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> Subvolume iteration has a window between when we get a root ref (with
> BTRFS_IOC_TREE_SEARCH or BTRFS_IOC_GET_SUBVOL_ROOTREF) and when we look
> up the path of the parent directory (with BTRFS_IOC_INO_LOOKUP{,_USER}).
> If the subvolume is moved or deleted and its old parent directory is
> deleted during that window, then BTRFS_IOC_INO_LOOKUP{,_USER} will fail
> with ENOENT. The iteration will then fail with ENOENT as well.
> 
> We originally encountered this bug with an application that called
> `btrfs subvolume show` (which iterates subvolumes to find snapshots) in
> parallel with other threads creating and deleting subvolumes. It can be
> reproduced almost instantly with the following script:
> 
>   import multiprocessing
>   import os
> 
>   import btrfsutil
> 
>   def create_and_delete_subvolume(i):
>       dir_name = f"subvol_iter_race{i}"
>       subvol_name = dir_name + "/subvol"
>       while True:
>           os.mkdir(dir_name)
>           btrfsutil.create_subvolume(subvol_name)
>           btrfsutil.delete_subvolume(subvol_name)
>           os.rmdir(dir_name)
> 
>   def iterate_subvolumes():
>       fd = os.open(".", os.O_RDONLY | os.O_DIRECTORY)
>       while True:
>           with btrfsutil.SubvolumeIterator(fd, 5) as it:
>               for _ in it:
>                   pass
> 
>   if __name__ == "__main__":
>       for i in range(10):
>           multiprocessing.Process(target=create_and_delete_subvolume, args=(i,), daemon=True).start()
>       iterate_subvolumes()

Can you please turn this into a test case?

> Subvolume iteration should be robust against concurrent modifications to
> subvolumes. So, if a subvolume's parent directory no longer exists, just
> skip the subvolume, as it must have been deleted or moved elsewhere.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Added to devel, thanks.
diff mbox series

Patch

diff --git a/libbtrfsutil/subvolume.c b/libbtrfsutil/subvolume.c
index e30956b1..32086b7f 100644
--- a/libbtrfsutil/subvolume.c
+++ b/libbtrfsutil/subvolume.c
@@ -1469,8 +1469,16 @@  static enum btrfs_util_error subvolume_iterator_next_tree_search(struct btrfs_ut
 		name = (const char *)(ref + 1);
 		err = build_subvol_path_privileged(iter, header, ref, name,
 						   &path_len);
-		if (err)
+		if (err) {
+			/*
+			 * If the subvolume's parent directory doesn't exist,
+			 * then the subvolume was either moved or deleted. Skip
+			 * it.
+			 */
+			if (errno == ENOENT)
+				continue;
 			return err;
+		}
 
 		err = append_to_search_stack(iter,
 				btrfs_search_header_offset(header), path_len);
@@ -1539,8 +1547,12 @@  static enum btrfs_util_error subvolume_iterator_next_unprivileged(struct btrfs_u
 		err = build_subvol_path_unprivileged(iter, treeid, dirid,
 						     &path_len);
 		if (err) {
-			/* Skip the subvolume if we can't access it. */
-			if (errno == EACCES)
+			/*
+			 * If the subvolume's parent directory doesn't exist,
+			 * then the subvolume was either moved or deleted. Skip
+			 * it. Also skip it if we can't access it.
+			 */
+			if (errno == ENOENT || errno == EACCES)
 				continue;
 			return err;
 		}