diff mbox series

[2/6] btrfs: Improve comments around nocow path

Message ID 20190805144708.5432-3-nborisov@suse.com (mailing list archive)
State New, archived
Headers show
Series Refactor nocow path | expand

Commit Message

Nikolay Borisov Aug. 5, 2019, 2:47 p.m. UTC
run_delalloc_nocow contains numerous, somewhat subtle, checks when
figuring out whether a particular extent should be CoW'ed or not. This
patch explicitly states the assumptions those checks verify. As a
result also document 2 of the more subtle checks in check_committed_ref
as well.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/extent-tree.c |  3 +++
 fs/btrfs/inode.c       | 50 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 49 insertions(+), 4 deletions(-)

Comments

Filipe Manana Aug. 6, 2019, 10:09 a.m. UTC | #1
On Mon, Aug 5, 2019 at 3:48 PM Nikolay Borisov <nborisov@suse.com> wrote:
>
> run_delalloc_nocow contains numerous, somewhat subtle, checks when
> figuring out whether a particular extent should be CoW'ed or not. This
> patch explicitly states the assumptions those checks verify. As a
> result also document 2 of the more subtle checks in check_committed_ref
> as well.
>
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---
>  fs/btrfs/extent-tree.c |  3 +++
>  fs/btrfs/inode.c       | 50 ++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 49 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 163db9a560e2..1fc795fda4c2 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -2868,16 +2868,19 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
>         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
>
> +       /* If extent item has more than 1 inline ref then it has references */
>         if (item_size != sizeof(*ei) +
>             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
>                 goto out;
>
> +       /* If extent created before last snapshot => it's definitely referenced */

Extents are always referenced, otherwise we have a corruption, so:
referenced -> shared

>         if (btrfs_extent_generation(leaf, ei) <=
>             btrfs_root_last_snapshot(&root->root_item))
>                 goto out;
>
>         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
>
> +       /* If this extent has SHARED_DATA_REF then it's referenced */

referenced -> shared

>         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
>         if (type != BTRFS_EXTENT_DATA_REF_KEY)
>                 goto out;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 4393f986e5ad..aa5e017e31ab 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -1345,6 +1345,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                                                cur_offset, 0);
>                 if (ret < 0)
>                         goto error;
> +
> +               /*
> +                * If nothing found and this is the initial search get the last
> +                * extent of this file

Last extent?
No, we want to check if the previous slot points to an extent item,
and if so, use it. That does not mean it's the last extent in the
file, it can be any in the middle or in the beginning.
This is hit when there's no extent that starts at the given search
offset, so we end up in the next one and we have to go 1 slot behind
because the previous one contains the search offset.

> +                */
>                 if (ret > 0 && path->slots[0] > 0 && check_prev) {
>                         leaf = path->nodes[0];
>                         btrfs_item_key_to_cpu(leaf, &found_key,
> @@ -1353,9 +1358,9 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                             found_key.type == BTRFS_EXTENT_DATA_KEY)
>                                 path->slots[0]--;
>                 }
> -               /* Check previous only on first pass */

Previous patch added this comment, this one removes it. Why?

>                 check_prev = false;
>  next_slot:
> +               /* If we are beyond end of leaf break */

No, we don't break. We iterate to the next leaf. We break only if
there isn't a next leaf (we are at the last one).

>                 leaf = path->nodes[0];
>                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
>                         ret = btrfs_next_leaf(root, path);
> @@ -1371,23 +1376,39 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>
>                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
>
> +               /* Didn't find anything for our INO */
>                 if (found_key.objectid > ino)
>                         break;
> +               /*
> +                * Found a different inode or no extents for our file,
> +                * goto next slot

No. This does not mean that there are no extents for the file. If
there weren't any, we would break instead of iterating to the next
slot.
One example described at
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1d512cb77bdbda80f0dd0620a3b260d697fd581d

> +                */
>                 if (WARN_ON_ONCE(found_key.objectid < ino) ||
>                     found_key.type < BTRFS_EXTENT_DATA_KEY) {
>                         path->slots[0]++;
>                         goto next_slot;
>                 }
> +
> +               /* Found key is not EXTENT_DATA_KEY or starts after req range */
>                 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
>                     found_key.offset > end)
>                         break;
>
> +               /*
> +                * If the found extent starts after requested offset, then
> +                * adjust extent_end to be right before this extent begins
> +                */
>                 if (found_key.offset > cur_offset) {
>                         extent_end = found_key.offset;
>                         extent_type = 0;
>                         goto out_check;
>                 }
>
> +
> +               /*
> +                * Found extent which begins before our range and has the
> +                * potential to intersect it.
> +                */
>                 fi = btrfs_item_ptr(leaf, path->slots[0],
>                                     struct btrfs_file_extent_item);
>                 extent_type = btrfs_file_extent_type(leaf, fi);
> @@ -1401,19 +1422,28 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                                 btrfs_file_extent_num_bytes(leaf, fi);
>                         disk_num_bytes =
>                                 btrfs_file_extent_disk_num_bytes(leaf, fi);
> +                       /*
> +                        * If extent we got ends before our range starts,
> +                        * skip to next extent
> +                        */
>                         if (extent_end <= start) {
>                                 path->slots[0]++;
>                                 goto next_slot;
>                         }
> +                       /* skip holes */
>                         if (disk_bytenr == 0)
>                                 goto out_check;
> +                       /* skip compressed/encrypted/encoded extents */
>                         if (btrfs_file_extent_compression(leaf, fi) ||
>                             btrfs_file_extent_encryption(leaf, fi) ||
>                             btrfs_file_extent_other_encoding(leaf, fi))
>                                 goto out_check;
>                         /*
> -                        * Do the same check as in btrfs_cross_ref_exist but
> -                        * without the unnecessary search.
> +                        * If extent is created before the last volume's snapshot
> +                        * this implies the extent is shared, hence we can't do
> +                        * nocow. This is the same check as in
> +                        * btrfs_cross_ref_exist but without calling
> +                        * btrfs_search_slot.
>                          */
>                         if (!freespace_inode &&
>                             btrfs_file_extent_generation(leaf, fi) <=
> @@ -1421,6 +1451,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                                 goto out_check;
>                         if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
>                                 goto out_check;
> +                       /* If extent RO, we must CoW it */
>                         if (btrfs_extent_readonly(fs_info, disk_bytenr))
>                                 goto out_check;
>                         ret = btrfs_cross_ref_exist(root, ino,
> @@ -1444,7 +1475,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                         disk_bytenr += cur_offset - found_key.offset;
>                         num_bytes = min(end + 1, extent_end) - cur_offset;
>                         /*
> -                        * if there are pending snapshots for this root,
> +                        * If there are pending snapshots for this root,
>                          * we fall into common COW way.
>                          */
>                         if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
> @@ -1481,12 +1512,17 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                         BUG();
>                 }
>  out_check:
> +               /* Skip extents outside of our requested range */
>                 if (extent_end <= start) {
>                         path->slots[0]++;
>                         if (nocow)
>                                 btrfs_dec_nocow_writers(fs_info, disk_bytenr);
>                         goto next_slot;
>                 }
> +               /*
> +                * If nocow is false then record the beginning of the range
> +                * that needs to be CoWed
> +                */
>                 if (!nocow) {
>                         if (cow_start == (u64)-1)
>                                 cow_start = cur_offset;
> @@ -1498,6 +1534,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>                 }
>
>                 btrfs_release_path(path);
> +
> +               /*
> +                * CoW range from cow_start to found_key.offset - 1. As the key
> +                * will contains the beginning of the first extent that can be
> +                * NOCOW, following one which needs to be CoW'ed
> +                */
>                 if (cow_start != (u64)-1) {
>                         ret = cow_file_range(inode, locked_page,
>                                              cow_start, found_key.offset - 1,
> --
> 2.17.1
>
Nikolay Borisov Aug. 7, 2019, 8:16 a.m. UTC | #2
On 6.08.19 г. 13:09 ч., Filipe Manana wrote:
> On Mon, Aug 5, 2019 at 3:48 PM Nikolay Borisov <nborisov@suse.com> wrote:

<snip>

>> @@ -1371,23 +1376,39 @@ static noinline int run_delalloc_nocow(struct inode *inode,
>>
>>                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
>>
>> +               /* Didn't find anything for our INO */
>>                 if (found_key.objectid > ino)
>>                         break;
>> +               /*
>> +                * Found a different inode or no extents for our file,
>> +                * goto next slot
> 
> No. This does not mean that there are no extents for the file. If
> there weren't any, we would break instead of iterating to the next
> slot.
> One example described at
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1d512cb77bdbda80f0dd0620a3b260d697fd581d

I see, thanks for the pointer. How about the following :

/*
                 * Keep searching until we find an EXTENT ITEM or are
sure
                 * there are no more extents for this inode

                 */

While it doesn't mention the race condition this check, coupled with the
next one (where we break if type > EXTENT_DATA_KEY), it reflects reality
close enough?


> 
>> +                */
>>                 if (WARN_ON_ONCE(found_key.objectid < ino) ||
>>                     found_key.type < BTRFS_EXTENT_DATA_KEY) {
>>                         path->slots[0]++;
>>                         goto next_slot;
>>                 }
>> +
>> +               /* Found key is not EXTENT_DATA_KEY or starts after req range */
>>                 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
>>                     found_key.offset > end)
>>                         break;
>>
>> +               /*
>> +                * If the found extent starts after requested offset, then
>> +                * adjust extent_end to be right before this extent begins
>> +                */
>>                 if (found_key.offset > cur_offset) {
>>                         extent_end = found_key.offset;
>>                         extent_type = 0;
>>                         goto out_check;
>>                 }
>>
>> +
>> +               /*
>> +                * Found extent which begins before our range and has the
>> +                * potential to intersect it.
>> +                */
>>                 fi = btrfs_item_ptr(leaf, path->slots[0],
>>                                     struct btrfs_file_extent_item);
>>                 extent_type = btrfs_file_extent_type(leaf, fi);
<snip>
Filipe Manana Aug. 7, 2019, 8:26 a.m. UTC | #3
On Wed, Aug 7, 2019 at 9:16 AM Nikolay Borisov <nborisov@suse.com> wrote:
>
>
>
> On 6.08.19 г. 13:09 ч., Filipe Manana wrote:
> > On Mon, Aug 5, 2019 at 3:48 PM Nikolay Borisov <nborisov@suse.com> wrote:
>
> <snip>
>
> >> @@ -1371,23 +1376,39 @@ static noinline int run_delalloc_nocow(struct inode *inode,
> >>
> >>                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
> >>
> >> +               /* Didn't find anything for our INO */
> >>                 if (found_key.objectid > ino)
> >>                         break;
> >> +               /*
> >> +                * Found a different inode or no extents for our file,
> >> +                * goto next slot
> >
> > No. This does not mean that there are no extents for the file. If
> > there weren't any, we would break instead of iterating to the next
> > slot.
> > One example described at
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1d512cb77bdbda80f0dd0620a3b260d697fd581d
>
> I see, thanks for the pointer. How about the following :
>
> /*
>                  * Keep searching until we find an EXTENT ITEM or are
> sure
>                  * there are no more extents for this inode
>
>                  */

Yes, that's just fine.

Thanks.

>
> While it doesn't mention the race condition this check, coupled with the
> next one (where we break if type > EXTENT_DATA_KEY), it reflects reality
> close enough?
>
>
> >
> >> +                */
> >>                 if (WARN_ON_ONCE(found_key.objectid < ino) ||
> >>                     found_key.type < BTRFS_EXTENT_DATA_KEY) {
> >>                         path->slots[0]++;
> >>                         goto next_slot;
> >>                 }
> >> +
> >> +               /* Found key is not EXTENT_DATA_KEY or starts after req range */
> >>                 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
> >>                     found_key.offset > end)
> >>                         break;
> >>
> >> +               /*
> >> +                * If the found extent starts after requested offset, then
> >> +                * adjust extent_end to be right before this extent begins
> >> +                */
> >>                 if (found_key.offset > cur_offset) {
> >>                         extent_end = found_key.offset;
> >>                         extent_type = 0;
> >>                         goto out_check;
> >>                 }
> >>
> >> +
> >> +               /*
> >> +                * Found extent which begins before our range and has the
> >> +                * potential to intersect it.
> >> +                */
> >>                 fi = btrfs_item_ptr(leaf, path->slots[0],
> >>                                     struct btrfs_file_extent_item);
> >>                 extent_type = btrfs_file_extent_type(leaf, fi);
> <snip>
diff mbox series

Patch

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 163db9a560e2..1fc795fda4c2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2868,16 +2868,19 @@  static noinline int check_committed_ref(struct btrfs_root *root,
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 
+	/* If extent item has more than 1 inline ref then it has references */
 	if (item_size != sizeof(*ei) +
 	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
 		goto out;
 
+	/* If extent created before last snapshot => it's definitely referenced */
 	if (btrfs_extent_generation(leaf, ei) <=
 	    btrfs_root_last_snapshot(&root->root_item))
 		goto out;
 
 	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
 
+	/* If this extent has SHARED_DATA_REF then it's referenced */
 	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
 	if (type != BTRFS_EXTENT_DATA_REF_KEY)
 		goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4393f986e5ad..aa5e017e31ab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1345,6 +1345,11 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 					       cur_offset, 0);
 		if (ret < 0)
 			goto error;
+
+		/*
+		 * If nothing found and this is the initial search get the last
+		 * extent of this file
+		 */
 		if (ret > 0 && path->slots[0] > 0 && check_prev) {
 			leaf = path->nodes[0];
 			btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1353,9 +1358,9 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 			    found_key.type == BTRFS_EXTENT_DATA_KEY)
 				path->slots[0]--;
 		}
-		/* Check previous only on first pass */
 		check_prev = false;
 next_slot:
+		/* If we are beyond end of leaf break */
 		leaf = path->nodes[0];
 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
 			ret = btrfs_next_leaf(root, path);
@@ -1371,23 +1376,39 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
+		/* Didn't find anything for our INO */
 		if (found_key.objectid > ino)
 			break;
+		/*
+		 * Found a different inode or no extents for our file,
+		 * goto next slot
+		 */
 		if (WARN_ON_ONCE(found_key.objectid < ino) ||
 		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
 			path->slots[0]++;
 			goto next_slot;
 		}
+
+		/* Found key is not EXTENT_DATA_KEY or starts after req range */
 		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
 		    found_key.offset > end)
 			break;
 
+		/*
+		 * If the found extent starts after requested offset, then
+		 * adjust extent_end to be right before this extent begins
+		 */
 		if (found_key.offset > cur_offset) {
 			extent_end = found_key.offset;
 			extent_type = 0;
 			goto out_check;
 		}
 
+
+		/*
+		 * Found extent which begins before our range and has the
+		 * potential to intersect it.
+		 */
 		fi = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_file_extent_item);
 		extent_type = btrfs_file_extent_type(leaf, fi);
@@ -1401,19 +1422,28 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 				btrfs_file_extent_num_bytes(leaf, fi);
 			disk_num_bytes =
 				btrfs_file_extent_disk_num_bytes(leaf, fi);
+			/*
+			 * If extent we got ends before our range starts,
+			 * skip to next extent
+			 */
 			if (extent_end <= start) {
 				path->slots[0]++;
 				goto next_slot;
 			}
+			/* skip holes */
 			if (disk_bytenr == 0)
 				goto out_check;
+			/* skip compressed/encrypted/encoded extents */
 			if (btrfs_file_extent_compression(leaf, fi) ||
 			    btrfs_file_extent_encryption(leaf, fi) ||
 			    btrfs_file_extent_other_encoding(leaf, fi))
 				goto out_check;
 			/*
-			 * Do the same check as in btrfs_cross_ref_exist but
-			 * without the unnecessary search.
+			 * If extent is created before the last volume's snapshot
+			 * this implies the extent is shared, hence we can't do
+			 * nocow. This is the same check as in
+			 * btrfs_cross_ref_exist but without calling
+			 * btrfs_search_slot.
 			 */
 			if (!freespace_inode &&
 			    btrfs_file_extent_generation(leaf, fi) <=
@@ -1421,6 +1451,7 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 				goto out_check;
 			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
 				goto out_check;
+			/* If extent RO, we must CoW it */
 			if (btrfs_extent_readonly(fs_info, disk_bytenr))
 				goto out_check;
 			ret = btrfs_cross_ref_exist(root, ino,
@@ -1444,7 +1475,7 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 			disk_bytenr += cur_offset - found_key.offset;
 			num_bytes = min(end + 1, extent_end) - cur_offset;
 			/*
-			 * if there are pending snapshots for this root,
+			 * If there are pending snapshots for this root,
 			 * we fall into common COW way.
 			 */
 			if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
@@ -1481,12 +1512,17 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 			BUG();
 		}
 out_check:
+		/* Skip extents outside of our requested range */
 		if (extent_end <= start) {
 			path->slots[0]++;
 			if (nocow)
 				btrfs_dec_nocow_writers(fs_info, disk_bytenr);
 			goto next_slot;
 		}
+		/*
+		 * If nocow is false then record the beginning of the range
+		 * that needs to be CoWed
+		 */
 		if (!nocow) {
 			if (cow_start == (u64)-1)
 				cow_start = cur_offset;
@@ -1498,6 +1534,12 @@  static noinline int run_delalloc_nocow(struct inode *inode,
 		}
 
 		btrfs_release_path(path);
+
+		/*
+		 * CoW range from cow_start to found_key.offset - 1. As the key
+		 * will contains the beginning of the first extent that can be
+		 * NOCOW, following one which needs to be CoW'ed
+		 */
 		if (cow_start != (u64)-1) {
 			ret = cow_file_range(inode, locked_page,
 					     cow_start, found_key.offset - 1,