[3/6] btrfs: introduce the inode->file_extent_tree

Message ID	20200117140224.42495-4-josef@toxicpanda.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=6qVc=3G=vger.kernel.org=linux-btrfs-owner@kernel.org> From: Josef Bacik <josef@toxicpanda.com> To: linux-btrfs@vger.kernel.org, kernel-team@fb.com Cc: Filipe Manana <fdmanana@suse.com> Subject: [PATCH 3/6] btrfs: introduce the inode->file_extent_tree Date: Fri, 17 Jan 2020 09:02:21 -0500 Message-Id: <20200117140224.42495-4-josef@toxicpanda.com> In-Reply-To: <20200117140224.42495-1-josef@toxicpanda.com> References: <20200117140224.42495-1-josef@toxicpanda.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk
Series	btrfs: fix hole corruption issue with !NO_HOLES \| expand [0/6,v3] btrfs: fix hole corruption issue with !NO_HOLES [1/6] btrfs: use btrfs_ordered_update_i_size in clone_finish_inode_update [2/6] btrfs: don't use path->leave_spinning for truncate [3/6] btrfs: introduce the inode->file_extent_tree [4/6] btrfs: use the file extent tree infrastructure [5/6] btrfs: replace all uses of btrfs_ordered_update_i_size [6/6] btrfs: delete the ordered isize update code

Message ID

20200117140224.42495-4-josef@toxicpanda.com (mailing list archive)

State

New, archived

Headers

From: Josef Bacik <josef@toxicpanda.com>
To: linux-btrfs@vger.kernel.org, kernel-team@fb.com
Cc: Filipe Manana <fdmanana@suse.com>
Subject: [PATCH 3/6] btrfs: introduce the inode->file_extent_tree
Date: Fri, 17 Jan 2020 09:02:21 -0500
Message-Id: <20200117140224.42495-4-josef@toxicpanda.com>
In-Reply-To: <20200117140224.42495-1-josef@toxicpanda.com>
References: <20200117140224.42495-1-josef@toxicpanda.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Sender: linux-btrfs-owner@vger.kernel.org
Precedence: bulk

Series

btrfs: fix hole corruption issue with !NO_HOLES | expand

Commit Message

Josef Bacik Jan. 17, 2020, 2:02 p.m. UTC

In order to keep track of where we have file extents on disk, and thus
where it is safe to adjust the i_size to, we need to have a tree in
place to keep track of the contiguous areas we have file extents for.
Add helpers to use this tree, as it's not required for NO_HOLES file
systems.  We will use this by setting DIRTY for areas we know we have
file extent item's set, and clearing it when we remove file extent items
for truncation.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/btrfs_inode.h    |  5 +++
 fs/btrfs/ctree.h          |  5 +++
 fs/btrfs/extent-io-tree.h |  1 +
 fs/btrfs/extent_io.c      | 11 +++++
 fs/btrfs/file-item.c      | 91 +++++++++++++++++++++++++++++++++++++++
 fs/btrfs/inode.c          |  6 +++
 6 files changed, 119 insertions(+)

Comments

Nikolay Borisov Jan. 30, 2020, 11:17 a.m. UTC | #1

On 17.01.20 г. 16:02 ч., Josef Bacik wrote:
> In order to keep track of where we have file extents on disk, and thus
> where it is safe to adjust the i_size to, we need to have a tree in
> place to keep track of the contiguous areas we have file extents for.
> Add helpers to use this tree, as it's not required for NO_HOLES file
> systems.  We will use this by setting DIRTY for areas we know we have
> file extent item's set, and clearing it when we remove file extent items
> for truncation.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> Reviewed-by: Filipe Manana <fdmanana@suse.com>
> ---
>  fs/btrfs/btrfs_inode.h    |  5 +++
>  fs/btrfs/ctree.h          |  5 +++
>  fs/btrfs/extent-io-tree.h |  1 +
>  fs/btrfs/extent_io.c      | 11 +++++
>  fs/btrfs/file-item.c      | 91 +++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/inode.c          |  6 +++
>  6 files changed, 119 insertions(+)
> 
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index 4e12a477d32e..d9dcbac513ed 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -60,6 +60,11 @@ struct btrfs_inode {
>  	 */
>  	struct extent_io_tree io_failure_tree;
>  
> +	/* keeps track of where we have extent items mapped in order to make
> +	 * sure our i_size adjustments are accurate.
> +	 */
> +	struct extent_io_tree file_extent_tree;
> +
>  	/* held while logging the inode in tree-log.c */
>  	struct mutex log_mutex;
>  
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 00cf1641f1b9..8a2c1665baad 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -2851,6 +2851,11 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>  				     struct btrfs_file_extent_item *fi,
>  				     const bool new_inline,
>  				     struct extent_map *em);
> +int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
> +					u64 len);
> +int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
> +				      u64 len);
> +void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_isize);
>  
>  /* inode.c */
>  struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
> diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
> index a3febe746c79..c8bcd2e3184c 100644
> --- a/fs/btrfs/extent-io-tree.h
> +++ b/fs/btrfs/extent-io-tree.h
> @@ -44,6 +44,7 @@ enum {
>  	IO_TREE_TRANS_DIRTY_PAGES,
>  	IO_TREE_ROOT_DIRTY_LOG_PAGES,
>  	IO_TREE_SELFTEST,
> +	IO_TREE_INODE_FILE_EXTENT,
>  };
>  
>  struct extent_io_tree {
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 0d40cd7427ba..f9b223d827b3 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -265,6 +265,15 @@ void __cold extent_io_exit(void)
>  	bioset_exit(&btrfs_bioset);
>  }
>  
> +/*
> + * For the file_extent_tree, we want to hold the inode lock when we lookup and
> + * update the disk_i_size, but lockdep will complain because our io_tree we hold
> + * the tree lock and get the inode lock when setting delalloc.  These two things
> + * are unrelated, so make a class for the file_extent_tree so we don't get the
> + * two locking patterns mixed up.
> + */
> +static struct lock_class_key file_extent_tree_class;
> +
>  void extent_io_tree_init(struct btrfs_fs_info *fs_info,
>  			 struct extent_io_tree *tree, unsigned int owner,
>  			 void *private_data)
> @@ -276,6 +285,8 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
>  	spin_lock_init(&tree->lock);
>  	tree->private_data = private_data;
>  	tree->owner = owner;
> +	if (owner == IO_TREE_INODE_FILE_EXTENT)
> +		lockdep_set_class(&tree->lock, &file_extent_tree_class);
>  }
>  
>  void extent_io_tree_release(struct extent_io_tree *tree)
> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> index c2f365662d55..e5dc6c4b2f05 100644
> --- a/fs/btrfs/file-item.c
> +++ b/fs/btrfs/file-item.c
> @@ -23,6 +23,97 @@
>  #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
>  				       PAGE_SIZE))
>  
> +/**
> + * @inode - the inode we want to update the disk_i_size for
> + * @new_isize - the isize we want to set to, 0 if we use i_size
> + *
> + * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read()
> + * returns as it is perfectly fine with a file that has holes without hole file
> + * extent items.
> + *
> + * However for !NO_HOLES we need to only return the area that is contiguous from
> + * the 0 offset of the file.  Otherwise we could end up adjust i_size up to an
> + * extent that has a gap in between.
> + *
> + * Finally new_isize should only be set in the case of truncate where we're not
> + * ready to use i_size_read() as the limiter yet.
> + */
> +void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_isize)
> +{
> +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> +	u64 start, end, isize;
> +	int ret;
> +
> +	isize = new_isize ? new_isize : i_size_read(inode);
> +	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
> +		BTRFS_I(inode)->disk_i_size = isize;
> +		return;
> +	}
> +
> +	spin_lock(&BTRFS_I(inode)->lock);
> +	ret = find_first_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
> +				    &start, &end, EXTENT_DIRTY, NULL);
> +	if (!ret && start == 0)
> +		isize = min(isize, end + 1);
> +	else
> +		isize = 0;
> +	BTRFS_I(inode)->disk_i_size = isize;
> +	spin_unlock(&BTRFS_I(inode)->lock);
> +}
> +


What if we have the following layout for an inode:

[----1M-EXTЕNT---][----2M-HOLE--------][------3M-EXTENT-----------]

In this case the idisk size should be 4m whereas with the above code it
will be 1M. Isn't this wrong?

Filipe Manana Jan. 30, 2020, 11:25 a.m. UTC | #2

On Thu, Jan 30, 2020 at 11:19 AM Nikolay Borisov <nborisov@suse.com> wrote:
>
>
>
> On 17.01.20 г. 16:02 ч., Josef Bacik wrote:
> > In order to keep track of where we have file extents on disk, and thus
> > where it is safe to adjust the i_size to, we need to have a tree in
> > place to keep track of the contiguous areas we have file extents for.
> > Add helpers to use this tree, as it's not required for NO_HOLES file
> > systems.  We will use this by setting DIRTY for areas we know we have
> > file extent item's set, and clearing it when we remove file extent items
> > for truncation.
> >
> > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> > Reviewed-by: Filipe Manana <fdmanana@suse.com>
> > ---
> >  fs/btrfs/btrfs_inode.h    |  5 +++
> >  fs/btrfs/ctree.h          |  5 +++
> >  fs/btrfs/extent-io-tree.h |  1 +
> >  fs/btrfs/extent_io.c      | 11 +++++
> >  fs/btrfs/file-item.c      | 91 +++++++++++++++++++++++++++++++++++++++
> >  fs/btrfs/inode.c          |  6 +++
> >  6 files changed, 119 insertions(+)
> >
> > diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> > index 4e12a477d32e..d9dcbac513ed 100644
> > --- a/fs/btrfs/btrfs_inode.h
> > +++ b/fs/btrfs/btrfs_inode.h
> > @@ -60,6 +60,11 @@ struct btrfs_inode {
> >        */
> >       struct extent_io_tree io_failure_tree;
> >
> > +     /* keeps track of where we have extent items mapped in order to make
> > +      * sure our i_size adjustments are accurate.
> > +      */
> > +     struct extent_io_tree file_extent_tree;
> > +
> >       /* held while logging the inode in tree-log.c */
> >       struct mutex log_mutex;
> >
> > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > index 00cf1641f1b9..8a2c1665baad 100644
> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -2851,6 +2851,11 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
> >                                    struct btrfs_file_extent_item *fi,
> >                                    const bool new_inline,
> >                                    struct extent_map *em);
> > +int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
> > +                                     u64 len);
> > +int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
> > +                                   u64 len);
> > +void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_isize);
> >
> >  /* inode.c */
> >  struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
> > diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
> > index a3febe746c79..c8bcd2e3184c 100644
> > --- a/fs/btrfs/extent-io-tree.h
> > +++ b/fs/btrfs/extent-io-tree.h
> > @@ -44,6 +44,7 @@ enum {
> >       IO_TREE_TRANS_DIRTY_PAGES,
> >       IO_TREE_ROOT_DIRTY_LOG_PAGES,
> >       IO_TREE_SELFTEST,
> > +     IO_TREE_INODE_FILE_EXTENT,
> >  };
> >
> >  struct extent_io_tree {
> > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > index 0d40cd7427ba..f9b223d827b3 100644
> > --- a/fs/btrfs/extent_io.c
> > +++ b/fs/btrfs/extent_io.c
> > @@ -265,6 +265,15 @@ void __cold extent_io_exit(void)
> >       bioset_exit(&btrfs_bioset);
> >  }
> >
> > +/*
> > + * For the file_extent_tree, we want to hold the inode lock when we lookup and
> > + * update the disk_i_size, but lockdep will complain because our io_tree we hold
> > + * the tree lock and get the inode lock when setting delalloc.  These two things
> > + * are unrelated, so make a class for the file_extent_tree so we don't get the
> > + * two locking patterns mixed up.
> > + */
> > +static struct lock_class_key file_extent_tree_class;
> > +
> >  void extent_io_tree_init(struct btrfs_fs_info *fs_info,
> >                        struct extent_io_tree *tree, unsigned int owner,
> >                        void *private_data)
> > @@ -276,6 +285,8 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
> >       spin_lock_init(&tree->lock);
> >       tree->private_data = private_data;
> >       tree->owner = owner;
> > +     if (owner == IO_TREE_INODE_FILE_EXTENT)
> > +             lockdep_set_class(&tree->lock, &file_extent_tree_class);
> >  }
> >
> >  void extent_io_tree_release(struct extent_io_tree *tree)
> > diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> > index c2f365662d55..e5dc6c4b2f05 100644
> > --- a/fs/btrfs/file-item.c
> > +++ b/fs/btrfs/file-item.c
> > @@ -23,6 +23,97 @@
> >  #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
> >                                      PAGE_SIZE))
> >
> > +/**
> > + * @inode - the inode we want to update the disk_i_size for
> > + * @new_isize - the isize we want to set to, 0 if we use i_size
> > + *
> > + * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read()
> > + * returns as it is perfectly fine with a file that has holes without hole file
> > + * extent items.
> > + *
> > + * However for !NO_HOLES we need to only return the area that is contiguous from
> > + * the 0 offset of the file.  Otherwise we could end up adjust i_size up to an
> > + * extent that has a gap in between.
> > + *
> > + * Finally new_isize should only be set in the case of truncate where we're not
> > + * ready to use i_size_read() as the limiter yet.
> > + */
> > +void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_isize)
> > +{
> > +     struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> > +     u64 start, end, isize;
> > +     int ret;
> > +
> > +     isize = new_isize ? new_isize : i_size_read(inode);
> > +     if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
> > +             BTRFS_I(inode)->disk_i_size = isize;
> > +             return;
> > +     }
> > +
> > +     spin_lock(&BTRFS_I(inode)->lock);
> > +     ret = find_first_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
> > +                                 &start, &end, EXTENT_DIRTY, NULL);
> > +     if (!ret && start == 0)
> > +             isize = min(isize, end + 1);
> > +     else
> > +             isize = 0;
> > +     BTRFS_I(inode)->disk_i_size = isize;
> > +     spin_unlock(&BTRFS_I(inode)->lock);
> > +}
> > +
>
>
> What if we have the following layout for an inode:
>
> [----1M-EXTЕNT---][----2M-HOLE--------][------3M-EXTENT-----------]
>
> In this case the idisk size should be 4m whereas with the above code it
> will be 1M. Isn't this wrong?

No. That's precisely the point - to set it to 1Mb because there's an
implicit hole after it. For explicit holes, it will be 4M (that's done
in another patch in the series).

David Sterba Feb. 3, 2020, 6:11 p.m. UTC | #3

On Fri, Jan 17, 2020 at 09:02:21AM -0500, Josef Bacik wrote:
> diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
> index a3febe746c79..c8bcd2e3184c 100644
> --- a/fs/btrfs/extent-io-tree.h
> +++ b/fs/btrfs/extent-io-tree.h
> @@ -44,6 +44,7 @@ enum {
>  	IO_TREE_TRANS_DIRTY_PAGES,
>  	IO_TREE_ROOT_DIRTY_LOG_PAGES,
>  	IO_TREE_SELFTEST,
> +	IO_TREE_INODE_FILE_EXTENT,

I've reordered the two and added the missing enum -> string map for the
tracepoints.

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4e12a477d32e..d9dcbac513ed 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -60,6 +60,11 @@  struct btrfs_inode {
 	 */
 	struct extent_io_tree io_failure_tree;
 
+	/* keeps track of where we have extent items mapped in order to make
+	 * sure our i_size adjustments are accurate.
+	 */
+	struct extent_io_tree file_extent_tree;
+
 	/* held while logging the inode in tree-log.c */
 	struct mutex log_mutex;
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 00cf1641f1b9..8a2c1665baad 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2851,6 +2851,11 @@  void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 				     struct btrfs_file_extent_item *fi,
 				     const bool new_inline,
 				     struct extent_map *em);
+int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
+					u64 len);
+int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
+				      u64 len);
+void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_isize);
 
 /* inode.c */
 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index a3febe746c79..c8bcd2e3184c 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -44,6 +44,7 @@  enum {
 	IO_TREE_TRANS_DIRTY_PAGES,
 	IO_TREE_ROOT_DIRTY_LOG_PAGES,
 	IO_TREE_SELFTEST,
+	IO_TREE_INODE_FILE_EXTENT,
 };
 
 struct extent_io_tree {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0d40cd7427ba..f9b223d827b3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -265,6 +265,15 @@  void __cold extent_io_exit(void)
 	bioset_exit(&btrfs_bioset);
 }
 
+/*
+ * For the file_extent_tree, we want to hold the inode lock when we lookup and
+ * update the disk_i_size, but lockdep will complain because our io_tree we hold
+ * the tree lock and get the inode lock when setting delalloc.  These two things
+ * are unrelated, so make a class for the file_extent_tree so we don't get the
+ * two locking patterns mixed up.
+ */
+static struct lock_class_key file_extent_tree_class;
+
 void extent_io_tree_init(struct btrfs_fs_info *fs_info,
 			 struct extent_io_tree *tree, unsigned int owner,
 			 void *private_data)
@@ -276,6 +285,8 @@  void extent_io_tree_init(struct btrfs_fs_info *fs_info,
 	spin_lock_init(&tree->lock);
 	tree->private_data = private_data;
 	tree->owner = owner;
+	if (owner == IO_TREE_INODE_FILE_EXTENT)
+		lockdep_set_class(&tree->lock, &file_extent_tree_class);
 }
 
 void extent_io_tree_release(struct extent_io_tree *tree)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c2f365662d55..e5dc6c4b2f05 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -23,6 +23,97 @@ 
 #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
 				       PAGE_SIZE))
 
+/**
+ * @inode - the inode we want to update the disk_i_size for
+ * @new_isize - the isize we want to set to, 0 if we use i_size
+ *
+ * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read()
+ * returns as it is perfectly fine with a file that has holes without hole file
+ * extent items.
+ *
+ * However for !NO_HOLES we need to only return the area that is contiguous from
+ * the 0 offset of the file.  Otherwise we could end up adjust i_size up to an
+ * extent that has a gap in between.
+ *
+ * Finally new_isize should only be set in the case of truncate where we're not
+ * ready to use i_size_read() as the limiter yet.
+ */
+void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_isize)
+{
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	u64 start, end, isize;
+	int ret;
+
+	isize = new_isize ? new_isize : i_size_read(inode);
+	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
+		BTRFS_I(inode)->disk_i_size = isize;
+		return;
+	}
+
+	spin_lock(&BTRFS_I(inode)->lock);
+	ret = find_first_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
+				    &start, &end, EXTENT_DIRTY, NULL);
+	if (!ret && start == 0)
+		isize = min(isize, end + 1);
+	else
+		isize = 0;
+	BTRFS_I(inode)->disk_i_size = isize;
+	spin_unlock(&BTRFS_I(inode)->lock);
+}
+
+/**
+ * @inode - the inode we're modifying
+ * @start - the start file offset of the file extent we've inserted
+ * @len - the logical length of the file extent item
+ *
+ * Call when we are insering a new file extent where there was none before.
+ * Does not need to call this in the case where we're replacing an existing file
+ * extent, however if you're unsure it's fine to call this multiple times.
+ *
+ * The start and len must match the file extent item, so thus must be sectorsize
+ * aligned.
+ */
+int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
+				      u64 len)
+{
+	if (len == 0)
+		return 0;
+
+	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
+
+	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
+		return 0;
+	return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
+			       EXTENT_DIRTY);
+}
+
+/**
+ * @inode - the inode we're modifying
+ * @start - the start file offset of the file extent we've inserted
+ * @len - the logical length of the file extent item
+ *
+ * Called when we drop a file extent, for example when we truncate.  Doesn't
+ * need to be called for cases where we're replacing a file extent, like when
+ * we've cow'ed a file extent.
+ *
+ * The start and len must match the file extent item, so thus must be sectorsize
+ * aligned.
+ */
+int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
+					u64 len)
+{
+	if (len == 0)
+		return 0;
+
+	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
+	       len == (u64)-1);
+
+	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
+		return 0;
+	return clear_extent_bit(&inode->file_extent_tree, start,
+				start + len - 1, EXTENT_DIRTY, 0, 0, NULL);
+}
+
 static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
 					u16 csum_size)
 {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4bdd412182ae..fd8f821a3919 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3116,6 +3116,9 @@  static int btrfs_read_locked_inode(struct inode *inode,
 	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
 	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
 	btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
+	btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
+					  round_up(i_size_read(inode),
+						   fs_info->sectorsize));
 
 	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
 	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
@@ -8695,6 +8698,8 @@  struct inode *btrfs_alloc_inode(struct super_block *sb)
 	extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
 	extent_io_tree_init(fs_info, &ei->io_failure_tree,
 			    IO_TREE_INODE_IO_FAILURE, inode);
+	extent_io_tree_init(fs_info, &ei->file_extent_tree,
+			    IO_TREE_INODE_FILE_EXTENT, inode);
 	ei->io_tree.track_uptodate = true;
 	ei->io_failure_tree.track_uptodate = true;
 	atomic_set(&ei->sync_writers, 0);
@@ -8761,6 +8766,7 @@  void btrfs_destroy_inode(struct inode *inode)
 	btrfs_qgroup_check_reserved_leak(inode);
 	inode_tree_del(inode);
 	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
+	btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
 	btrfs_put_root(BTRFS_I(inode)->root);
 }

[3/6] btrfs: introduce the inode->file_extent_tree

Commit Message

Comments

Patch