
[3/4] btrfs: prepare btrfs_buffered_write() for large data folios

Message ID 285fe66e1d13bd9b1aa9b316da12cbaa8cb12c95.1743113694.git.wqu@suse.com (mailing list archive)
State New
Series btrfs: add the missing preparations exposed by initial large data folio support

Commit Message

Qu Wenruo March 27, 2025, 10:31 p.m. UTC
This involves the following modifications:

- Set the order flags for __filemap_get_folio() inside
  prepare_one_folio()

  This will allow __filemap_get_folio() to create a large folio if the
  address space supports it; a sketch of the fgf_set_order() hint is
  included below.

- Limit the initial @write_bytes inside copy_one_range()

  If the largest folio boundary splits the initial write range, there is
  no way we can write beyond that boundary within a single folio.

  This is done by a simple helper function, calc_write_bytes() (a worked
  example follows this list).

- Release the excess reserved space if the folio is smaller than expected

  This is the same handling as when a short copy happens.
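
As an illustration of the boundary clamp in calc_write_bytes(), here is a
standalone userspace sketch with made-up numbers (64K max folio size, a
write starting 60K into the file, 100K left in the iov); the values are
illustrative only, not from the patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned long max_folio_size = 65536;	/* 64K largest folio */
		unsigned long start = 61440;		/* 60K into the file */
		unsigned long iov_count = 102400;	/* 100K left to copy */

		/* Distance from @start to the next largest-folio boundary. */
		unsigned long to_boundary =
			max_folio_size - (start & (max_folio_size - 1));
		unsigned long write_bytes =
			to_boundary < iov_count ? to_boundary : iov_count;

		/* Prints 4096: the copy stops at the 64K folio boundary. */
		printf("write_bytes = %lu\n", write_bytes);
		return 0;
	}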

All these preparations should not change the behavior when the largest
folio order is 0.
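
For reference, fgf_set_order() in include/linux/pagemap.h turns the
requested byte count into a folio order hint packed into the high bits of
the fgf_t flags, which __filemap_get_folio() then consults when allocating
a folio. A paraphrased sketch (check the tree for the authoritative
version):

	static inline fgf_t fgf_set_order(size_t size)
	{
		unsigned int shift = ilog2(size);

		if (shift <= PAGE_SHIFT)
			return 0;
		return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
	}

Passing write_bytes here means the requested order scales with how much we
are about to write, and stays 0 for sub-page writes.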

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/file.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

Comments

Filipe Manana March 28, 2025, 5:51 p.m. UTC | #1
On Thu, Mar 27, 2025 at 10:37 PM Qu Wenruo <wqu@suse.com> wrote:
>
> This involves the following modifications:
>
> - Set the order flags for __filemap_get_folio() inside
>   prepare_one_folio()
>
>   This will allow __filemap_get_folio() to create a large folio if the
>   address space supports it.
>
> - Limit the initial @write_bytes inside copy_one_range()
>   If the largest folio boundary splits the initial write range, there is
>   no way we can write beyond the largest folio boundary.
>
>   This is done by a simple helper function, calc_write_bytes().
>
> - Release exceeding reserved space if the folio is smaller than expected
>   Which is doing the same handling when short copy happened.
>
> All these preparation should not change the behavior when the largest

preparation -> preparations

> folio order is 0.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/file.c | 29 +++++++++++++++++++++++++++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 63c7a3294eb2..5d10ae321687 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -861,7 +861,8 @@ static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_
>  {
>         unsigned long index = pos >> PAGE_SHIFT;
>         gfp_t mask = get_prepare_gfp_flags(inode, nowait);
> -       fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
> +       fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN) |
> +                         fgf_set_order(write_bytes);
>         struct folio *folio;
>         int ret = 0;
>
> @@ -1169,6 +1170,16 @@ static void shrink_reserved_space(struct btrfs_inode *inode,
>                                 diff, true);
>  }
>
> +/* Calculate the maximum amount of bytes we can write into one folio. */
> +static size_t calc_write_bytes(const struct btrfs_inode *inode,
> +                              const struct iov_iter *i, u64 start)

This was mentioned by David in a previous review of some other patch,
but please don't name it 'i'; we only use such a name for loop index
variables.
Name it something like iter or iov.

> +{
> +       size_t max_folio_size = mapping_max_folio_size(inode->vfs_inode.i_mapping);

Can be const.

Anyway, those are minor things, so:

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Thanks.

> +
> +       return min(max_folio_size - (start & (max_folio_size - 1)),
> +                  iov_iter_count(i));
> +}
> +
>  /*
>   * Do the heavy-lifting work to copy one range into one folio of the page cache.
>   *
> @@ -1182,7 +1193,7 @@ static int copy_one_range(struct btrfs_inode *inode, struct iov_iter *i,
>  {
>         struct btrfs_fs_info *fs_info = inode->root->fs_info;
>         struct extent_state *cached_state = NULL;
> -       size_t write_bytes = min(iov_iter_count(i), PAGE_SIZE - offset_in_page(start));
> +       size_t write_bytes = calc_write_bytes(inode, i, start);
>         size_t copied;
>         const u64 reserved_start = round_down(start, fs_info->sectorsize);
>         u64 reserved_len;
> @@ -1227,6 +1238,20 @@ static int copy_one_range(struct btrfs_inode *inode, struct iov_iter *i,
>                               only_release_metadata);
>                 return ret;
>         }
> +       /*
> +        * The reserved range goes beyond the current folio, shrink the reserved
> +        * space to the folio boundary.
> +        */
> +       if (reserved_start + reserved_len > folio_pos(folio) + folio_size(folio)) {
> +               const u64 last_block = folio_pos(folio) + folio_size(folio);
> +
> +               shrink_reserved_space(inode, *data_reserved, reserved_start,
> +                                     reserved_len, last_block - reserved_start,
> +                                     only_release_metadata);
> +               write_bytes = folio_pos(folio) + folio_size(folio) - start;
> +               reserved_len = last_block - reserved_start;
> +       }
> +
>         extents_locked = lock_and_cleanup_extent_if_need(inode,
>                                         folio, start, write_bytes, &lockstart,
>                                         &lockend, nowait, &cached_state);
> --
> 2.49.0
>
>

Patch

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 63c7a3294eb2..5d10ae321687 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -861,7 +861,8 @@  static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_
 {
 	unsigned long index = pos >> PAGE_SHIFT;
 	gfp_t mask = get_prepare_gfp_flags(inode, nowait);
-	fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
+	fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN) |
+			  fgf_set_order(write_bytes);
 	struct folio *folio;
 	int ret = 0;
 
@@ -1169,6 +1170,16 @@  static void shrink_reserved_space(struct btrfs_inode *inode,
 				diff, true);
 }
 
+/* Calculate the maximum amount of bytes we can write into one folio. */
+static size_t calc_write_bytes(const struct btrfs_inode *inode,
+			       const struct iov_iter *i, u64 start)
+{
+	size_t max_folio_size = mapping_max_folio_size(inode->vfs_inode.i_mapping);
+
+	return min(max_folio_size - (start & (max_folio_size - 1)),
+		   iov_iter_count(i));
+}
+
 /*
  * Do the heavy-lifting work to copy one range into one folio of the page cache.
  *
@@ -1182,7 +1193,7 @@  static int copy_one_range(struct btrfs_inode *inode, struct iov_iter *i,
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct extent_state *cached_state = NULL;
-	size_t write_bytes = min(iov_iter_count(i), PAGE_SIZE - offset_in_page(start));
+	size_t write_bytes = calc_write_bytes(inode, i, start);
 	size_t copied;
 	const u64 reserved_start = round_down(start, fs_info->sectorsize);
 	u64 reserved_len;
@@ -1227,6 +1238,20 @@  static int copy_one_range(struct btrfs_inode *inode, struct iov_iter *i,
 			      only_release_metadata);
 		return ret;
 	}
+	/*
+	 * The reserved range goes beyond the current folio, shrink the reserved
+	 * space to the folio boundary.
+	 */
+	if (reserved_start + reserved_len > folio_pos(folio) + folio_size(folio)) {
+		const u64 last_block = folio_pos(folio) + folio_size(folio);
+
+		shrink_reserved_space(inode, *data_reserved, reserved_start,
+				      reserved_len, last_block - reserved_start,
+				      only_release_metadata);
+		write_bytes = folio_pos(folio) + folio_size(folio) - start;
+		reserved_len = last_block - reserved_start;
+	}
+
 	extents_locked = lock_and_cleanup_extent_if_need(inode,
 					folio, start, write_bytes, &lockstart,
 					&lockend, nowait, &cached_state);
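
To make the shrink path at the end of the hunk above concrete, consider a
hypothetical sequence (illustrative numbers, assuming a 4K sector size and
a 64K max folio size):

	start          = 12K	(position of this copy)
	iov count      = 32K	=> write_bytes  = min(64K - 12K, 32K) = 32K
	reserved_start = 12K	   reserved_len = 32K
	folio_pos      = 0	   folio_size   = 16K	(allocation fell back
							 to a smaller folio)

	reserved_start + reserved_len = 44K > folio_pos + folio_size = 16K
	=> last_block   = 16K
	=> reserved_len = 16K - 12K = 4K
	=> write_bytes  = 16K - 12K = 4K

The remainder of the range is then handled by the next copy_one_range()
iteration, exactly as a short copy would be.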