diff mbox series

[2/9] btrfs: send: avoid copying file data

Message ID be54e8e7658f85dd5e62627a1ad02beb7a4aeed8.1597994106.git.osandov@osandov.com (mailing list archive)
State New, archived
Headers show
Series btrfs: implement send/receive of compressed extents without decompressing | expand

Commit Message

Omar Sandoval Aug. 21, 2020, 7:39 a.m. UTC
From: Omar Sandoval <osandov@fb.com>

send_write() currently copies from the page cache to sctx->read_buf, and
then from sctx->read_buf to sctx->send_buf. Similarly, send_hole()
zeroes sctx->read_buf and then copies from sctx->read_buf to
sctx->send_buf. However, if we write the TLV header manually, we can
copy to sctx->send_buf directly and get rid of sctx->read_buf.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/send.c | 65 +++++++++++++++++++++++++++++--------------------
 fs/btrfs/send.h |  1 -
 2 files changed, 39 insertions(+), 27 deletions(-)

Comments

Filipe Manana Aug. 21, 2020, 5:29 p.m. UTC | #1
On Fri, Aug 21, 2020 at 8:42 AM Omar Sandoval <osandov@osandov.com> wrote:
>
> From: Omar Sandoval <osandov@fb.com>
>
> send_write() currently copies from the page cache to sctx->read_buf, and
> then from sctx->read_buf to sctx->send_buf. Similarly, send_hole()
> zeroes sctx->read_buf and then copies from sctx->read_buf to
> sctx->send_buf. However, if we write the TLV header manually, we can
> copy to sctx->send_buf directly and get rid of sctx->read_buf.
>
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Looks good, and it passed some long duration tests with both full and
incremental sends here (with and without compression, no-holes, etc).
Only one minor thing below, but it's really subjective and doesn't
make much of a difference.

Thanks.

> ---
>  fs/btrfs/send.c | 65 +++++++++++++++++++++++++++++--------------------
>  fs/btrfs/send.h |  1 -
>  2 files changed, 39 insertions(+), 27 deletions(-)
>
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index 8af5e867e4ca..e70f5ceb3261 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -122,8 +122,6 @@ struct send_ctx {
>
>         struct file_ra_state ra;
>
> -       char *read_buf;
> -
>         /*
>          * We process inodes by their increasing order, so if before an
>          * incremental send we reverse the parent/child relationship of
> @@ -4794,7 +4792,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
>         return ret;
>  }
>
> -static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
> +static u64 max_send_read_size(struct send_ctx *sctx)

We could make this inline, since it's so small and trivial, and
constify the argument too.

> +{
> +       return sctx->send_max_size - SZ_16K;
> +}
> +
> +static int put_data_header(struct send_ctx *sctx, u32 len)
> +{
> +       struct btrfs_tlv_header *hdr;
> +
> +       if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> +               return -EOVERFLOW;
> +       hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> +       hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> +       hdr->tlv_len = cpu_to_le16(len);
> +       sctx->send_size += sizeof(*hdr);
> +       return 0;
> +}
> +
> +static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
>  {
>         struct btrfs_root *root = sctx->send_root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
> @@ -4804,8 +4820,11 @@ static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
>         pgoff_t index = offset >> PAGE_SHIFT;
>         pgoff_t last_index;
>         unsigned pg_offset = offset_in_page(offset);
> -       int ret = 0;
> -       size_t read = 0;
> +       int ret;
> +
> +       ret = put_data_header(sctx, len);
> +       if (ret)
> +               return ret;
>
>         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
>         if (IS_ERR(inode))
> @@ -4851,14 +4870,15 @@ static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
>                 }
>
>                 addr = kmap(page);
> -               memcpy(sctx->read_buf + read, addr + pg_offset, cur_len);
> +               memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
> +                      cur_len);
>                 kunmap(page);
>                 unlock_page(page);
>                 put_page(page);
>                 index++;
>                 pg_offset = 0;
>                 len -= cur_len;
> -               read += cur_len;
> +               sctx->send_size += cur_len;
>         }
>         iput(inode);
>         return ret;
> @@ -4880,10 +4900,6 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
>
>         btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
>
> -       ret = fill_read_buf(sctx, offset, len);
> -       if (ret < 0)
> -               goto out;
> -
>         ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
>         if (ret < 0)
>                 goto out;
> @@ -4894,7 +4910,9 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
>
>         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
>         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
> -       TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
> +       ret = put_file_data(sctx, offset, len);
> +       if (ret < 0)
> +               goto out;
>
>         ret = send_cmd(sctx);
>
> @@ -5013,8 +5031,8 @@ static int send_update_extent(struct send_ctx *sctx,
>  static int send_hole(struct send_ctx *sctx, u64 end)
>  {
>         struct fs_path *p = NULL;
> +       u64 read_size = max_send_read_size(sctx);
>         u64 offset = sctx->cur_inode_last_extent;
> -       u64 len;
>         int ret = 0;
>
>         /*
> @@ -5041,16 +5059,19 @@ static int send_hole(struct send_ctx *sctx, u64 end)
>         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
>         if (ret < 0)
>                 goto tlv_put_failure;
> -       memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
>         while (offset < end) {
> -               len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
> +               u64 len = min(end - offset, read_size);
>
>                 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
>                 if (ret < 0)
>                         break;
>                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
>                 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
> -               TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
> +               ret = put_data_header(sctx, len);
> +               if (ret < 0)
> +                       break;
> +               memset(sctx->send_buf + sctx->send_size, 0, len);
> +               sctx->send_size += len;
>                 ret = send_cmd(sctx);
>                 if (ret < 0)
>                         break;
> @@ -5066,17 +5087,16 @@ static int send_extent_data(struct send_ctx *sctx,
>                             const u64 offset,
>                             const u64 len)
>  {
> +       u64 read_size = max_send_read_size(sctx);
>         u64 sent = 0;
>
>         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
>                 return send_update_extent(sctx, offset, len);
>
>         while (sent < len) {
> -               u64 size = len - sent;
> +               u64 size = min(len - sent, read_size);
>                 int ret;
>
> -               if (size > BTRFS_SEND_READ_SIZE)
> -                       size = BTRFS_SEND_READ_SIZE;
>                 ret = send_write(sctx, offset + sent, size);
>                 if (ret < 0)
>                         return ret;
> @@ -7145,12 +7165,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
>                 goto out;
>         }
>
> -       sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
> -       if (!sctx->read_buf) {
> -               ret = -ENOMEM;
> -               goto out;
> -       }
> -
>         sctx->pending_dir_moves = RB_ROOT;
>         sctx->waiting_dir_moves = RB_ROOT;
>         sctx->orphan_dirs = RB_ROOT;
> @@ -7354,7 +7368,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
>
>                 kvfree(sctx->clone_roots);
>                 kvfree(sctx->send_buf);
> -               kvfree(sctx->read_buf);
>
>                 name_cache_free(sctx);
>
> diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
> index ead397f7034f..de91488b7cd0 100644
> --- a/fs/btrfs/send.h
> +++ b/fs/btrfs/send.h
> @@ -13,7 +13,6 @@
>  #define BTRFS_SEND_STREAM_VERSION 1
>
>  #define BTRFS_SEND_BUF_SIZE SZ_64K
> -#define BTRFS_SEND_READ_SIZE (48 * SZ_1K)
>
>  enum btrfs_tlv_type {
>         BTRFS_TLV_U8,
> --
> 2.28.0
>
Josef Bacik Aug. 24, 2020, 5:47 p.m. UTC | #2
On 8/21/20 3:39 AM, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> send_write() currently copies from the page cache to sctx->read_buf, and
> then from sctx->read_buf to sctx->send_buf. Similarly, send_hole()
> zeroes sctx->read_buf and then copies from sctx->read_buf to
> sctx->send_buf. However, if we write the TLV header manually, we can
> copy to sctx->send_buf directly and get rid of sctx->read_buf.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>

I couldn't figure out why you weren't just using the TLV_ helpers for this, but
then I realized that len is the length of the data, so you need a special helper
for the header.  Just in case anybody else gets confused,

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
Omar Sandoval Aug. 24, 2020, 9:34 p.m. UTC | #3
On Fri, Aug 21, 2020 at 06:29:30PM +0100, Filipe Manana wrote:
> On Fri, Aug 21, 2020 at 8:42 AM Omar Sandoval <osandov@osandov.com> wrote:
> >
> > From: Omar Sandoval <osandov@fb.com>
> >
> > send_write() currently copies from the page cache to sctx->read_buf, and
> > then from sctx->read_buf to sctx->send_buf. Similarly, send_hole()
> > zeroes sctx->read_buf and then copies from sctx->read_buf to
> > sctx->send_buf. However, if we write the TLV header manually, we can
> > copy to sctx->send_buf directly and get rid of sctx->read_buf.
> >
> > Signed-off-by: Omar Sandoval <osandov@fb.com>
> 
> Reviewed-by: Filipe Manana <fdmanana@suse.com>
> 
> Looks good, and it passed some long duration tests with both full and
> incremental sends here (with and without compression, no-holes, etc).
> Only one minor thing below, but it's really subjective and doesn't
> make much of a difference.
> 
> Thanks.
> 
> > ---
> >  fs/btrfs/send.c | 65 +++++++++++++++++++++++++++++--------------------
> >  fs/btrfs/send.h |  1 -
> >  2 files changed, 39 insertions(+), 27 deletions(-)
> >
> > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> > index 8af5e867e4ca..e70f5ceb3261 100644
> > --- a/fs/btrfs/send.c
> > +++ b/fs/btrfs/send.c
> > @@ -122,8 +122,6 @@ struct send_ctx {
> >
> >         struct file_ra_state ra;
> >
> > -       char *read_buf;
> > -
> >         /*
> >          * We process inodes by their increasing order, so if before an
> >          * incremental send we reverse the parent/child relationship of
> > @@ -4794,7 +4792,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
> >         return ret;
> >  }
> >
> > -static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
> > +static u64 max_send_read_size(struct send_ctx *sctx)
> 
> We could make this inline, since it's so small and trivial, and
> constify the argument too.

Good point, fixed. Thanks, Filipe!
David Sterba Sept. 11, 2020, 2:13 p.m. UTC | #4
On Fri, Aug 21, 2020 at 12:39:52AM -0700, Omar Sandoval wrote:
> +static int put_data_header(struct send_ctx *sctx, u32 len)
> +{
> +	struct btrfs_tlv_header *hdr;
> +
> +	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> +		return -EOVERFLOW;
> +	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> +	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> +	hdr->tlv_len = cpu_to_le16(len);

I think we need put_unaligned_le16 here, it's mapping a random buffer to
a pointer, this is not alignment safe in general.
Omar Sandoval Sept. 14, 2020, 10:04 p.m. UTC | #5
On Fri, Sep 11, 2020 at 04:13:39PM +0200, David Sterba wrote:
> On Fri, Aug 21, 2020 at 12:39:52AM -0700, Omar Sandoval wrote:
> > +static int put_data_header(struct send_ctx *sctx, u32 len)
> > +{
> > +	struct btrfs_tlv_header *hdr;
> > +
> > +	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> > +		return -EOVERFLOW;
> > +	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> > +	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> > +	hdr->tlv_len = cpu_to_le16(len);
> 
> I think we need put_unaligned_le16 here, it's mapping a random buffer to
> a pointer, this is not alignment safe in general.

I think you're right, although tlv_put() seems to have this same
problem.
David Sterba Sept. 15, 2020, 8:14 a.m. UTC | #6
On Mon, Sep 14, 2020 at 03:04:48PM -0700, Omar Sandoval wrote:
> On Fri, Sep 11, 2020 at 04:13:39PM +0200, David Sterba wrote:
> > On Fri, Aug 21, 2020 at 12:39:52AM -0700, Omar Sandoval wrote:
> > > +static int put_data_header(struct send_ctx *sctx, u32 len)
> > > +{
> > > +	struct btrfs_tlv_header *hdr;
> > > +
> > > +	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> > > +		return -EOVERFLOW;
> > > +	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> > > +	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> > > +	hdr->tlv_len = cpu_to_le16(len);
> > 
> > I think we need put_unaligned_le16 here, it's mapping a random buffer to
> > a pointer, this is not alignment safe in general.
> 
> I think you're right, although tlv_put() seems to have this same
> problem.

Indeed, and there are more places with the same problem: tlv_put,
TLV_PUT_DEFINE_INT, begin_cmd and send_cmd. Other direct assignments are
in local structs, so the alignment is fine there.
diff mbox series

Patch

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 8af5e867e4ca..e70f5ceb3261 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -122,8 +122,6 @@  struct send_ctx {
 
 	struct file_ra_state ra;
 
-	char *read_buf;
-
 	/*
 	 * We process inodes by their increasing order, so if before an
 	 * incremental send we reverse the parent/child relationship of
@@ -4794,7 +4792,25 @@  static int process_all_new_xattrs(struct send_ctx *sctx)
 	return ret;
 }
 
-static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
+static u64 max_send_read_size(struct send_ctx *sctx)
+{
+	return sctx->send_max_size - SZ_16K;
+}
+
+static int put_data_header(struct send_ctx *sctx, u32 len)
+{
+	struct btrfs_tlv_header *hdr;
+
+	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
+		return -EOVERFLOW;
+	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
+	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
+	hdr->tlv_len = cpu_to_le16(len);
+	sctx->send_size += sizeof(*hdr);
+	return 0;
+}
+
+static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
 {
 	struct btrfs_root *root = sctx->send_root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4804,8 +4820,11 @@  static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
 	pgoff_t index = offset >> PAGE_SHIFT;
 	pgoff_t last_index;
 	unsigned pg_offset = offset_in_page(offset);
-	int ret = 0;
-	size_t read = 0;
+	int ret;
+
+	ret = put_data_header(sctx, len);
+	if (ret)
+		return ret;
 
 	inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
 	if (IS_ERR(inode))
@@ -4851,14 +4870,15 @@  static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
 		}
 
 		addr = kmap(page);
-		memcpy(sctx->read_buf + read, addr + pg_offset, cur_len);
+		memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
+		       cur_len);
 		kunmap(page);
 		unlock_page(page);
 		put_page(page);
 		index++;
 		pg_offset = 0;
 		len -= cur_len;
-		read += cur_len;
+		sctx->send_size += cur_len;
 	}
 	iput(inode);
 	return ret;
@@ -4880,10 +4900,6 @@  static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
 
 	btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
 
-	ret = fill_read_buf(sctx, offset, len);
-	if (ret < 0)
-		goto out;
-
 	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
 	if (ret < 0)
 		goto out;
@@ -4894,7 +4910,9 @@  static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
 
 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
-	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
+	ret = put_file_data(sctx, offset, len);
+	if (ret < 0)
+		goto out;
 
 	ret = send_cmd(sctx);
 
@@ -5013,8 +5031,8 @@  static int send_update_extent(struct send_ctx *sctx,
 static int send_hole(struct send_ctx *sctx, u64 end)
 {
 	struct fs_path *p = NULL;
+	u64 read_size = max_send_read_size(sctx);
 	u64 offset = sctx->cur_inode_last_extent;
-	u64 len;
 	int ret = 0;
 
 	/*
@@ -5041,16 +5059,19 @@  static int send_hole(struct send_ctx *sctx, u64 end)
 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
 	if (ret < 0)
 		goto tlv_put_failure;
-	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
 	while (offset < end) {
-		len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
+		u64 len = min(end - offset, read_size);
 
 		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
 		if (ret < 0)
 			break;
 		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
 		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
-		TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
+		ret = put_data_header(sctx, len);
+		if (ret < 0)
+			break;
+		memset(sctx->send_buf + sctx->send_size, 0, len);
+		sctx->send_size += len;
 		ret = send_cmd(sctx);
 		if (ret < 0)
 			break;
@@ -5066,17 +5087,16 @@  static int send_extent_data(struct send_ctx *sctx,
 			    const u64 offset,
 			    const u64 len)
 {
+	u64 read_size = max_send_read_size(sctx);
 	u64 sent = 0;
 
 	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
 		return send_update_extent(sctx, offset, len);
 
 	while (sent < len) {
-		u64 size = len - sent;
+		u64 size = min(len - sent, read_size);
 		int ret;
 
-		if (size > BTRFS_SEND_READ_SIZE)
-			size = BTRFS_SEND_READ_SIZE;
 		ret = send_write(sctx, offset + sent, size);
 		if (ret < 0)
 			return ret;
@@ -7145,12 +7165,6 @@  long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
 		goto out;
 	}
 
-	sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
-	if (!sctx->read_buf) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	sctx->pending_dir_moves = RB_ROOT;
 	sctx->waiting_dir_moves = RB_ROOT;
 	sctx->orphan_dirs = RB_ROOT;
@@ -7354,7 +7368,6 @@  long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
 
 		kvfree(sctx->clone_roots);
 		kvfree(sctx->send_buf);
-		kvfree(sctx->read_buf);
 
 		name_cache_free(sctx);
 
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index ead397f7034f..de91488b7cd0 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -13,7 +13,6 @@ 
 #define BTRFS_SEND_STREAM_VERSION 1
 
 #define BTRFS_SEND_BUF_SIZE SZ_64K
-#define BTRFS_SEND_READ_SIZE (48 * SZ_1K)
 
 enum btrfs_tlv_type {
 	BTRFS_TLV_U8,