Message ID | be54e8e7658f85dd5e62627a1ad02beb7a4aeed8.1597994106.git.osandov@osandov.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: implement send/receive of compressed extents without decompressing | expand |
On Fri, Aug 21, 2020 at 8:42 AM Omar Sandoval <osandov@osandov.com> wrote: > > From: Omar Sandoval <osandov@fb.com> > > send_write() currently copies from the page cache to sctx->read_buf, and > then from sctx->read_buf to sctx->send_buf. Similarly, send_hole() > zeroes sctx->read_buf and then copies from sctx->read_buf to > sctx->send_buf. However, if we write the TLV header manually, we can > copy to sctx->send_buf directly and get rid of sctx->read_buf. > > Signed-off-by: Omar Sandoval <osandov@fb.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Looks good, and it passed some long duration tests with both full and incremental sends here (with and without compression, no-holes, etc). Only one minor thing below, but it's really subjective and doesn't make much of a difference. Thanks. > --- > fs/btrfs/send.c | 65 +++++++++++++++++++++++++++++-------------------- > fs/btrfs/send.h | 1 - > 2 files changed, 39 insertions(+), 27 deletions(-) > > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c > index 8af5e867e4ca..e70f5ceb3261 100644 > --- a/fs/btrfs/send.c > +++ b/fs/btrfs/send.c > @@ -122,8 +122,6 @@ struct send_ctx { > > struct file_ra_state ra; > > - char *read_buf; > - > /* > * We process inodes by their increasing order, so if before an > * incremental send we reverse the parent/child relationship of > @@ -4794,7 +4792,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) > return ret; > } > > -static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) > +static u64 max_send_read_size(struct send_ctx *sctx) We could make this inline, since it's so small and trivial, and constify the argument too. 
> +{ > + return sctx->send_max_size - SZ_16K; > +} > + > +static int put_data_header(struct send_ctx *sctx, u32 len) > +{ > + struct btrfs_tlv_header *hdr; > + > + if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len) > + return -EOVERFLOW; > + hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size); > + hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA); > + hdr->tlv_len = cpu_to_le16(len); > + sctx->send_size += sizeof(*hdr); > + return 0; > +} > + > +static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) > { > struct btrfs_root *root = sctx->send_root; > struct btrfs_fs_info *fs_info = root->fs_info; > @@ -4804,8 +4820,11 @@ static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) > pgoff_t index = offset >> PAGE_SHIFT; > pgoff_t last_index; > unsigned pg_offset = offset_in_page(offset); > - int ret = 0; > - size_t read = 0; > + int ret; > + > + ret = put_data_header(sctx, len); > + if (ret) > + return ret; > > inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); > if (IS_ERR(inode)) > @@ -4851,14 +4870,15 @@ static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) > } > > addr = kmap(page); > - memcpy(sctx->read_buf + read, addr + pg_offset, cur_len); > + memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset, > + cur_len); > kunmap(page); > unlock_page(page); > put_page(page); > index++; > pg_offset = 0; > len -= cur_len; > - read += cur_len; > + sctx->send_size += cur_len; > } > iput(inode); > return ret; > @@ -4880,10 +4900,6 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) > > btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); > > - ret = fill_read_buf(sctx, offset, len); > - if (ret < 0) > - goto out; > - > ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); > if (ret < 0) > goto out; > @@ -4894,7 +4910,9 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) > > TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); > TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, 
offset); > - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); > + ret = put_file_data(sctx, offset, len); > + if (ret < 0) > + goto out; > > ret = send_cmd(sctx); > > @@ -5013,8 +5031,8 @@ static int send_update_extent(struct send_ctx *sctx, > static int send_hole(struct send_ctx *sctx, u64 end) > { > struct fs_path *p = NULL; > + u64 read_size = max_send_read_size(sctx); > u64 offset = sctx->cur_inode_last_extent; > - u64 len; > int ret = 0; > > /* > @@ -5041,16 +5059,19 @@ static int send_hole(struct send_ctx *sctx, u64 end) > ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); > if (ret < 0) > goto tlv_put_failure; > - memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); > while (offset < end) { > - len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); > + u64 len = min(end - offset, read_size); > > ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); > if (ret < 0) > break; > TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); > TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); > - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); > + ret = put_data_header(sctx, len); > + if (ret < 0) > + break; > + memset(sctx->send_buf + sctx->send_size, 0, len); > + sctx->send_size += len; > ret = send_cmd(sctx); > if (ret < 0) > break; > @@ -5066,17 +5087,16 @@ static int send_extent_data(struct send_ctx *sctx, > const u64 offset, > const u64 len) > { > + u64 read_size = max_send_read_size(sctx); > u64 sent = 0; > > if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) > return send_update_extent(sctx, offset, len); > > while (sent < len) { > - u64 size = len - sent; > + u64 size = min(len - sent, read_size); > int ret; > > - if (size > BTRFS_SEND_READ_SIZE) > - size = BTRFS_SEND_READ_SIZE; > ret = send_write(sctx, offset + sent, size); > if (ret < 0) > return ret; > @@ -7145,12 +7165,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) > goto out; > } > > - sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL); > - if (!sctx->read_buf) { 
> - ret = -ENOMEM; > - goto out; > - } > - > sctx->pending_dir_moves = RB_ROOT; > sctx->waiting_dir_moves = RB_ROOT; > sctx->orphan_dirs = RB_ROOT; > @@ -7354,7 +7368,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) > > kvfree(sctx->clone_roots); > kvfree(sctx->send_buf); > - kvfree(sctx->read_buf); > > name_cache_free(sctx); > > diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h > index ead397f7034f..de91488b7cd0 100644 > --- a/fs/btrfs/send.h > +++ b/fs/btrfs/send.h > @@ -13,7 +13,6 @@ > #define BTRFS_SEND_STREAM_VERSION 1 > > #define BTRFS_SEND_BUF_SIZE SZ_64K > -#define BTRFS_SEND_READ_SIZE (48 * SZ_1K) > > enum btrfs_tlv_type { > BTRFS_TLV_U8, > -- > 2.28.0 >
On 8/21/20 3:39 AM, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
>
> send_write() currently copies from the page cache to sctx->read_buf, and
> then from sctx->read_buf to sctx->send_buf. Similarly, send_hole()
> zeroes sctx->read_buf and then copies from sctx->read_buf to
> sctx->send_buf. However, if we write the TLV header manually, we can
> copy to sctx->send_buf directly and get rid of sctx->read_buf.
>
> Signed-off-by: Omar Sandoval <osandov@fb.com>

I couldn't figure out why you weren't just using one of the TLV_PUT helpers for this, but then I realized that len is the length of the data, so you need a special helper that writes just the header. I'm mentioning it just in case anybody else gets confused.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
On Fri, Aug 21, 2020 at 06:29:30PM +0100, Filipe Manana wrote: > On Fri, Aug 21, 2020 at 8:42 AM Omar Sandoval <osandov@osandov.com> wrote: > > > > From: Omar Sandoval <osandov@fb.com> > > > > send_write() currently copies from the page cache to sctx->read_buf, and > > then from sctx->read_buf to sctx->send_buf. Similarly, send_hole() > > zeroes sctx->read_buf and then copies from sctx->read_buf to > > sctx->send_buf. However, if we write the TLV header manually, we can > > copy to sctx->send_buf directly and get rid of sctx->read_buf. > > > > Signed-off-by: Omar Sandoval <osandov@fb.com> > > Reviewed-by: Filipe Manana <fdmanana@suse.com> > > Looks good, and it passed some long duration tests with both full and > incremental sends here (with and without compression, no-holes, etc). > Only one minor thing below, but it's really subjective and doesn't > make much of a difference. > > Thanks. > > > --- > > fs/btrfs/send.c | 65 +++++++++++++++++++++++++++++-------------------- > > fs/btrfs/send.h | 1 - > > 2 files changed, 39 insertions(+), 27 deletions(-) > > > > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c > > index 8af5e867e4ca..e70f5ceb3261 100644 > > --- a/fs/btrfs/send.c > > +++ b/fs/btrfs/send.c > > @@ -122,8 +122,6 @@ struct send_ctx { > > > > struct file_ra_state ra; > > > > - char *read_buf; > > - > > /* > > * We process inodes by their increasing order, so if before an > > * incremental send we reverse the parent/child relationship of > > @@ -4794,7 +4792,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) > > return ret; > > } > > > > -static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) > > +static u64 max_send_read_size(struct send_ctx *sctx) > > We could make this inline, since it's so small and trivial, and > constify the argument too. Good point, fixed. Thanks, Filipe!
On Fri, Aug 21, 2020 at 12:39:52AM -0700, Omar Sandoval wrote:
> +static int put_data_header(struct send_ctx *sctx, u32 len)
> +{
> +	struct btrfs_tlv_header *hdr;
> +
> +	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> +		return -EOVERFLOW;
> +	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> +	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> +	hdr->tlv_len = cpu_to_le16(len);

I think we need put_unaligned_le16() here. The code maps an arbitrary offset in the send buffer to a struct pointer, and that is not alignment-safe in general.
On Fri, Sep 11, 2020 at 04:13:39PM +0200, David Sterba wrote:
> On Fri, Aug 21, 2020 at 12:39:52AM -0700, Omar Sandoval wrote:
> > +static int put_data_header(struct send_ctx *sctx, u32 len)
> > +{
> > +	struct btrfs_tlv_header *hdr;
> > +
> > +	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> > +		return -EOVERFLOW;
> > +	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> > +	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> > +	hdr->tlv_len = cpu_to_le16(len);
>
> I think we need put_unaligned_le16 here, it's mapping a random buffer to
> a pointer, this is not alignment safe in general.

I think you're right, although tlv_put() seems to have this same problem.
On Mon, Sep 14, 2020 at 03:04:48PM -0700, Omar Sandoval wrote:
> On Fri, Sep 11, 2020 at 04:13:39PM +0200, David Sterba wrote:
> > On Fri, Aug 21, 2020 at 12:39:52AM -0700, Omar Sandoval wrote:
> > > +static int put_data_header(struct send_ctx *sctx, u32 len)
> > > +{
> > > +	struct btrfs_tlv_header *hdr;
> > > +
> > > +	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> > > +		return -EOVERFLOW;
> > > +	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> > > +	hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA);
> > > +	hdr->tlv_len = cpu_to_le16(len);
> >
> > I think we need put_unaligned_le16 here, it's mapping a random buffer to
> > a pointer, this is not alignment safe in general.
>
> I think you're right, although tlv_put() seems to have this same
> problem.

Indeed, and there's more: tlv_put, TLV_PUT_DEFINE_INT, begin_cmd, send_cmd. Other direct assignments are in local structs, so the alignment is fine.
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8af5e867e4ca..e70f5ceb3261 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -122,8 +122,6 @@ struct send_ctx { struct file_ra_state ra; - char *read_buf; - /* * We process inodes by their increasing order, so if before an * incremental send we reverse the parent/child relationship of @@ -4794,7 +4792,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) return ret; } -static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) +static u64 max_send_read_size(struct send_ctx *sctx) +{ + return sctx->send_max_size - SZ_16K; +} + +static int put_data_header(struct send_ctx *sctx, u32 len) +{ + struct btrfs_tlv_header *hdr; + + if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len) + return -EOVERFLOW; + hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size); + hdr->tlv_type = cpu_to_le16(BTRFS_SEND_A_DATA); + hdr->tlv_len = cpu_to_le16(len); + sctx->send_size += sizeof(*hdr); + return 0; +} + +static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) { struct btrfs_root *root = sctx->send_root; struct btrfs_fs_info *fs_info = root->fs_info; @@ -4804,8 +4820,11 @@ static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) pgoff_t index = offset >> PAGE_SHIFT; pgoff_t last_index; unsigned pg_offset = offset_in_page(offset); - int ret = 0; - size_t read = 0; + int ret; + + ret = put_data_header(sctx, len); + if (ret) + return ret; inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); if (IS_ERR(inode)) @@ -4851,14 +4870,15 @@ static int fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) } addr = kmap(page); - memcpy(sctx->read_buf + read, addr + pg_offset, cur_len); + memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset, + cur_len); kunmap(page); unlock_page(page); put_page(page); index++; pg_offset = 0; len -= cur_len; - read += cur_len; + sctx->send_size += cur_len; } iput(inode); return ret; @@ -4880,10 +4900,6 @@ static int 
send_write(struct send_ctx *sctx, u64 offset, u32 len) btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); - ret = fill_read_buf(sctx, offset, len); - if (ret < 0) - goto out; - ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); if (ret < 0) goto out; @@ -4894,7 +4910,9 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); + ret = put_file_data(sctx, offset, len); + if (ret < 0) + goto out; ret = send_cmd(sctx); @@ -5013,8 +5031,8 @@ static int send_update_extent(struct send_ctx *sctx, static int send_hole(struct send_ctx *sctx, u64 end) { struct fs_path *p = NULL; + u64 read_size = max_send_read_size(sctx); u64 offset = sctx->cur_inode_last_extent; - u64 len; int ret = 0; /* @@ -5041,16 +5059,19 @@ static int send_hole(struct send_ctx *sctx, u64 end) ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); if (ret < 0) goto tlv_put_failure; - memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); while (offset < end) { - len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); + u64 len = min(end - offset, read_size); ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); if (ret < 0) break; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); - TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); + ret = put_data_header(sctx, len); + if (ret < 0) + break; + memset(sctx->send_buf + sctx->send_size, 0, len); + sctx->send_size += len; ret = send_cmd(sctx); if (ret < 0) break; @@ -5066,17 +5087,16 @@ static int send_extent_data(struct send_ctx *sctx, const u64 offset, const u64 len) { + u64 read_size = max_send_read_size(sctx); u64 sent = 0; if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) return send_update_extent(sctx, offset, len); while (sent < len) { - u64 size = len - sent; + u64 size = min(len - sent, read_size); int ret; - if (size > BTRFS_SEND_READ_SIZE) - 
size = BTRFS_SEND_READ_SIZE; ret = send_write(sctx, offset + sent, size); if (ret < 0) return ret; @@ -7145,12 +7165,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) goto out; } - sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL); - if (!sctx->read_buf) { - ret = -ENOMEM; - goto out; - } - sctx->pending_dir_moves = RB_ROOT; sctx->waiting_dir_moves = RB_ROOT; sctx->orphan_dirs = RB_ROOT; @@ -7354,7 +7368,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) kvfree(sctx->clone_roots); kvfree(sctx->send_buf); - kvfree(sctx->read_buf); name_cache_free(sctx); diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index ead397f7034f..de91488b7cd0 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -13,7 +13,6 @@ #define BTRFS_SEND_STREAM_VERSION 1 #define BTRFS_SEND_BUF_SIZE SZ_64K -#define BTRFS_SEND_READ_SIZE (48 * SZ_1K) enum btrfs_tlv_type { BTRFS_TLV_U8,