diff mbox series

[v11,01/10] btrfs-progs: receive: support v2 send stream larger tlv_len

Message ID 8729477d23b83c368a76c4f39b5f73a483a3ad14.1630515568.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series [v11,01/10] btrfs-progs: receive: support v2 send stream larger tlv_len | expand

Commit Message

Omar Sandoval Sept. 1, 2021, 5:01 p.m. UTC
From: Boris Burkov <borisb@fb.com>

An encoded extent can be up to 128K in length, which exceeds the largest
value expressible by the current send stream format's 16 bit tlv_len
field. Since encoded writes cannot be split into multiple writes by
btrfs send, the send stream format must change to accommodate encoded
writes.

Supporting this changed format requires retooling how we store the
commands we have processed. Since we can no longer use btrfs_tlv_header
to describe every attribute, we define a new struct btrfs_send_attribute
which has a 32 bit length field, and use that to store the attribute
information needed for receive processing. This is transparent to users
of the various TLV_GET macros.

Signed-off-by: Boris Burkov <boris@bur.io>
---
 common/send-stream.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

Comments

Nikolay Borisov Oct. 20, 2021, 1:49 p.m. UTC | #1
On 1.09.21 г. 20:01, Omar Sandoval wrote:
> From: Boris Burkov <borisb@fb.com>
> 
> An encoded extent can be up to 128K in length, which exceeds the largest
> value expressible by the current send stream format's 16 bit tlv_len
> field. Since encoded writes cannot be split into multiple writes by
> btrfs send, the send stream format must change to accommodate encoded
> writes.
> 
> Supporting this changed format requires retooling how we store the
> commands we have processed. Since we can no longer use btrfs_tlv_header
> to describe every attribute, we define a new struct btrfs_send_attribute
> which has a 32 bit length field, and use that to store the attribute
> information needed for receive processing. This is transparent to users
> of the various TLV_GET macros.
> 
> Signed-off-by: Boris Burkov <boris@bur.io>
> ---
>  common/send-stream.c | 34 +++++++++++++++++++++++++---------
>  1 file changed, 25 insertions(+), 9 deletions(-)
> 
> diff --git a/common/send-stream.c b/common/send-stream.c
> index a0c52f79..cd5aa311 100644
> --- a/common/send-stream.c
> +++ b/common/send-stream.c
> @@ -24,13 +24,23 @@
>  #include "crypto/crc32c.h"
>  #include "common/utils.h"
>  
> +struct btrfs_send_attribute {
> +	u16 tlv_type;
> +	/*
> +	 * Note: in btrfs_tlv_header, this is __le16, but we need 32 bits for
> +	 * attributes with file data as of version 2 of the send stream format
> +	 */
> +	u32 tlv_len;
> +	char *data;
> +};
> +
>  struct btrfs_send_stream {
>  	char read_buf[BTRFS_SEND_BUF_SIZE];
>  	int fd;
>  
>  	int cmd;
>  	struct btrfs_cmd_header *cmd_hdr;
> -	struct btrfs_tlv_header *cmd_attrs[BTRFS_SEND_A_MAX + 1];
> +	struct btrfs_send_attribute cmd_attrs[BTRFS_SEND_A_MAX + 1];

This is subtle and it took me a couple of minutes to get it at first.
Currently cmds_attrs holds an array of pointers into the command buffer,
with every pointer being the beginning of the tlv_header, whilst with
your change cmd_attr now holds actual btrfs_send_attribute structures
(52 bytes vs sizeof(uintptr_t)  bytes before). So this increases the
overall size of btrfs_send_stream because with  your version of the code
you parse the type/length fields and store them directly in the send
attribute structure at command parse time rather than just referring to
the raw command buffer during read_cmd and referring to them during
attribute parsing.

This might seem superficial but this kind of change should really be
mentioned explicitly in the changelog to better prepare reviewers what
to expect.


OTOH the code LGTM and actually now it seems less tricky than before so:

Reviewed-by: Nikolay Borisov <nborisov@suse.com>


David if you deem it necessary adjust the commit message appropriately.
Omar Sandoval Oct. 20, 2021, 6:48 p.m. UTC | #2
On Wed, Oct 20, 2021 at 04:49:38PM +0300, Nikolay Borisov wrote:
> 
> 
> On 1.09.21 г. 20:01, Omar Sandoval wrote:
> > From: Boris Burkov <borisb@fb.com>
> > 
> > An encoded extent can be up to 128K in length, which exceeds the largest
> > value expressible by the current send stream format's 16 bit tlv_len
> > field. Since encoded writes cannot be split into multiple writes by
> > btrfs send, the send stream format must change to accommodate encoded
> > writes.
> > 
> > Supporting this changed format requires retooling how we store the
> > commands we have processed. Since we can no longer use btrfs_tlv_header
> > to describe every attribute, we define a new struct btrfs_send_attribute
> > which has a 32 bit length field, and use that to store the attribute
> > information needed for receive processing. This is transparent to users
> > of the various TLV_GET macros.
> > 
> > Signed-off-by: Boris Burkov <boris@bur.io>
> > ---
> >  common/send-stream.c | 34 +++++++++++++++++++++++++---------
> >  1 file changed, 25 insertions(+), 9 deletions(-)
> > 
> > diff --git a/common/send-stream.c b/common/send-stream.c
> > index a0c52f79..cd5aa311 100644
> > --- a/common/send-stream.c
> > +++ b/common/send-stream.c
> > @@ -24,13 +24,23 @@
> >  #include "crypto/crc32c.h"
> >  #include "common/utils.h"
> >  
> > +struct btrfs_send_attribute {
> > +	u16 tlv_type;
> > +	/*
> > +	 * Note: in btrfs_tlv_header, this is __le16, but we need 32 bits for
> > +	 * attributes with file data as of version 2 of the send stream format
> > +	 */
> > +	u32 tlv_len;
> > +	char *data;
> > +};
> > +
> >  struct btrfs_send_stream {
> >  	char read_buf[BTRFS_SEND_BUF_SIZE];
> >  	int fd;
> >  
> >  	int cmd;
> >  	struct btrfs_cmd_header *cmd_hdr;
> > -	struct btrfs_tlv_header *cmd_attrs[BTRFS_SEND_A_MAX + 1];
> > +	struct btrfs_send_attribute cmd_attrs[BTRFS_SEND_A_MAX + 1];
> 
> This is subtle and it took me a couple of minutes to get it at first.
> Currently cmds_attrs holds an array of pointers into the command buffer,
> with every pointer being the beginning of the tlv_header, whilst with
> your change cmd_attr now holds actual btrfs_send_attribute structures
> (52 bytes vs sizeof(uintptr_t)  bytes before). So this increases the
> overall size of btrfs_send_stream because with  your version of the code
> you parse the type/length fields and store them directly in the send
> attribute structure at command parse time rather than just referring to
> the raw command buffer during read_cmd and referring to them during
> attribute parsing.
> 
> This might seem superficial but this kind of change should really be
> mentioned explicitly in the changelog to better prepare reviewers what
> to expect.
> 
> 
> OTOH the code LGTM and actually now it seems less tricky than before so:
> 
> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
> 
> 
> David if you deem it necessary adjust the commit message appropriately.

I clarified the second paragraph to:

Supporting this changed format requires retooling how we store the
commands we have processed. We currently store pointers to the struct
btrfs_tlv_headers in the command buffer. This is not sufficient to
represent the new BTRFS_SEND_A_DATA format. Instead, parse the attribute
headers and store them in a new struct btrfs_send_attribute which has a
32-bit length field. This is transparent to users of the various TLV_GET
macros.
diff mbox series

Patch

diff --git a/common/send-stream.c b/common/send-stream.c
index a0c52f79..cd5aa311 100644
--- a/common/send-stream.c
+++ b/common/send-stream.c
@@ -24,13 +24,23 @@ 
 #include "crypto/crc32c.h"
 #include "common/utils.h"
 
+struct btrfs_send_attribute {
+	u16 tlv_type;
+	/*
+	 * Note: in btrfs_tlv_header, this is __le16, but we need 32 bits for
+	 * attributes with file data as of version 2 of the send stream format
+	 */
+	u32 tlv_len;
+	char *data;
+};
+
 struct btrfs_send_stream {
 	char read_buf[BTRFS_SEND_BUF_SIZE];
 	int fd;
 
 	int cmd;
 	struct btrfs_cmd_header *cmd_hdr;
-	struct btrfs_tlv_header *cmd_attrs[BTRFS_SEND_A_MAX + 1];
+	struct btrfs_send_attribute cmd_attrs[BTRFS_SEND_A_MAX + 1];
 	u32 version;
 
 	/*
@@ -152,6 +162,7 @@  static int read_cmd(struct btrfs_send_stream *sctx)
 		struct btrfs_tlv_header *tlv_hdr;
 		u16 tlv_type;
 		u16 tlv_len;
+		struct btrfs_send_attribute *send_attr;
 
 		tlv_hdr = (struct btrfs_tlv_header *)data;
 		tlv_type = le16_to_cpu(tlv_hdr->tlv_type);
@@ -164,10 +175,15 @@  static int read_cmd(struct btrfs_send_stream *sctx)
 			goto out;
 		}
 
-		sctx->cmd_attrs[tlv_type] = tlv_hdr;
+		send_attr = &sctx->cmd_attrs[tlv_type];
+		send_attr->tlv_type = tlv_type;
+		send_attr->tlv_len = tlv_len;
+		pos += sizeof(*tlv_hdr);
+		data += sizeof(*tlv_hdr);
 
-		data += sizeof(*tlv_hdr) + tlv_len;
-		pos += sizeof(*tlv_hdr) + tlv_len;
+		send_attr->data = data;
+		pos += send_attr->tlv_len;
+		data += send_attr->tlv_len;
 	}
 
 	sctx->cmd = cmd;
@@ -180,7 +196,7 @@  out:
 static int tlv_get(struct btrfs_send_stream *sctx, int attr, void **data, int *len)
 {
 	int ret;
-	struct btrfs_tlv_header *hdr;
+	struct btrfs_send_attribute *send_attr;
 
 	if (attr <= 0 || attr > BTRFS_SEND_A_MAX) {
 		error("invalid attribute requested, attr = %d", attr);
@@ -188,15 +204,15 @@  static int tlv_get(struct btrfs_send_stream *sctx, int attr, void **data, int *l
 		goto out;
 	}
 
-	hdr = sctx->cmd_attrs[attr];
-	if (!hdr) {
+	send_attr = &sctx->cmd_attrs[attr];
+	if (!send_attr->data) {
 		error("attribute %d requested but not present", attr);
 		ret = -ENOENT;
 		goto out;
 	}
 
-	*len = le16_to_cpu(hdr->tlv_len);
-	*data = hdr + 1;
+	*len = send_attr->tlv_len;
+	*data = send_attr->data;
 
 	ret = 0;