diff mbox series

[v14,4/7] btrfs: send: write larger chunks when using stream v2

Message ID 2ac307b0d20f84a09302d9d4f7c4fa1207edccc7.1647537027.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs: add send/receive support for reading/writing compressed data | expand

Commit Message

Omar Sandoval March 17, 2022, 5:25 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

The length field of the send stream TLV header is 16 bits. This means
that the maximum amount of data that can be sent for one write is 64k
minus one. However, encoded writes must be able to send the maximum
compressed extent (128k) in one command. To support this, send stream
version 2 encodes the DATA attribute differently: it has no length
field, and the length is implicitly up to the end of containing command
(which has a 32-bit length field). Although this is necessary for
encoded writes, normal writes can benefit from it, too.

Also add a check to enforce that the DATA attribute is last. It is only
strictly necessary for v2, but we might as well make v1 consistent with
it.

For v2, let's bump up the send buffer to the maximum compressed extent
size plus 16k for the other metadata (144k total). Since this will most
likely be vmalloc'd (and always will be after the next commit), we round
it up to the next page since we might as well use the rest of the page
on systems with >16k pages.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/send.c | 42 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 8 deletions(-)

Comments

Sweet Tea Dorminy March 24, 2022, 5:52 p.m. UTC | #1
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index 1f141de3a7d6..02053fff80ca 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -82,6 +82,7 @@ struct send_ctx {
>   	char *send_buf;
>   	u32 send_size;
>   	u32 send_max_size;
> +	bool put_data;
put_data's use seems to be about making sure put_data_header() isn't 
called more than once, which is not super obvious to me from the name; 
perhaps one of 'data_header_{set,setup,initialized}' might make it 
clearer? Or if it's actually about put_file_data, maybe moving the 
assertion there would make that clearer?
>   static int put_data_header(struct send_ctx *sctx, u32 len)
>   {
> -	struct btrfs_tlv_header *hdr;
> +	if (WARN_ON_ONCE(sctx->put_data))
> +		return -EINVAL;
> +	sctx->put_data = true;
> +	if (sctx->proto >= 2) {
> +		/*
> +		 * In v2, the data attribute header doesn't include a length; it
> +		 * is implicitly to the end of the command.
> +		 */
> +		if (sctx->send_max_size - sctx->send_size < 2 + len)
> +			return -EOVERFLOW;
> +		put_unaligned_le16(BTRFS_SEND_A_DATA,
> +				   sctx->send_buf + sctx->send_size);
> +		sctx->send_size += 2;
> +	} else {
> +		struct btrfs_tlv_header *hdr;
>   
> -	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> -		return -EOVERFLOW;
> -	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> -	put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
> -	put_unaligned_le16(len, &hdr->tlv_len);
> -	sctx->send_size += sizeof(*hdr);
> +		if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> +			return -EOVERFLOW;
> +		hdr = (struct btrfs_tlv_header *)(sctx->send_buf +
> +						  sctx->send_size);
> +		put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
> +		put_unaligned_le16(len, &hdr->tlv_len);
> +		sctx->send_size += sizeof(*hdr);
> +	}
>   	return 0;
>   }

I wish the 2s were named, and that there were more commonality between 
the two branches... Might I propose this alternative? It doesn't check 
the length's suitability until after adding the two fields, but I don't 
think anything bad happens from delaying the check.

static int put_data_header(struct send_ctx *sctx, u32 len)
{
         struct btrfs_tlv_header *hdr =
                 (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);

         if (WARN_ON_ONCE(sctx->put_data))
                 return -EINVAL;
         sctx->put_data = true;

         put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
         sctx->send_size += sizeof(hdr->tlv_type);
                                                                                                                        
         /*
          * In v2+, the data attribute header doesn't include a length; it is
          * implicitly to the end of the command.
          */
         if (sctx->proto == 1) {
                 put_unaligned_le16(len, &hdr->tlv_len);
                 sctx->send_size += sizeof(hdr->tlv_len);
         }

         if (sctx->send_max_size - sctx->send_size < len)
                 return -EOVERFLOW;

         return 0;
}
Omar Sandoval March 30, 2022, 5:05 p.m. UTC | #2
On Thu, Mar 24, 2022 at 01:52:53PM -0400, Sweet Tea Dorminy wrote:
> 
> > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> > index 1f141de3a7d6..02053fff80ca 100644
> > --- a/fs/btrfs/send.c
> > +++ b/fs/btrfs/send.c
> > @@ -82,6 +82,7 @@ struct send_ctx {
> >   	char *send_buf;
> >   	u32 send_size;
> >   	u32 send_max_size;
> > +	bool put_data;
> put_data's use seems to be about making sure put_data_header() isn't called
> more than once, which is not super obvious to me from the name; perhaps one
> of 'data_header_{set,setup,initialized}' might make it clearer? Or if it's
> actually about put_file_data, maybe moving the assertion there would make
> that clearer?

The intention is to prevent adding another attribute after a data
attribute, since that's impossible with v2. Notice that it's also
checked in tlv_put(). So "put_data" means "was a data attribute already
added to this command?"; it's not specifically about the data header or
data itself. I'll add a comment to that effect.

> >   static int put_data_header(struct send_ctx *sctx, u32 len)
> >   {
> > -	struct btrfs_tlv_header *hdr;
> > +	if (WARN_ON_ONCE(sctx->put_data))
> > +		return -EINVAL;
> > +	sctx->put_data = true;
> > +	if (sctx->proto >= 2) {
> > +		/*
> > +		 * In v2, the data attribute header doesn't include a length; it
> > +		 * is implicitly to the end of the command.
> > +		 */
> > +		if (sctx->send_max_size - sctx->send_size < 2 + len)
> > +			return -EOVERFLOW;
> > +		put_unaligned_le16(BTRFS_SEND_A_DATA,
> > +				   sctx->send_buf + sctx->send_size);
> > +		sctx->send_size += 2;
> > +	} else {
> > +		struct btrfs_tlv_header *hdr;
> > -	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> > -		return -EOVERFLOW;
> > -	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
> > -	put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
> > -	put_unaligned_le16(len, &hdr->tlv_len);
> > -	sctx->send_size += sizeof(*hdr);
> > +		if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
> > +			return -EOVERFLOW;
> > +		hdr = (struct btrfs_tlv_header *)(sctx->send_buf +
> > +						  sctx->send_size);
> > +		put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
> > +		put_unaligned_le16(len, &hdr->tlv_len);
> > +		sctx->send_size += sizeof(*hdr);
> > +	}
> >   	return 0;
> >   }
> 
> I wish the 2s were named, and that there were more commonality between the
> two branches... Might I propose this alternative? It doesn't check the
> length's suitability until after adding the two fields, but I don't think
> anything bad happens from delaying the check.

We need to check that writing the header itself won't write past the end
of send_buf, so it'd have to look more like:

static int put_data_header(struct send_ctx *sctx, u32 len)
{
	struct btrfs_tlv_header *hdr =
		(struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size)

	if (WARN_ON_ONCE(sctx->put_data))
		return -EINVAL;
	sctx->put_data = true;

	if (sctx->send_max_size - sctx->send_size < sizeof(hdr->tlv_type))
		return -EOVERFLOW;
	put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
	sctx->send_size += sizeof(hdr->tlv_type);

	/*
	 * In v2, the data attribute header doesn't include a length; it is
	 * implicitly to the end of the command.
	 */
	if (sctx->proto < 2) {
		if (sctx->send_max_size - sctx->send_size <
		    sizeof(hdr->tlv_len))
			return -EOVERFLOW;
		put_unaligned_le16(len, &hdr->tlv_len);
		sctx->send_size += sizeof(hdr->tlv_len);
	}

	if (sctx->send_max_size - sctx->send_size < len)
		return -EOVERFLOW;

	return 0;
}

Now we're checking for overflow in three places, shrug.

One thing that I like about the two separate cases is that it makes it
more clear that v2+ doesn't actually have a btrfs_tlv_header; it's just
a single __le16. If it's alright with you, I'll stick with my original
version, but I will replaced the hard-coded 2s with sizeof(__le16).
diff mbox series

Patch

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1f141de3a7d6..02053fff80ca 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -82,6 +82,7 @@  struct send_ctx {
 	char *send_buf;
 	u32 send_size;
 	u32 send_max_size;
+	bool put_data;
 	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
 	/* Protocol version compatibility requested */
 	u32 proto;
@@ -589,6 +590,9 @@  static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 	int total_len = sizeof(*hdr) + len;
 	int left = sctx->send_max_size - sctx->send_size;
 
+	if (WARN_ON_ONCE(sctx->put_data))
+		return -EINVAL;
+
 	if (unlikely(left < total_len))
 		return -EOVERFLOW;
 
@@ -726,6 +730,7 @@  static int send_cmd(struct send_ctx *sctx)
 					&sctx->send_off);
 
 	sctx->send_size = 0;
+	sctx->put_data = false;
 
 	return ret;
 }
@@ -4853,14 +4858,30 @@  static inline u64 max_send_read_size(const struct send_ctx *sctx)
 
 static int put_data_header(struct send_ctx *sctx, u32 len)
 {
-	struct btrfs_tlv_header *hdr;
+	if (WARN_ON_ONCE(sctx->put_data))
+		return -EINVAL;
+	sctx->put_data = true;
+	if (sctx->proto >= 2) {
+		/*
+		 * In v2, the data attribute header doesn't include a length; it
+		 * is implicitly to the end of the command.
+		 */
+		if (sctx->send_max_size - sctx->send_size < 2 + len)
+			return -EOVERFLOW;
+		put_unaligned_le16(BTRFS_SEND_A_DATA,
+				   sctx->send_buf + sctx->send_size);
+		sctx->send_size += 2;
+	} else {
+		struct btrfs_tlv_header *hdr;
 
-	if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
-		return -EOVERFLOW;
-	hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
-	put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
-	put_unaligned_le16(len, &hdr->tlv_len);
-	sctx->send_size += sizeof(*hdr);
+		if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
+			return -EOVERFLOW;
+		hdr = (struct btrfs_tlv_header *)(sctx->send_buf +
+						  sctx->send_size);
+		put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
+		put_unaligned_le16(len, &hdr->tlv_len);
+		sctx->send_size += sizeof(*hdr);
+	}
 	return 0;
 }
 
@@ -7459,7 +7480,12 @@  long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 
 	sctx->clone_roots_cnt = arg->clone_sources_count;
 
-	sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
+	if (sctx->proto >= 2) {
+		sctx->send_max_size = ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED,
+					    PAGE_SIZE);
+	} else {
+		sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
+	}
 	sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
 	if (!sctx->send_buf) {
 		ret = -ENOMEM;