diff mbox

Btrfs: send, truncate first to enhance many small files

Message ID CAL3q7H546_G+o=T2cpaM74UzyddPe=E-oBr7r+ncX+D-X=D8SA@mail.gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Filipe Manana Dec. 4, 2017, 11:13 a.m. UTC
On Mon, Dec 4, 2017 at 7:19 AM, Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
> On 2017年12月04日 15:02, robbieko wrote:
>> From: Robbie Ko <robbieko@synology.com>
>>
>> The commands generated by send contain the following step:
>> 1. mkfile o1851-19-0
>> 2. rename o1851-19-0 -> alsa-driver/alsa-kernel/isa/es1688/es1688.c
>> 3. set_xattr alsa-driver/alsa-kernel/isa/es1688/es1688.c - name=user.xattr data_len=4 data=test
>> 4. write alsa-driver/alsa-kernel/isa/es1688/es1688.c - offset=0, len=10458
>> 5. truncate alsa-driver/alsa-kernel/isa/es1688/es1688.c size=10458
>> 6. chown alsa-driver/alsa-kernel/isa/es1688/es1688.c - uid=1024, gid=100
>> 7. chmod alsa-driver/alsa-kernel/isa/es1688/es1688.c - mode=0644
>> 8. utimes alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>
>> After writing file content, it will truncate file to the correct size.
>> Btrfs truncate will flush last page if size does not align to sectorsize,
>> and this will cause receive process to wait until flush finishes.
>> In order to avoid waiting for the data to be flushed, this patch changes
>> the order so that the truncate command is sent before the write command.
>
> Personally speaking, it's better to optimize the receive side.
>
> For example, at receive side, if we already know that the file size is
> not changed at all, then just skip the truncate command.
>
> In your send dump, step 5 is not needed at all, and can be skipped to
> speed up the receive procedure.

Better yet, is to not send the truncate commands at all, reduces the
stream size and saves all receiver implementations
from having such logic (which further requires stat calls to figure
out the current file size).

I still have a local branch around with several optimizations (from
the time I was a volunteer), with one of them being precisely to avoid
unnecessary truncate commands, however I never ended up getting to the
point of doing benchmarks.
Pasted below and at https://www.friendpaste.com/1J1TuXf2wlU1125mX57k4r


commit 362290ea03b52e4ce4edb43453a51a3b1f1cd33f
Author: Filipe David Borba Manana <fdmanana@gmail.com>
Date:   Sat Apr 5 22:34:45 2014 +0100

    Btrfs: send, don't issue unnecessary file truncate commands

    Every time a file is created or updated, send (either incremental
or not) always
    issues a file truncate command. In several common scenarios this
truncate is not
    needed and yet it was still issued all the time. The unnecessary
cases where the
    truncate command is not needed are:

    * We have a new file, and its last extent isn't a hole, so that
its last write
      command ends at the file's size;

    * For an incremental send, we updated in place an existing file,
without changing
      its size;

    * For an incremental send, we updated the file, increased its
size, and the last
      file extent item doesn't correspond to a hole. In this case the last write
      command against the file ends at the file's new size;

    * For an incremental send, we didn't update the file's data at
all, we only changed
      its mode, group or access/update timestamps.

    Therefore don't send truncate commands for these cases, reducing
the stream's size
    and saving time.


@@ -5022,9 +5032,13 @@ static int finish_inode_if_needed(struct
send_ctx *sctx, int at_end)
  need_chown = 1;
  if (!S_ISLNK(sctx->cur_inode_mode))
  need_chmod = 1;
+ if (sctx->cur_inode_max_write_end == sctx->cur_inode_size)
+ need_truncate = 0;
  } else {
+ u64 old_size;
+
  ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
- NULL, NULL, &right_mode, &right_uid,
+ &old_size, NULL, &right_mode, &right_uid,
  &right_gid, NULL);
  if (ret < 0)
  goto out;
@@ -5033,6 +5047,13 @@ static int finish_inode_if_needed(struct
send_ctx *sctx, int at_end)
  need_chown = 1;
  if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
  need_chmod = 1;
+
+ if (old_size == sctx->cur_inode_size &&
+    sctx->cur_inode_max_write_end <= sctx->cur_inode_size)
+ need_truncate = 0;
+ else if (sctx->cur_inode_size > old_size &&
+ sctx->cur_inode_max_write_end == sctx->cur_inode_size)
+ need_truncate = 0;
  }

 truncate_inode:
@@ -5052,10 +5073,13 @@ static int finish_inode_if_needed(struct
send_ctx *sctx, int at_end)
  goto out;
  }
  }
- ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
- sctx->cur_inode_size);
- if (ret < 0)
- goto out;
+ if (need_truncate) {
+ ret = send_truncate(sctx, sctx->cur_ino,
+    sctx->cur_inode_gen,
+    sctx->cur_inode_size);
+ if (ret < 0)
+ goto out;
+ }
  }

  if (need_chown) {
@@ -5110,6 +5134,7 @@ static int changed_inode(struct send_ctx *sctx,
  sctx->cur_ino = key->objectid;
  sctx->cur_inode_new_gen = 0;
  sctx->cur_inode_last_extent = (u64)-1;
+ sctx->cur_inode_max_write_end = 0;

  /*
  * Set send_progress to current inode. This will tell all get_cur_xxx

>
> Thanks,
> Qu
>
>>
>> Overall performance improves by 102 percent when sending 790000 small files.
>> original: 32m45.311s
>> patch: 16m8.387s
>>
>> Signed-off-by: Robbie Ko <robbieko@synology.com>
>> ---
>>  fs/btrfs/send.c | 13 +++++++++----
>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>> index 20d3300..7ae2347 100644
>> --- a/fs/btrfs/send.c
>> +++ b/fs/btrfs/send.c
>> @@ -5857,10 +5857,6 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
>>                                       goto out;
>>                       }
>>               }
>> -             ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>> -                             sctx->cur_inode_size);
>> -             if (ret < 0)
>> -                     goto out;
>>       }
>>
>>       if (need_chown) {
>> @@ -6044,6 +6040,15 @@ static int changed_inode(struct send_ctx *sctx,
>>                                       sctx->left_path->nodes[0], left_ii);
>>               }
>>       }
>> +     if (result == BTRFS_COMPARE_TREE_NEW ||
>> +         result == BTRFS_COMPARE_TREE_CHANGED) {
>> +             if (S_ISREG(sctx->cur_inode_mode)) {
>> +                     ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>> +                                             sctx->cur_inode_size);
>> +                     if (ret < 0)
>> +                             goto out;
>> +             }
>> +     }
>>
>>  out:
>>       return ret;
>>
>

Comments

Qu Wenruo Dec. 4, 2017, 11:28 a.m. UTC | #1
On 2017年12月04日 19:13, Filipe Manana wrote:
> On Mon, Dec 4, 2017 at 7:19 AM, Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>>
>>
>> On 2017年12月04日 15:02, robbieko wrote:
>>> From: Robbie Ko <robbieko@synology.com>
>>>
>>> The commands generated by send contain the following step:
>>> 1. mkfile o1851-19-0
>>> 2. rename o1851-19-0 -> alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>> 3. set_xattr alsa-driver/alsa-kernel/isa/es1688/es1688.c - name=user.xattr data_len=4 data=test
>>> 4. write alsa-driver/alsa-kernel/isa/es1688/es1688.c - offset=0, len=10458
>>> 5. truncate alsa-driver/alsa-kernel/isa/es1688/es1688.c size=10458
>>> 6. chown alsa-driver/alsa-kernel/isa/es1688/es1688.c - uid=1024, gid=100
>>> 7. chmod alsa-driver/alsa-kernel/isa/es1688/es1688.c - mode=0644
>>> 8. utimes alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>
>>> After writing file content, it will truncate file to the correct size.
>>> Btrfs truncate will flush last page if size does not align to sectorsize,
>>> and this will cause receive process to wait until flush finishes.
>>> In order to avoid waiting for the data to be flushed, this patch changes
>>> the order so that the truncate command is sent before the write command.
>>
>> Personally speaking, it's better to optimize the receive side.
>>
>> For example, at receive side, if we already know that the file size is
>> not changed at all, then just skip the truncate command.
>>
>> In your send dump, step 5 is not needed at all, and can be skipped to
>> speed up the receive procedure.
> 
> Better yet, is to not send the truncate commands at all, reduces the
> stream size and saves all receiver implementations
> from having such logic (which further requires stat calls to figure
> out the current file size).

Another one is to optimize btrfs file truncate to avoid the unnecessary
page writeback.

But at least, doing optimization in receive side is easier and less
bug-prone.

If we could do the same in both send and receive side, then I prefer
optimization in receive side.

Thanks,
Qu

> 
> I still have a local branch around with several optimizations (from
> the time I was a volunteer), with one of them being precisely to avoid
> unnecessary truncate commands, however I never ended up getting to the
> point of doing benchmarks.
> Pasted below and at https://www.friendpaste.com/1J1TuXf2wlU1125mX57k4r
> 
> 
> commit 362290ea03b52e4ce4edb43453a51a3b1f1cd33f
> Author: Filipe David Borba Manana <fdmanana@gmail.com>
> Date:   Sat Apr 5 22:34:45 2014 +0100
> 
>     Btrfs: send, don't issue unnecessary file truncate commands
> 
>     Every time a file is created or updated, send (either incremental
> or not) always
>     issues a file truncate command. In several common scenarios this
> truncate is not
>     needed and yet it was still issued all the time. The unnecessary
> cases where the
>     truncate command is not needed are:
> 
>     * We have a new file, and its last extent isn't a hole, so that
> its last write
>       command ends at the file's size;
> 
>     * For an incremental send, we updated in place an existing file,
> without changing
>       its size;
> 
>     * For an incremental send, we updated the file, increased its
> size, and the last
>       file extent item doesn't correspond to a hole. In this case the last write
>       command against the file ends at the file's new size;
> 
>     * For an incremental send, we didn't update the file's data at
> all, we only changed
>       its mode, group or access/update timestamps.
> 
>     Therefore don't send truncate commands for these cases, reducing
> the stream's size
>     and saving time.
> 
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index fa378c7ebffd..46f52b4bbc21 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -120,6 +120,7 @@ struct send_ctx {
>   u64 cur_inode_mode;
>   u64 cur_inode_rdev;
>   u64 cur_inode_last_extent;
> + u64 cur_inode_max_write_end;
> 
>   u64 send_progress;
>   u64 total_data_size;
> @@ -4494,6 +4495,8 @@ static int send_hole(struct send_ctx *sctx, u64 end)
>   break;
>   offset += len;
>   }
> + sctx->cur_inode_max_write_end = max(offset,
> +    sctx->cur_inode_max_write_end);
>  tlv_put_failure:
>   fs_path_free(p);
>   return ret;
> @@ -4529,6 +4532,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
>   len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
>   }
> 
> + if (offset >= sctx->cur_inode_size) {
> + ret = 0;
> + goto out;
> + }
>   if (offset + len > sctx->cur_inode_size)
>   len = sctx->cur_inode_size - offset;
>   if (len == 0) {
> @@ -4560,6 +4567,8 @@ static int send_write_or_clone(struct send_ctx *sctx,
>   }
>   ret = 0;
>   }
> + sctx->cur_inode_max_write_end = max(offset + len,
> +    sctx->cur_inode_max_write_end);
>  out:
>   return ret;
>  }
> @@ -4982,6 +4991,7 @@ static int finish_inode_if_needed(struct
> send_ctx *sctx, int at_end)
>   u64 right_gid;
>   int need_chmod = 0;
>   int need_chown = 0;
> + int need_truncate = 1;
>   int pending_move = 0;
>   int refs_processed = 0;
> 
> @@ -5022,9 +5032,13 @@ static int finish_inode_if_needed(struct
> send_ctx *sctx, int at_end)
>   need_chown = 1;
>   if (!S_ISLNK(sctx->cur_inode_mode))
>   need_chmod = 1;
> + if (sctx->cur_inode_max_write_end == sctx->cur_inode_size)
> + need_truncate = 0;
>   } else {
> + u64 old_size;
> +
>   ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
> - NULL, NULL, &right_mode, &right_uid,
> + &old_size, NULL, &right_mode, &right_uid,
>   &right_gid, NULL);
>   if (ret < 0)
>   goto out;
> @@ -5033,6 +5047,13 @@ static int finish_inode_if_needed(struct
> send_ctx *sctx, int at_end)
>   need_chown = 1;
>   if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
>   need_chmod = 1;
> +
> + if (old_size == sctx->cur_inode_size &&
> +    sctx->cur_inode_max_write_end <= sctx->cur_inode_size)
> + need_truncate = 0;
> + else if (sctx->cur_inode_size > old_size &&
> + sctx->cur_inode_max_write_end == sctx->cur_inode_size)
> + need_truncate = 0;
>   }
> 
>  truncate_inode:
> @@ -5052,10 +5073,13 @@ static int finish_inode_if_needed(struct
> send_ctx *sctx, int at_end)
>   goto out;
>   }
>   }
> - ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
> - sctx->cur_inode_size);
> - if (ret < 0)
> - goto out;
> + if (need_truncate) {
> + ret = send_truncate(sctx, sctx->cur_ino,
> +    sctx->cur_inode_gen,
> +    sctx->cur_inode_size);
> + if (ret < 0)
> + goto out;
> + }
>   }
> 
>   if (need_chown) {
> @@ -5110,6 +5134,7 @@ static int changed_inode(struct send_ctx *sctx,
>   sctx->cur_ino = key->objectid;
>   sctx->cur_inode_new_gen = 0;
>   sctx->cur_inode_last_extent = (u64)-1;
> + sctx->cur_inode_max_write_end = 0;
> 
>   /*
>   * Set send_progress to current inode. This will tell all get_cur_xxx
> 
>>
>> Thanks,
>> Qu
>>
>>>
>>> Overall performance improves by 102 percent when sending 790000 small files.
>>> original: 32m45.311s
>>> patch: 16m8.387s
>>>
>>> Signed-off-by: Robbie Ko <robbieko@synology.com>
>>> ---
>>>  fs/btrfs/send.c | 13 +++++++++----
>>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>>> index 20d3300..7ae2347 100644
>>> --- a/fs/btrfs/send.c
>>> +++ b/fs/btrfs/send.c
>>> @@ -5857,10 +5857,6 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
>>>                                       goto out;
>>>                       }
>>>               }
>>> -             ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>>> -                             sctx->cur_inode_size);
>>> -             if (ret < 0)
>>> -                     goto out;
>>>       }
>>>
>>>       if (need_chown) {
>>> @@ -6044,6 +6040,15 @@ static int changed_inode(struct send_ctx *sctx,
>>>                                       sctx->left_path->nodes[0], left_ii);
>>>               }
>>>       }
>>> +     if (result == BTRFS_COMPARE_TREE_NEW ||
>>> +         result == BTRFS_COMPARE_TREE_CHANGED) {
>>> +             if (S_ISREG(sctx->cur_inode_mode)) {
>>> +                     ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>>> +                                             sctx->cur_inode_size);
>>> +                     if (ret < 0)
>>> +                             goto out;
>>> +             }
>>> +     }
>>>
>>>  out:
>>>       return ret;
>>>
>>
> 
> 
>
Filipe Manana Dec. 4, 2017, 12:08 p.m. UTC | #2
On Mon, Dec 4, 2017 at 11:28 AM, Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
> On 2017年12月04日 19:13, Filipe Manana wrote:
>> On Mon, Dec 4, 2017 at 7:19 AM, Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>>>
>>>
>>> On 2017年12月04日 15:02, robbieko wrote:
>>>> From: Robbie Ko <robbieko@synology.com>
>>>>
>>>> The commands generated by send contain the following step:
>>>> 1. mkfile o1851-19-0
>>>> 2. rename o1851-19-0 -> alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>> 3. set_xattr alsa-driver/alsa-kernel/isa/es1688/es1688.c - name=user.xattr data_len=4 data=test
>>>> 4. write alsa-driver/alsa-kernel/isa/es1688/es1688.c - offset=0, len=10458
>>>> 5. truncate alsa-driver/alsa-kernel/isa/es1688/es1688.c size=10458
>>>> 6. chown alsa-driver/alsa-kernel/isa/es1688/es1688.c - uid=1024, gid=100
>>>> 7. chmod alsa-driver/alsa-kernel/isa/es1688/es1688.c - mode=0644
>>>> 8. utimes alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>>
>>>> After writing file content, it will truncate file to the correct size.
>>>> Btrfs truncate will flush last page if size does not align to sectorsize,
>>>> and this will cause receive process to wait until flush finishes.
>>>> In order to avoid waiting for the data to be flushed, this patch changes
>>>> the order so that the truncate command is sent before the write command.
>>>
>>> Personally speaking, it's better to optimize the receive side.
>>>
>>> For example, at receive side, if we already know that the file size is
>>> not changed at all, then just skip the truncate command.
>>>
>>> In your send dump, step 5 is not needed at all, and can be skipped to
>>> speed up the receive procedure.
>>
>> Better yet, is to not send the truncate commands at all, reduces the
>> stream size and saves all receiver implementations
>> from having such logic (which further requires stat calls to figure
>> out the current file size).
>
> Another one is to optimize btrfs file truncate to avoid the unnecessary
> page writeback.
>
> But at least, doing optimization in receive side is easier and less
> bug-prone.

Not really, for this case at least.
>
> If we could do the same in both send and receive side, then I prefer
> optimization in receive side.

Can't agree much with that, for the reasons mentioned before.

>
> Thanks,
> Qu
>
>>
>> I still have a local branch around with several optimizations (from
>> the time I was a volunteer), with one of them being precisely to avoid
>> unnecessary truncate commands, however I never ended up getting to the
>> point of doing benchmarks.
>> Pasted below and at https://www.friendpaste.com/1J1TuXf2wlU1125mX57k4r
>>
>>
>> commit 362290ea03b52e4ce4edb43453a51a3b1f1cd33f
>> Author: Filipe David Borba Manana <fdmanana@gmail.com>
>> Date:   Sat Apr 5 22:34:45 2014 +0100
>>
>>     Btrfs: send, don't issue unnecessary file truncate commands
>>
>>     Every time a file is created or updated, send (either incremental
>> or not) always
>>     issues a file truncate command. In several common scenarios this
>> truncate is not
>>     needed and yet it was still issued all the time. The unnecessary
>> cases where the
>>     truncate command is not needed are:
>>
>>     * We have a new file, and its last extent isn't a hole, so that
>> its last write
>>       command ends at the file's size;
>>
>>     * For an incremental send, we updated in place an existing file,
>> without changing
>>       its size;
>>
>>     * For an incremental send, we updated the file, increased its
>> size, and the last
>>       file extent item doesn't correspond to a hole. In this case the last write
>>       command against the file ends at the file's new size;
>>
>>     * For an incremental send, we didn't update the file's data at
>> all, we only changed
>>       its mode, group or access/update timestamps.
>>
>>     Therefore don't send truncate commands for these cases, reducing
>> the stream's size
>>     and saving time.
>>
>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>> index fa378c7ebffd..46f52b4bbc21 100644
>> --- a/fs/btrfs/send.c
>> +++ b/fs/btrfs/send.c
>> @@ -120,6 +120,7 @@ struct send_ctx {
>>   u64 cur_inode_mode;
>>   u64 cur_inode_rdev;
>>   u64 cur_inode_last_extent;
>> + u64 cur_inode_max_write_end;
>>
>>   u64 send_progress;
>>   u64 total_data_size;
>> @@ -4494,6 +4495,8 @@ static int send_hole(struct send_ctx *sctx, u64 end)
>>   break;
>>   offset += len;
>>   }
>> + sctx->cur_inode_max_write_end = max(offset,
>> +    sctx->cur_inode_max_write_end);
>>  tlv_put_failure:
>>   fs_path_free(p);
>>   return ret;
>> @@ -4529,6 +4532,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
>>   len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
>>   }
>>
>> + if (offset >= sctx->cur_inode_size) {
>> + ret = 0;
>> + goto out;
>> + }
>>   if (offset + len > sctx->cur_inode_size)
>>   len = sctx->cur_inode_size - offset;
>>   if (len == 0) {
>> @@ -4560,6 +4567,8 @@ static int send_write_or_clone(struct send_ctx *sctx,
>>   }
>>   ret = 0;
>>   }
>> + sctx->cur_inode_max_write_end = max(offset + len,
>> +    sctx->cur_inode_max_write_end);
>>  out:
>>   return ret;
>>  }
>> @@ -4982,6 +4991,7 @@ static int finish_inode_if_needed(struct
>> send_ctx *sctx, int at_end)
>>   u64 right_gid;
>>   int need_chmod = 0;
>>   int need_chown = 0;
>> + int need_truncate = 1;
>>   int pending_move = 0;
>>   int refs_processed = 0;
>>
>> @@ -5022,9 +5032,13 @@ static int finish_inode_if_needed(struct
>> send_ctx *sctx, int at_end)
>>   need_chown = 1;
>>   if (!S_ISLNK(sctx->cur_inode_mode))
>>   need_chmod = 1;
>> + if (sctx->cur_inode_max_write_end == sctx->cur_inode_size)
>> + need_truncate = 0;
>>   } else {
>> + u64 old_size;
>> +
>>   ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
>> - NULL, NULL, &right_mode, &right_uid,
>> + &old_size, NULL, &right_mode, &right_uid,
>>   &right_gid, NULL);
>>   if (ret < 0)
>>   goto out;
>> @@ -5033,6 +5047,13 @@ static int finish_inode_if_needed(struct
>> send_ctx *sctx, int at_end)
>>   need_chown = 1;
>>   if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
>>   need_chmod = 1;
>> +
>> + if (old_size == sctx->cur_inode_size &&
>> +    sctx->cur_inode_max_write_end <= sctx->cur_inode_size)
>> + need_truncate = 0;
>> + else if (sctx->cur_inode_size > old_size &&
>> + sctx->cur_inode_max_write_end == sctx->cur_inode_size)
>> + need_truncate = 0;
>>   }
>>
>>  truncate_inode:
>> @@ -5052,10 +5073,13 @@ static int finish_inode_if_needed(struct
>> send_ctx *sctx, int at_end)
>>   goto out;
>>   }
>>   }
>> - ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>> - sctx->cur_inode_size);
>> - if (ret < 0)
>> - goto out;
>> + if (need_truncate) {
>> + ret = send_truncate(sctx, sctx->cur_ino,
>> +    sctx->cur_inode_gen,
>> +    sctx->cur_inode_size);
>> + if (ret < 0)
>> + goto out;
>> + }
>>   }
>>
>>   if (need_chown) {
>> @@ -5110,6 +5134,7 @@ static int changed_inode(struct send_ctx *sctx,
>>   sctx->cur_ino = key->objectid;
>>   sctx->cur_inode_new_gen = 0;
>>   sctx->cur_inode_last_extent = (u64)-1;
>> + sctx->cur_inode_max_write_end = 0;
>>
>>   /*
>>   * Set send_progress to current inode. This will tell all get_cur_xxx
>>
>>>
>>> Thanks,
>>> Qu
>>>
>>>>
>>>> Overall performance improves by 102 percent when sending 790000 small files.
>>>> original: 32m45.311s
>>>> patch: 16m8.387s
>>>>
>>>> Signed-off-by: Robbie Ko <robbieko@synology.com>
>>>> ---
>>>>  fs/btrfs/send.c | 13 +++++++++----
>>>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>>>> index 20d3300..7ae2347 100644
>>>> --- a/fs/btrfs/send.c
>>>> +++ b/fs/btrfs/send.c
>>>> @@ -5857,10 +5857,6 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
>>>>                                       goto out;
>>>>                       }
>>>>               }
>>>> -             ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>>>> -                             sctx->cur_inode_size);
>>>> -             if (ret < 0)
>>>> -                     goto out;
>>>>       }
>>>>
>>>>       if (need_chown) {
>>>> @@ -6044,6 +6040,15 @@ static int changed_inode(struct send_ctx *sctx,
>>>>                                       sctx->left_path->nodes[0], left_ii);
>>>>               }
>>>>       }
>>>> +     if (result == BTRFS_COMPARE_TREE_NEW ||
>>>> +         result == BTRFS_COMPARE_TREE_CHANGED) {
>>>> +             if (S_ISREG(sctx->cur_inode_mode)) {
>>>> +                     ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>>>> +                                             sctx->cur_inode_size);
>>>> +                     if (ret < 0)
>>>> +                             goto out;
>>>> +             }
>>>> +     }
>>>>
>>>>  out:
>>>>       return ret;
>>>>
>>>
>>
>>
>>
>
robbieko Dec. 8, 2017, 6:29 a.m. UTC | #3
Hi Filipe David Manana,

I'm sorry to reply so late, your patch is good for me.
Will you submit this patch to the upstream?
In addition, you mentioned other optimization, Can you share it?

I have another case: find_extent_clone will be too slow when there are
too many backrefs in a big file.
1G file with fallocate, have 4 extent_item, then do some 4k random write 
IO, finally total 76767 EXTENT_DATA.
My test ignored find_extent_clone, much faster.
original: 16m8.387s
ignore find_extent_clone: 12s
Do you have a better solution?

Thanks.
Robbie Ko

Filipe Manana 於 2017-12-04 20:08 寫到:
> On Mon, Dec 4, 2017 at 11:28 AM, Qu Wenruo <quwenruo.btrfs@gmx.com> 
> wrote:
>> 
>> 
>> On 2017年12月04日 19:13, Filipe Manana wrote:
>>> On Mon, Dec 4, 2017 at 7:19 AM, Qu Wenruo <quwenruo.btrfs@gmx.com> 
>>> wrote:
>>>> 
>>>> 
>>>> On 2017年12月04日 15:02, robbieko wrote:
>>>>> From: Robbie Ko <robbieko@synology.com>
>>>>> 
>>>>> The commands generated by send contain the following step:
>>>>> 1. mkfile o1851-19-0
>>>>> 2. rename o1851-19-0 -> alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>>> 3. set_xattr alsa-driver/alsa-kernel/isa/es1688/es1688.c - 
>>>>> name=user.xattr data_len=4 data=test
>>>>> 4. write alsa-driver/alsa-kernel/isa/es1688/es1688.c - offset=0, 
>>>>> len=10458
>>>>> 5. truncate alsa-driver/alsa-kernel/isa/es1688/es1688.c size=10458
>>>>> 6. chown alsa-driver/alsa-kernel/isa/es1688/es1688.c - uid=1024, 
>>>>> gid=100
>>>>> 7. chmod alsa-driver/alsa-kernel/isa/es1688/es1688.c - mode=0644
>>>>> 8. utimes alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>>> 
>>>>> After writing file content, it will truncate file to the correct 
>>>>> size.
>>>>> Btrfs truncate will flush last page if size does not align to 
>>>>> sectorsize,
>>>>> and this will cause receive process to wait until flush finishes.
>>>>> In order to avoid waiting for the data to be flushed, this patch
>>>>> changes the order so that the truncate command is sent before the
>>>>> write command.
>>>> 
>>>> Personally speaking, it's better to optimize the receive side.
>>>> 
>>>> For example, at receive side, if we already know that the file size 
>>>> is
>>>> not changed at all, then just skip the truncate command.
>>>> 
>>>> In your send dump, step 5 is not needed at all, and can be skipped 
>>>> to
>>>> speed up the receive procedure.
>>> 
>>> Better yet, is to not send the truncate commands at all, reduces the
>>> stream size and saves all receiver implementations
>>> from having such logic (which further requires stat calls to figure
>>> out the current file size).
>> 
>> Another one is to optimize btrfs file truncate to avoid the 
>> unnecessary
>> page writeback.
>> 
>> But at least, doing optimization in receive side is easier and less
>> bug-prone.
> 
> Not really, for this case at least.
>> 
>> If we could do the same in both send and receive side, then I prefer
>> optimization in receive side.
> 
> Can't agree much with that, for the reasons mentioned before.
> 
>> 
>> Thanks,
>> Qu
>> 
>>> 
>>> I still have a local branch around with several optimizations (from
>>> the time I was a volunteer), with one of them being precisely to 
>>> avoid
>>> unnecessary truncate commands, however I never ended up getting to 
>>> the
>>> point of doing benchmarks.
>>> Pasted below and at 
>>> https://www.friendpaste.com/1J1TuXf2wlU1125mX57k4r
>>> 
>>> 
>>> commit 362290ea03b52e4ce4edb43453a51a3b1f1cd33f
>>> Author: Filipe David Borba Manana <fdmanana@gmail.com>
>>> Date:   Sat Apr 5 22:34:45 2014 +0100
>>> 
>>>     Btrfs: send, don't issue unnecessary file truncate commands
>>> 
>>>     Every time a file is created or updated, send (either incremental
>>> or not) always
>>>     issues a file truncate command. In several common scenarios this
>>> truncate is not
>>>     needed and yet it was still issued all the time. The unnecessary
>>> cases where the
>>>     truncate command is not needed are:
>>> 
>>>     * We have a new file, and its last extent isn't a hole, so that
>>> its last write
>>>       command ends at the file's size;
>>> 
>>>     * For an incremental send, we updated in place an existing file,
>>> without changing
>>>       its size;
>>> 
>>>     * For an incremental send, we updated the file, increased its
>>> size, and the last
>>>       file extent item doesn't correspond to a hole. In this case the 
>>> last write
>>>       command against the file ends at the file's new size;
>>> 
>>>     * For an incremental send, we didn't update the file's data at
>>> all, we only changed
>>>       its mode, group or access/update timestamps.
>>> 
>>>     Therefore don't send truncate commands for these cases, reducing
>>> the stream's size
>>>     and saving time.
>>> 
>>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>>> index fa378c7ebffd..46f52b4bbc21 100644
>>> --- a/fs/btrfs/send.c
>>> +++ b/fs/btrfs/send.c
>>> @@ -120,6 +120,7 @@ struct send_ctx {
>>>   u64 cur_inode_mode;
>>>   u64 cur_inode_rdev;
>>>   u64 cur_inode_last_extent;
>>> + u64 cur_inode_max_write_end;
>>> 
>>>   u64 send_progress;
>>>   u64 total_data_size;
>>> @@ -4494,6 +4495,8 @@ static int send_hole(struct send_ctx *sctx, u64 
>>> end)
>>>   break;
>>>   offset += len;
>>>   }
>>> + sctx->cur_inode_max_write_end = max(offset,
>>> +    sctx->cur_inode_max_write_end);
>>>  tlv_put_failure:
>>>   fs_path_free(p);
>>>   return ret;
>>> @@ -4529,6 +4532,10 @@ static int send_write_or_clone(struct send_ctx 
>>> *sctx,
>>>   len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
>>>   }
>>> 
>>> + if (offset >= sctx->cur_inode_size) {
>>> + ret = 0;
>>> + goto out;
>>> + }
>>>   if (offset + len > sctx->cur_inode_size)
>>>   len = sctx->cur_inode_size - offset;
>>>   if (len == 0) {
>>> @@ -4560,6 +4567,8 @@ static int send_write_or_clone(struct send_ctx 
>>> *sctx,
>>>   }
>>>   ret = 0;
>>>   }
>>> + sctx->cur_inode_max_write_end = max(offset + len,
>>> +    sctx->cur_inode_max_write_end);
>>>  out:
>>>   return ret;
>>>  }
>>> @@ -4982,6 +4991,7 @@ static int finish_inode_if_needed(struct
>>> send_ctx *sctx, int at_end)
>>>   u64 right_gid;
>>>   int need_chmod = 0;
>>>   int need_chown = 0;
>>> + int need_truncate = 1;
>>>   int pending_move = 0;
>>>   int refs_processed = 0;
>>> 
>>> @@ -5022,9 +5032,13 @@ static int finish_inode_if_needed(struct
>>> send_ctx *sctx, int at_end)
>>>   need_chown = 1;
>>>   if (!S_ISLNK(sctx->cur_inode_mode))
>>>   need_chmod = 1;
>>> + if (sctx->cur_inode_max_write_end == sctx->cur_inode_size)
>>> + need_truncate = 0;
>>>   } else {
>>> + u64 old_size;
>>> +
>>>   ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
>>> - NULL, NULL, &right_mode, &right_uid,
>>> + &old_size, NULL, &right_mode, &right_uid,
>>>   &right_gid, NULL);
>>>   if (ret < 0)
>>>   goto out;
>>> @@ -5033,6 +5047,13 @@ static int finish_inode_if_needed(struct
>>> send_ctx *sctx, int at_end)
>>>   need_chown = 1;
>>>   if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
>>>   need_chmod = 1;
>>> +
>>> + if (old_size == sctx->cur_inode_size &&
>>> +    sctx->cur_inode_max_write_end <= sctx->cur_inode_size)
>>> + need_truncate = 0;
>>> + else if (sctx->cur_inode_size > old_size &&
>>> + sctx->cur_inode_max_write_end == sctx->cur_inode_size)
>>> + need_truncate = 0;
>>>   }
>>> 
>>>  truncate_inode:
>>> @@ -5052,10 +5073,13 @@ static int finish_inode_if_needed(struct
>>> send_ctx *sctx, int at_end)
>>>   goto out;
>>>   }
>>>   }
>>> - ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>>> - sctx->cur_inode_size);
>>> - if (ret < 0)
>>> - goto out;
>>> + if (need_truncate) {
>>> + ret = send_truncate(sctx, sctx->cur_ino,
>>> +    sctx->cur_inode_gen,
>>> +    sctx->cur_inode_size);
>>> + if (ret < 0)
>>> + goto out;
>>> + }
>>>   }
>>> 
>>>   if (need_chown) {
>>> @@ -5110,6 +5134,7 @@ static int changed_inode(struct send_ctx *sctx,
>>>   sctx->cur_ino = key->objectid;
>>>   sctx->cur_inode_new_gen = 0;
>>>   sctx->cur_inode_last_extent = (u64)-1;
>>> + sctx->cur_inode_max_write_end = 0;
>>> 
>>>   /*
>>>   * Set send_progress to current inode. This will tell all 
>>> get_cur_xxx
>>> 
>>>> 
>>>> Thanks,
>>>> Qu
>>>> 
>>>>> 
>>>>> Overall performance improves by 102 percent when sending 790000 
>>>>> small files.
>>>>> original: 32m45.311s
>>>>> patch: 16m8.387s
>>>>> 
>>>>> Signed-off-by: Robbie Ko <robbieko@synology.com>
>>>>> ---
>>>>>  fs/btrfs/send.c | 13 +++++++++----
>>>>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>>>> 
>>>>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>>>>> index 20d3300..7ae2347 100644
>>>>> --- a/fs/btrfs/send.c
>>>>> +++ b/fs/btrfs/send.c
>>>>> @@ -5857,10 +5857,6 @@ static int finish_inode_if_needed(struct 
>>>>> send_ctx *sctx, int at_end)
>>>>>                                       goto out;
>>>>>                       }
>>>>>               }
>>>>> -             ret = send_truncate(sctx, sctx->cur_ino, 
>>>>> sctx->cur_inode_gen,
>>>>> -                             sctx->cur_inode_size);
>>>>> -             if (ret < 0)
>>>>> -                     goto out;
>>>>>       }
>>>>> 
>>>>>       if (need_chown) {
>>>>> @@ -6044,6 +6040,15 @@ static int changed_inode(struct send_ctx 
>>>>> *sctx,
>>>>>                                       sctx->left_path->nodes[0], 
>>>>> left_ii);
>>>>>               }
>>>>>       }
>>>>> +     if (result == BTRFS_COMPARE_TREE_NEW ||
>>>>> +         result == BTRFS_COMPARE_TREE_CHANGED) {
>>>>> +             if (S_ISREG(sctx->cur_inode_mode)) {
>>>>> +                     ret = send_truncate(sctx, sctx->cur_ino, 
>>>>> sctx->cur_inode_gen,
>>>>> +                                             
>>>>> sctx->cur_inode_size);
>>>>> +                     if (ret < 0)
>>>>> +                             goto out;
>>>>> +             }
>>>>> +     }
>>>>> 
>>>>>  out:
>>>>>       return ret;
>>>>> 
>>>> 
>>> 
>>> 
>>> 
>> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Dec. 8, 2017, 6:37 a.m. UTC | #4
On 2017年12月08日 14:29, robbieko wrote:
> Hi Filipe David Manana,
> 
> I'm sorry to reply so late, your patch is good for me.
> Will you submit this patch to the upstream?
> In addition, you mentioned other optimization, Can you share it?
> 
> I have another case: find_extent_clone will be too slow when there are
> too many backrefs in a big file.

I submitted an RFC patch long time ago.
https://patchwork.kernel.org/patch/9245287/

That would break reflink and was objected to.

> 1G file with fallocate, have 4 extent_item, then do some 4k random write
> IO, finally total 76767 EXTENT_DATA.

We already have a more dangerous test case for that.

Check fstests/btrfs/130, which can even lead to OOM on small memory system.

Thanks,
Qu

> My test ignored find_extent_clone, much faster.
> original: 16m8.387s
> ignore find_extent_clone: 12s
> Do you have a better solution?
> 
> Thanks.
> Robbie Ko
> 
> Filipe Manana 於 2017-12-04 20:08 寫到:
>> On Mon, Dec 4, 2017 at 11:28 AM, Qu Wenruo <quwenruo.btrfs@gmx.com>
>> wrote:
>>>
>>>
>>> On 2017年12月04日 19:13, Filipe Manana wrote:
>>>> On Mon, Dec 4, 2017 at 7:19 AM, Qu Wenruo <quwenruo.btrfs@gmx.com>
>>>> wrote:
>>>>>
>>>>>
>>>>> On 2017年12月04日 15:02, robbieko wrote:
>>>>>> From: Robbie Ko <robbieko@synology.com>
>>>>>>
>>>>>> The commands generated by send contain the following step:
>>>>>> 1. mkfile o1851-19-0
>>>>>> 2. rename o1851-19-0 -> alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>>>> 3. set_xattr alsa-driver/alsa-kernel/isa/es1688/es1688.c -
>>>>>> name=user.xattr data_len=4 data=test
>>>>>> 4. write alsa-driver/alsa-kernel/isa/es1688/es1688.c - offset=0,
>>>>>> len=10458
>>>>>> 5. truncate alsa-driver/alsa-kernel/isa/es1688/es1688.c size=10458
>>>>>> 6. chown alsa-driver/alsa-kernel/isa/es1688/es1688.c - uid=1024,
>>>>>> gid=100
>>>>>> 7. chmod alsa-driver/alsa-kernel/isa/es1688/es1688.c - mode=0644
>>>>>> 8. utimes alsa-driver/alsa-kernel/isa/es1688/es1688.c
>>>>>>
>>>>>> After writing file content, it will truncate file to the correct
>>>>>> size.
>>>>>> Btrfs truncate will flush the last page if the size does not align to
>>>>>> the sectorsize,
>>>>>> and this will cause the receive process to wait until the flush finishes.
>>>>>> In order to avoid waiting for the data flush, this patch changes the
>>>>>> order so
>>>>>> that the truncate command is sent before the write command.
>>>>>
>>>>> Personally speaking, it's better to optimize the receive side.
>>>>>
>>>>> For example, at receive side, if we already know that the file size is
>>>>> not changed at all, then just skip the truncate command.
>>>>>
>>>>> In your send dump, step 5 is not needed at all, and can be skipped to
>>>>> speed up the receive procedure.
>>>>
>>>> Better yet, is to not send the truncate commands at all, reduces the
>>>> stream size and saves all receiver implementations
>>>> from having such logic (which further requires stat calls to figure
>>>> out the current file size).
>>>
>>> Another one is to optimize btrfs file truncate to avoid the unnecessary
>>> page writeback.
>>>
>>> But at least, doing optimization in receive side is easier and less
>>> bug-prone.
>>
>> Not really, for this case at least.
>>>
>>> If we could do the same in both send and receive side, then I prefer
>>> optimization in receive side.
>>
>> Can't agree much with that, for the reasons mentioned before.
>>
>>>
>>> Thanks,
>>> Qu
>>>
>>>>
>>>> I still have a local branch around with several optimizations (from
>>>> the time I was a volunteer), with one of them being precisely to avoid
>>>> unnecessary truncate commands, however I never ended up getting to the
>>>> point of doing benchmarks.
>>>> Pasted below and at https://www.friendpaste.com/1J1TuXf2wlU1125mX57k4r
>>>>
>>>>
>>>> commit 362290ea03b52e4ce4edb43453a51a3b1f1cd33f
>>>> Author: Filipe David Borba Manana <fdmanana@gmail.com>
>>>> Date:   Sat Apr 5 22:34:45 2014 +0100
>>>>
>>>>     Btrfs: send, don't issue unnecessary file truncate commands
>>>>
>>>>     Every time a file is created or updated, send (either incremental
>>>> or not) always
>>>>     issues a file truncate command. In several commons scenarios this
>>>> truncate is not
>>>>     needed and yet it was still issued all the time. The unnecessary
>>>> cases where the
>>>>     truncate command is not needed are:
>>>>
>>>>     * We have a new file, and its last extent isn't a hole, so that
>>>> its last write
>>>>       command ends at the file's size;
>>>>
>>>>     * For an incremental send, we updated in place an existing file,
>>>> without changing
>>>>       its size;
>>>>
>>>>     * For an incremental send, we updated the file, increased its
>>>> size, and the last
>>>>       file extent item doesn't correspond to a hole. In this case
>>>> the last write
>>>>       command against the file ends at the file's new size;
>>>>
>>>>     * For an incremental send, we didn't update the file's data at
>>>> all, we only changed
>>>>       its mode, group or access/update timestamps.
>>>>
>>>>     Therefore don't send truncate commands for these cases, reducing
>>>> the stream's size
>>>>     and saving time.
>>>>
>>>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>>>> index fa378c7ebffd..46f52b4bbc21 100644
>>>> --- a/fs/btrfs/send.c
>>>> +++ b/fs/btrfs/send.c
>>>> @@ -120,6 +120,7 @@ struct send_ctx {
>>>>   u64 cur_inode_mode;
>>>>   u64 cur_inode_rdev;
>>>>   u64 cur_inode_last_extent;
>>>> + u64 cur_inode_max_write_end;
>>>>
>>>>   u64 send_progress;
>>>>   u64 total_data_size;
>>>> @@ -4494,6 +4495,8 @@ static int send_hole(struct send_ctx *sctx,
>>>> u64 end)
>>>>   break;
>>>>   offset += len;
>>>>   }
>>>> + sctx->cur_inode_max_write_end = max(offset,
>>>> +    sctx->cur_inode_max_write_end);
>>>>  tlv_put_failure:
>>>>   fs_path_free(p);
>>>>   return ret;
>>>> @@ -4529,6 +4532,10 @@ static int send_write_or_clone(struct
>>>> send_ctx *sctx,
>>>>   len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
>>>>   }
>>>>
>>>> + if (offset >= sctx->cur_inode_size) {
>>>> + ret = 0;
>>>> + goto out;
>>>> + }
>>>>   if (offset + len > sctx->cur_inode_size)
>>>>   len = sctx->cur_inode_size - offset;
>>>>   if (len == 0) {
>>>> @@ -4560,6 +4567,8 @@ static int send_write_or_clone(struct send_ctx
>>>> *sctx,
>>>>   }
>>>>   ret = 0;
>>>>   }
>>>> + sctx->cur_inode_max_write_end = max(offset + len,
>>>> +    sctx->cur_inode_max_write_end);
>>>>  out:
>>>>   return ret;
>>>>  }
>>>> @@ -4982,6 +4991,7 @@ static int finish_inode_if_needed(struct
>>>> send_ctx *sctx, int at_end)
>>>>   u64 right_gid;
>>>>   int need_chmod = 0;
>>>>   int need_chown = 0;
>>>> + int need_truncate = 1;
>>>>   int pending_move = 0;
>>>>   int refs_processed = 0;
>>>>
>>>> @@ -5022,9 +5032,13 @@ static int finish_inode_if_needed(struct
>>>> send_ctx *sctx, int at_end)
>>>>   need_chown = 1;
>>>>   if (!S_ISLNK(sctx->cur_inode_mode))
>>>>   need_chmod = 1;
>>>> + if (sctx->cur_inode_max_write_end == sctx->cur_inode_size)
>>>> + need_truncate = 0;
>>>>   } else {
>>>> + u64 old_size;
>>>> +
>>>>   ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
>>>> - NULL, NULL, &right_mode, &right_uid,
>>>> + &old_size, NULL, &right_mode, &right_uid,
>>>>   &right_gid, NULL);
>>>>   if (ret < 0)
>>>>   goto out;
>>>> @@ -5033,6 +5047,13 @@ static int finish_inode_if_needed(struct
>>>> send_ctx *sctx, int at_end)
>>>>   need_chown = 1;
>>>>   if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
>>>>   need_chmod = 1;
>>>> +
>>>> + if (old_size == sctx->cur_inode_size &&
>>>> +    sctx->cur_inode_max_write_end <= sctx->cur_inode_size)
>>>> + need_truncate = 0;
>>>> + else if (sctx->cur_inode_size > old_size &&
>>>> + sctx->cur_inode_max_write_end == sctx->cur_inode_size)
>>>> + need_truncate = 0;
>>>>   }
>>>>
>>>>  truncate_inode:
>>>> @@ -5052,10 +5073,13 @@ static int finish_inode_if_needed(struct
>>>> send_ctx *sctx, int at_end)
>>>>   goto out;
>>>>   }
>>>>   }
>>>> - ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
>>>> - sctx->cur_inode_size);
>>>> - if (ret < 0)
>>>> - goto out;
>>>> + if (need_truncate) {
>>>> + ret = send_truncate(sctx, sctx->cur_ino,
>>>> +    sctx->cur_inode_gen,
>>>> +    sctx->cur_inode_size);
>>>> + if (ret < 0)
>>>> + goto out;
>>>> + }
>>>>   }
>>>>
>>>>   if (need_chown) {
>>>> @@ -5110,6 +5134,7 @@ static int changed_inode(struct send_ctx *sctx,
>>>>   sctx->cur_ino = key->objectid;
>>>>   sctx->cur_inode_new_gen = 0;
>>>>   sctx->cur_inode_last_extent = (u64)-1;
>>>> + sctx->cur_inode_max_write_end = 0;
>>>>
>>>>   /*
>>>>   * Set send_progress to current inode. This will tell all get_cur_xxx
>>>>
>>>>>
>>>>> Thanks,
>>>>> Qu
>>>>>
>>>>>>
>>>>>> Overall performance improves by 102 percent when sending 790000
>>>>>> small files.
>>>>>> original: 32m45.311s
>>>>>> patch: 16m8.387s
>>>>>>
>>>>>> Signed-off-by: Robbie Ko <robbieko@synology.com>
>>>>>> ---
>>>>>>  fs/btrfs/send.c | 13 +++++++++----
>>>>>>  1 file changed, 9 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>>>>>> index 20d3300..7ae2347 100644
>>>>>> --- a/fs/btrfs/send.c
>>>>>> +++ b/fs/btrfs/send.c
>>>>>> @@ -5857,10 +5857,6 @@ static int finish_inode_if_needed(struct
>>>>>> send_ctx *sctx, int at_end)
>>>>>>                                       goto out;
>>>>>>                       }
>>>>>>               }
>>>>>> -             ret = send_truncate(sctx, sctx->cur_ino,
>>>>>> sctx->cur_inode_gen,
>>>>>> -                             sctx->cur_inode_size);
>>>>>> -             if (ret < 0)
>>>>>> -                     goto out;
>>>>>>       }
>>>>>>
>>>>>>       if (need_chown) {
>>>>>> @@ -6044,6 +6040,15 @@ static int changed_inode(struct send_ctx
>>>>>> *sctx,
>>>>>>                                       sctx->left_path->nodes[0],
>>>>>> left_ii);
>>>>>>               }
>>>>>>       }
>>>>>> +     if (result == BTRFS_COMPARE_TREE_NEW ||
>>>>>> +         result == BTRFS_COMPARE_TREE_CHANGED) {
>>>>>> +             if (S_ISREG(sctx->cur_inode_mode)) {
>>>>>> +                     ret = send_truncate(sctx, sctx->cur_ino,
>>>>>> sctx->cur_inode_gen,
>>>>>> +                                             sctx->cur_inode_size);
>>>>>> +                     if (ret < 0)
>>>>>> +                             goto out;
>>>>>> +             }
>>>>>> +     }
>>>>>>
>>>>>>  out:
>>>>>>       return ret;
>>>>>>
>>>>>
>>>>
>>>>
>>>>
>>>
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fa378c7ebffd..46f52b4bbc21 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -120,6 +120,7 @@  struct send_ctx {
  u64 cur_inode_mode;
  u64 cur_inode_rdev;
  u64 cur_inode_last_extent;
+ u64 cur_inode_max_write_end;

  u64 send_progress;
  u64 total_data_size;
@@ -4494,6 +4495,8 @@  static int send_hole(struct send_ctx *sctx, u64 end)
  break;
  offset += len;
  }
+ sctx->cur_inode_max_write_end = max(offset,
+    sctx->cur_inode_max_write_end);
 tlv_put_failure:
  fs_path_free(p);
  return ret;
@@ -4529,6 +4532,10 @@  static int send_write_or_clone(struct send_ctx *sctx,
  len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
  }

+ if (offset >= sctx->cur_inode_size) {
+ ret = 0;
+ goto out;
+ }
  if (offset + len > sctx->cur_inode_size)
  len = sctx->cur_inode_size - offset;
  if (len == 0) {
@@ -4560,6 +4567,8 @@  static int send_write_or_clone(struct send_ctx *sctx,
  }
  ret = 0;
  }
+ sctx->cur_inode_max_write_end = max(offset + len,
+    sctx->cur_inode_max_write_end);
 out:
  return ret;
 }
@@ -4982,6 +4991,7 @@  static int finish_inode_if_needed(struct
send_ctx *sctx, int at_end)
  u64 right_gid;
  int need_chmod = 0;
  int need_chown = 0;
+ int need_truncate = 1;
  int pending_move = 0;
  int refs_processed = 0;