diff mbox

btrfs: send, improve rmdir performance for large directory

Message ID 1525747843-31894-1-git-send-email-robbieko@synology.com (mailing list archive)
State New, archived
Headers show

Commit Message

robbieko May 8, 2018, 2:50 a.m. UTC
From: Robbie Ko <robbieko@synology.com>

At present, we check if the directory can be deleted.
We need to check whether all the files under this directory
have been processed every time we finished processing an
entry under that directory.

Example: 2,000,000 files in 1 directory.
Parent snapshot:
|---- dir/ (ino 257, dir)
    |---- f1 (ino 258, file1)
    |---- f2 (ino 259, file2)
    |---- ...
    |---- f2000000

Send snapshot: (Empty)

Result:
original : 1994m57.071s
patch : 1m38.554s

[FIX]
We add a offset hint for dir index when check can_rmdir.
This hint records the offset to which the last can_rmdir
was processed.

Signed-off-by: Robbie Ko <robbieko@synology.com>
---
 fs/btrfs/send.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

Comments

Filipe Manana May 8, 2018, 9:10 a.m. UTC | #1
On Tue, May 8, 2018 at 3:50 AM, robbieko <robbieko@synology.com> wrote:
> From: Robbie Ko <robbieko@synology.com>
>
> At present, we check if the directory can be deleted.

At present, and in the future... it will always be needed.

> We need to check whether all the files under this directory
> have been processed every time we finished processing an
> entry under that directory.

I would rephrase to something more clear like:

"Currently when checking if we can delete a directory, we always check
if all its children have been processed."

>
> Example: 2,000,000 files in 1 directory.
> Parent snapshot:
> |---- dir/ (ino 257, dir)
>     |---- f1 (ino 258, file1)
>     |---- f2 (ino 259, file2)

Those comments of "dir", "file1", "file2", are irrelevant, they don't
provide any useful information.

>     |---- ...
>     |---- f2000000
>
> Send snapshot: (Empty)

Heck, for such trivial case to understand, you don't need to add these
diagrams. Just because I do them most of the time, and ask you too,
doesn't mean they're always needed.
Just say a directory with many files was deleted. Anyone can understand that.

>
> Result:
> original : 1994m57.071s
> patch : 1m38.554s
>
> [FIX]
> We add a offset hint for dir index when check can_rmdir.
> This hint records the offset to which the last can_rmdir
> was processed.

Something like:

"Instead of checking all children on all calls to can_rmdir(), we keep
track of the directory index offset of the child last
checked in the last call to can_rmdir(), and then use it as the
starting point for future calls to can_rmdir()".

>
> Signed-off-by: Robbie Ko <robbieko@synology.com>
> ---
>  fs/btrfs/send.c | 45 +++++++++++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 16 deletions(-)
>
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index 484e2af..43b0364 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -247,6 +247,7 @@ struct orphan_dir_info {
>         struct rb_node node;
>         u64 ino;
>         u64 gen;
> +       u64 offset_hint;

This name is confusing. A better name could be used like
"last_dir_index_offset".

>  };
>
>  struct name_cache_entry {
> @@ -2855,12 +2856,6 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
>         struct rb_node *parent = NULL;
>         struct orphan_dir_info *entry, *odi;
>
> -       odi = kmalloc(sizeof(*odi), GFP_KERNEL);
> -       if (!odi)
> -               return ERR_PTR(-ENOMEM);
> -       odi->ino = dir_ino;
> -       odi->gen = 0;
> -
>         while (*p) {
>                 parent = *p;
>                 entry = rb_entry(parent, struct orphan_dir_info, node);
> @@ -2869,11 +2864,17 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
>                 } else if (dir_ino > entry->ino) {
>                         p = &(*p)->rb_right;
>                 } else {
> -                       kfree(odi);
>                         return entry;
>                 }
>         }
>
> +       odi = kmalloc(sizeof(*odi), GFP_KERNEL);
> +       if (!odi)
> +               return ERR_PTR(-ENOMEM);
> +       odi->ino = dir_ino;
> +       odi->gen = 0;
> +       odi->offset_hint = 0;
> +

So moving the allocation to the end in order to avoid unnecessary
allocations is a whole different change and optimization (not
mentioned in the changelog).
Please split this into its own patch.

>         rb_link_node(&odi->node, parent, p);
>         rb_insert_color(&odi->node, &sctx->orphan_dirs);
>         return odi;
> @@ -2928,6 +2929,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
>         struct btrfs_key found_key;
>         struct btrfs_key loc;
>         struct btrfs_dir_item *di;
> +       struct orphan_dir_info *odi = NULL;
>
>         /*
>          * Don't try to rmdir the top/root subvolume dir.
> @@ -2942,6 +2944,11 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
>         key.objectid = dir;
>         key.type = BTRFS_DIR_INDEX_KEY;
>         key.offset = 0;
> +
> +       odi = get_orphan_dir_info(sctx, dir);
> +       if (odi)
> +               key.offset = odi->offset_hint;
> +
>         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>         if (ret < 0)
>                 goto out;
> @@ -2969,24 +2976,26 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
>
>                 dm = get_waiting_dir_move(sctx, loc.objectid);
>                 if (dm) {
> -                       struct orphan_dir_info *odi;
> -
>                         odi = add_orphan_dir_info(sctx, dir);
>                         if (IS_ERR(odi)) {
>                                 ret = PTR_ERR(odi);
>                                 goto out;
>                         }
>                         odi->gen = dir_gen;
> +                       odi->offset_hint = found_key.offset;
>                         dm->rmdir_ino = dir;
>                         ret = 0;
>                         goto out;
>                 }
>
>                 if (loc.objectid > send_progress) {
> -                       struct orphan_dir_info *odi;
> -
> -                       odi = get_orphan_dir_info(sctx, dir);
> -                       free_orphan_dir_info(sctx, odi);
> +                       odi = add_orphan_dir_info(sctx, dir);
> +                       if (IS_ERR(odi)) {
> +                               ret = PTR_ERR(odi);
> +                               goto out;
> +                       }
> +                       odi->gen = dir_gen;
> +                       odi->offset_hint = found_key.offset;
>                         ret = 0;
>                         goto out;
>                 }
> @@ -2994,6 +3003,8 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
>                 path->slots[0]++;
>         }
>
> +       if (odi)
> +               free_orphan_dir_info(sctx, odi);

Remove the "if (odi)", free_orphan_dir_info() deals with NULL properly.

>         ret = 1;
>
>  out:
> @@ -3270,13 +3281,16 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
>
>         if (rmdir_ino) {
>                 struct orphan_dir_info *odi;
> +               u64 gen;
>
>                 odi = get_orphan_dir_info(sctx, rmdir_ino);
>                 if (!odi) {
>                         /* already deleted */
>                         goto finish;
>                 }
> -               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
> +               gen = odi->gen;
> +
> +               ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
>                 if (ret < 0)
>                         goto out;
>                 if (!ret)
> @@ -3287,13 +3301,12 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
>                         ret = -ENOMEM;
>                         goto out;
>                 }
> -               ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
> +               ret = get_cur_path(sctx, rmdir_ino, gen, name);
>                 if (ret < 0)
>                         goto out;
>                 ret = send_rmdir(sctx, name);
>                 if (ret < 0)
>                         goto out;
> -               free_orphan_dir_info(sctx, odi);
>         }
>
>  finish:
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
robbieko May 8, 2018, 9:25 a.m. UTC | #2
Hi Manana,

I will fix and modify and then send patch V2.

Thanks.
Robbie Ko

Filipe Manana 於 2018-05-08 17:10 寫到:
> On Tue, May 8, 2018 at 3:50 AM, robbieko <robbieko@synology.com> wrote:
>> From: Robbie Ko <robbieko@synology.com>
>> 
>> At present, we check if the directory can be deleted.
> 
> At present, and in the future... it will always be needed.
> 
>> We need to check whether all the files under this directory
>> have been processed every time we finished processing an
>> entry under that directory.
> 
> I would rephrase to something more clear like:
> 
> "Currently when checking if we can delete a directory, we always check
> if all its children have been processed."
> 
>> 
>> Example: 2,000,000 files in 1 directory.
>> Parent snapshot:
>> |---- dir/ (ino 257, dir)
>>     |---- f1 (ino 258, file1)
>>     |---- f2 (ino 259, file2)
> 
> Those comments of "dir", "file1", "file2", are irrelevant, they don't
> provide any useful information.
> 
>>     |---- ...
>>     |---- f2000000
>> 
>> Send snapshot: (Empty)
> 
> Heck, for such trivial case to understand, you don't need to add these
> diagrams. Just because I do them most of the time, and ask you too,
> doesn't mean they're always needed.
> Just say a directory with many files was deleted. Anyone can understand 
> that.
> 
>> 
>> Result:
>> original : 1994m57.071s
>> patch : 1m38.554s
>> 
>> [FIX]
>> We add a offset hint for dir index when check can_rmdir.
>> This hint records the offset to which the last can_rmdir
>> was processed.
> 
> Something like:
> 
> "Instead of checking all children on all calls to can_rmdir(), we keep
> track of the directory index offset of the child last
> checked in the last call to can_rmdir(), and then use it as the
> starting point for future calls to can_rmdir()".
> 
>> 
>> Signed-off-by: Robbie Ko <robbieko@synology.com>
>> ---
>>  fs/btrfs/send.c | 45 +++++++++++++++++++++++++++++----------------
>>  1 file changed, 29 insertions(+), 16 deletions(-)
>> 
>> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
>> index 484e2af..43b0364 100644
>> --- a/fs/btrfs/send.c
>> +++ b/fs/btrfs/send.c
>> @@ -247,6 +247,7 @@ struct orphan_dir_info {
>>         struct rb_node node;
>>         u64 ino;
>>         u64 gen;
>> +       u64 offset_hint;
> 
> This name is confusing. A better name could be used like
> "last_dir_index_offset".
> 
>>  };
>> 
>>  struct name_cache_entry {
>> @@ -2855,12 +2856,6 @@ static int orphanize_inode(struct send_ctx 
>> *sctx, u64 ino, u64 gen,
>>         struct rb_node *parent = NULL;
>>         struct orphan_dir_info *entry, *odi;
>> 
>> -       odi = kmalloc(sizeof(*odi), GFP_KERNEL);
>> -       if (!odi)
>> -               return ERR_PTR(-ENOMEM);
>> -       odi->ino = dir_ino;
>> -       odi->gen = 0;
>> -
>>         while (*p) {
>>                 parent = *p;
>>                 entry = rb_entry(parent, struct orphan_dir_info, 
>> node);
>> @@ -2869,11 +2864,17 @@ static int orphanize_inode(struct send_ctx 
>> *sctx, u64 ino, u64 gen,
>>                 } else if (dir_ino > entry->ino) {
>>                         p = &(*p)->rb_right;
>>                 } else {
>> -                       kfree(odi);
>>                         return entry;
>>                 }
>>         }
>> 
>> +       odi = kmalloc(sizeof(*odi), GFP_KERNEL);
>> +       if (!odi)
>> +               return ERR_PTR(-ENOMEM);
>> +       odi->ino = dir_ino;
>> +       odi->gen = 0;
>> +       odi->offset_hint = 0;
>> +
> 
> So moving the allocation to the end in order to avoid unnecessary
> allocations is a whole different change and optimization (not
> mentioned in the changelog).
> Please split this into its own patch.
> 
>>         rb_link_node(&odi->node, parent, p);
>>         rb_insert_color(&odi->node, &sctx->orphan_dirs);
>>         return odi;
>> @@ -2928,6 +2929,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 
>> dir, u64 dir_gen,
>>         struct btrfs_key found_key;
>>         struct btrfs_key loc;
>>         struct btrfs_dir_item *di;
>> +       struct orphan_dir_info *odi = NULL;
>> 
>>         /*
>>          * Don't try to rmdir the top/root subvolume dir.
>> @@ -2942,6 +2944,11 @@ static int can_rmdir(struct send_ctx *sctx, u64 
>> dir, u64 dir_gen,
>>         key.objectid = dir;
>>         key.type = BTRFS_DIR_INDEX_KEY;
>>         key.offset = 0;
>> +
>> +       odi = get_orphan_dir_info(sctx, dir);
>> +       if (odi)
>> +               key.offset = odi->offset_hint;
>> +
>>         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>>         if (ret < 0)
>>                 goto out;
>> @@ -2969,24 +2976,26 @@ static int can_rmdir(struct send_ctx *sctx, 
>> u64 dir, u64 dir_gen,
>> 
>>                 dm = get_waiting_dir_move(sctx, loc.objectid);
>>                 if (dm) {
>> -                       struct orphan_dir_info *odi;
>> -
>>                         odi = add_orphan_dir_info(sctx, dir);
>>                         if (IS_ERR(odi)) {
>>                                 ret = PTR_ERR(odi);
>>                                 goto out;
>>                         }
>>                         odi->gen = dir_gen;
>> +                       odi->offset_hint = found_key.offset;
>>                         dm->rmdir_ino = dir;
>>                         ret = 0;
>>                         goto out;
>>                 }
>> 
>>                 if (loc.objectid > send_progress) {
>> -                       struct orphan_dir_info *odi;
>> -
>> -                       odi = get_orphan_dir_info(sctx, dir);
>> -                       free_orphan_dir_info(sctx, odi);
>> +                       odi = add_orphan_dir_info(sctx, dir);
>> +                       if (IS_ERR(odi)) {
>> +                               ret = PTR_ERR(odi);
>> +                               goto out;
>> +                       }
>> +                       odi->gen = dir_gen;
>> +                       odi->offset_hint = found_key.offset;
>>                         ret = 0;
>>                         goto out;
>>                 }
>> @@ -2994,6 +3003,8 @@ static int can_rmdir(struct send_ctx *sctx, u64 
>> dir, u64 dir_gen,
>>                 path->slots[0]++;
>>         }
>> 
>> +       if (odi)
>> +               free_orphan_dir_info(sctx, odi);
> 
> Remove the "if (odi)", free_orphan_dir_info() deals with NULL properly.
> 
>>         ret = 1;
>> 
>>  out:
>> @@ -3270,13 +3281,16 @@ static int apply_dir_move(struct send_ctx 
>> *sctx, struct pending_dir_move *pm)
>> 
>>         if (rmdir_ino) {
>>                 struct orphan_dir_info *odi;
>> +               u64 gen;
>> 
>>                 odi = get_orphan_dir_info(sctx, rmdir_ino);
>>                 if (!odi) {
>>                         /* already deleted */
>>                         goto finish;
>>                 }
>> -               ret = can_rmdir(sctx, rmdir_ino, odi->gen, 
>> sctx->cur_ino);
>> +               gen = odi->gen;
>> +
>> +               ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
>>                 if (ret < 0)
>>                         goto out;
>>                 if (!ret)
>> @@ -3287,13 +3301,12 @@ static int apply_dir_move(struct send_ctx 
>> *sctx, struct pending_dir_move *pm)
>>                         ret = -ENOMEM;
>>                         goto out;
>>                 }
>> -               ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
>> +               ret = get_cur_path(sctx, rmdir_ino, gen, name);
>>                 if (ret < 0)
>>                         goto out;
>>                 ret = send_rmdir(sctx, name);
>>                 if (ret < 0)
>>                         goto out;
>> -               free_orphan_dir_info(sctx, odi);
>>         }
>> 
>>  finish:
>> --
>> 1.9.1
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" 
>> in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 484e2af..43b0364 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -247,6 +247,7 @@  struct orphan_dir_info {
 	struct rb_node node;
 	u64 ino;
 	u64 gen;
+	u64 offset_hint;
 };
 
 struct name_cache_entry {
@@ -2855,12 +2856,6 @@  static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
 	struct rb_node *parent = NULL;
 	struct orphan_dir_info *entry, *odi;
 
-	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
-	if (!odi)
-		return ERR_PTR(-ENOMEM);
-	odi->ino = dir_ino;
-	odi->gen = 0;
-
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct orphan_dir_info, node);
@@ -2869,11 +2864,17 @@  static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
 		} else if (dir_ino > entry->ino) {
 			p = &(*p)->rb_right;
 		} else {
-			kfree(odi);
 			return entry;
 		}
 	}
 
+	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
+	if (!odi)
+		return ERR_PTR(-ENOMEM);
+	odi->ino = dir_ino;
+	odi->gen = 0;
+	odi->offset_hint = 0;
+
 	rb_link_node(&odi->node, parent, p);
 	rb_insert_color(&odi->node, &sctx->orphan_dirs);
 	return odi;
@@ -2928,6 +2929,7 @@  static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 	struct btrfs_key found_key;
 	struct btrfs_key loc;
 	struct btrfs_dir_item *di;
+	struct orphan_dir_info *odi = NULL;
 
 	/*
 	 * Don't try to rmdir the top/root subvolume dir.
@@ -2942,6 +2944,11 @@  static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 	key.objectid = dir;
 	key.type = BTRFS_DIR_INDEX_KEY;
 	key.offset = 0;
+
+	odi = get_orphan_dir_info(sctx, dir);
+	if (odi)
+		key.offset = odi->offset_hint;
+
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -2969,24 +2976,26 @@  static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 
 		dm = get_waiting_dir_move(sctx, loc.objectid);
 		if (dm) {
-			struct orphan_dir_info *odi;
-
 			odi = add_orphan_dir_info(sctx, dir);
 			if (IS_ERR(odi)) {
 				ret = PTR_ERR(odi);
 				goto out;
 			}
 			odi->gen = dir_gen;
+			odi->offset_hint = found_key.offset;
 			dm->rmdir_ino = dir;
 			ret = 0;
 			goto out;
 		}
 
 		if (loc.objectid > send_progress) {
-			struct orphan_dir_info *odi;
-
-			odi = get_orphan_dir_info(sctx, dir);
-			free_orphan_dir_info(sctx, odi);
+			odi = add_orphan_dir_info(sctx, dir);
+			if (IS_ERR(odi)) {
+				ret = PTR_ERR(odi);
+				goto out;
+			}
+			odi->gen = dir_gen;
+			odi->offset_hint = found_key.offset;
 			ret = 0;
 			goto out;
 		}
@@ -2994,6 +3003,8 @@  static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 		path->slots[0]++;
 	}
 
+	if (odi)
+		free_orphan_dir_info(sctx, odi);
 	ret = 1;
 
 out:
@@ -3270,13 +3281,16 @@  static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 
 	if (rmdir_ino) {
 		struct orphan_dir_info *odi;
+		u64 gen;
 
 		odi = get_orphan_dir_info(sctx, rmdir_ino);
 		if (!odi) {
 			/* already deleted */
 			goto finish;
 		}
-		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
+		gen = odi->gen;
+
+		ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
 		if (ret < 0)
 			goto out;
 		if (!ret)
@@ -3287,13 +3301,12 @@  static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 			ret = -ENOMEM;
 			goto out;
 		}
-		ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
+		ret = get_cur_path(sctx, rmdir_ino, gen, name);
 		if (ret < 0)
 			goto out;
 		ret = send_rmdir(sctx, name);
 		if (ret < 0)
 			goto out;
-		free_orphan_dir_info(sctx, odi);
 	}
 
 finish: