Btrfs: incremental send, fix wrong unlink path after renaming file
diff mbox

Message ID 20171117144830.17839-1-fdmanana@kernel.org
State New
Headers show

Commit Message

Filipe Manana Nov. 17, 2017, 2:48 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Under some circumstances, an incremental send operation can issue wrong
paths for unlink commands related to files that have multiple hard links
and some (or all) of those links were renamed between the parent and send
snapshots. Consider the following example:

Parent snapshot

 .                                                      (ino 256)
 |---- a/                                               (ino 257)
 |     |---- b/                                         (ino 259)
 |     |     |---- c/                                   (ino 260)
 |     |     |---- f2                                   (ino 261)
 |     |
 |     |---- f2l1                                       (ino 261)
 |
 |---- d/                                               (ino 262)
       |---- f1l1_2                                     (ino 258)
       |---- f2l2                                       (ino 261)
       |---- f1_2                                       (ino 258)

Send snapshot

 .                                                      (ino 256)
 |---- a/                                               (ino 257)
 |     |---- f2l1/                                      (ino 263)
 |             |---- b2/                                (ino 259)
 |                   |---- c/                           (ino 260)
 |                   |     |---- d3                     (ino 262)
 |                   |           |---- f1l1_2           (ino 258)
 |                   |           |---- f2l2_2           (ino 261)
 |                   |           |---- f1_2             (ino 258)
 |                   |
 |                   |---- f2                           (ino 261)
 |                   |---- f1l2                         (ino 258)
 |
 |---- d                                                (ino 261)

When computing the incremental send stream the following steps happen:

1) When processing inode 261, a rename operation is issued that renames
   inode 262, which currently as a path of "d", to an orphan name of
   "o262-7-0". This is done because in the send snapshot, inode 261 has
   of its hard links with a path of "d" as well.

2) Two link operations are issued that create the new hard links for
   inode 261, whose names are "d" and "f2l2_2", at paths "/" and
   "o262-7-0/" respectively.

3) Still while processing inode 261, unlink operations are issued to
   remove the old hard links of inode 261, with names "f2l1" and "f2l2",
   at paths "a/" and "d/". However path "d/" does not correspond anymore
   to the directory inode 262 but corresponds instead to a hard link of
   inode 261 (link command issued in the previous step). This makes the
   receiver fail with a ENOTDIR error when attempting the unlink
   operation.

The problem happens because before sending the unlink operation, we failed
to detect that inode 262 was one of ancestors for inode 261 in the parent
snapshot, and therefore we didn't recompute the path for inode 262 before
issuing the unlink operation for the link named "f2l2" of inode 262. The
detection failed because the function "is_ancestor()" only follows the
first hard link it finds for an inode instead of all of its hard links
(as it was originally created for being used with directories only, for
which only one hard link exists). So fix this by making "is_ancestor()"
follow all hard links of the input inode.

A test case for fstests follows soon.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/send.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 106 insertions(+), 18 deletions(-)

Comments

David Sterba Nov. 28, 2017, 4:18 p.m. UTC | #1
On Fri, Nov 17, 2017 at 02:48:30PM +0000, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> Under some circumstances, an incremental send operation can issue wrong
> paths for unlink commands related to files that have multiple hard links
> and some (or all) of those links were renamed between the parent and send
> snapshots. Consider the following example:
> 
> Parent snapshot
> 
>  .                                                      (ino 256)
>  |---- a/                                               (ino 257)
>  |     |---- b/                                         (ino 259)
>  |     |     |---- c/                                   (ino 260)
>  |     |     |---- f2                                   (ino 261)
>  |     |
>  |     |---- f2l1                                       (ino 261)
>  |
>  |---- d/                                               (ino 262)
>        |---- f1l1_2                                     (ino 258)
>        |---- f2l2                                       (ino 261)
>        |---- f1_2                                       (ino 258)
> 
> Send snapshot
> 
>  .                                                      (ino 256)
>  |---- a/                                               (ino 257)
>  |     |---- f2l1/                                      (ino 263)
>  |             |---- b2/                                (ino 259)
>  |                   |---- c/                           (ino 260)
>  |                   |     |---- d3                     (ino 262)
>  |                   |           |---- f1l1_2           (ino 258)
>  |                   |           |---- f2l2_2           (ino 261)
>  |                   |           |---- f1_2             (ino 258)
>  |                   |
>  |                   |---- f2                           (ino 261)
>  |                   |---- f1l2                         (ino 258)
>  |
>  |---- d                                                (ino 261)
> 
> When computing the incremental send stream the following steps happen:
> 
> 1) When processing inode 261, a rename operation is issued that renames
>    inode 262, which currently as a path of "d", to an orphan name of
>    "o262-7-0". This is done because in the send snapshot, inode 261 has
>    of its hard links with a path of "d" as well.
> 
> 2) Two link operations are issued that create the new hard links for
>    inode 261, whose names are "d" and "f2l2_2", at paths "/" and
>    "o262-7-0/" respectively.
> 
> 3) Still while processing inode 261, unlink operations are issued to
>    remove the old hard links of inode 261, with names "f2l1" and "f2l2",
>    at paths "a/" and "d/". However path "d/" does not correspond anymore
>    to the directory inode 262 but corresponds instead to a hard link of
>    inode 261 (link command issued in the previous step). This makes the
>    receiver fail with a ENOTDIR error when attempting the unlink
>    operation.
> 
> The problem happens because before sending the unlink operation, we failed
> to detect that inode 262 was one of ancestors for inode 261 in the parent
> snapshot, and therefore we didn't recompute the path for inode 262 before
> issuing the unlink operation for the link named "f2l2" of inode 262. The
> detection failed because the function "is_ancestor()" only follows the
> first hard link it finds for an inode instead of all of its hard links
> (as it was originally created for being used with directories only, for
> which only one hard link exists). So fix this by making "is_ancestor()"
> follow all hard links of the input inode.
> 
> A test case for fstests follows soon.
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>

FYI, added to the rc2 pull.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 8fd195cfe81b..2c35717a3470 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3527,7 +3527,40 @@  static int wait_for_dest_dir_move(struct send_ctx *sctx,
 }
 
 /*
- * Check if ino ino1 is an ancestor of inode ino2 in the given root.
+ * Check if inode ino2, or any of its ancestors, is inode ino1.
+ * Return 1 if true, 0 if false and < 0 on error.
+ */
+static int check_ino_in_path(struct btrfs_root *root,
+			     const u64 ino1,
+			     const u64 ino1_gen,
+			     const u64 ino2,
+			     const u64 ino2_gen,
+			     struct fs_path *fs_path)
+{
+	u64 ino = ino2;
+
+	if (ino1 == ino2)
+		return ino1_gen == ino2_gen;
+
+	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+		u64 parent;
+		u64 parent_gen;
+		int ret;
+
+		fs_path_reset(fs_path);
+		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
+		if (ret < 0)
+			return ret;
+		if (parent == ino1)
+			return parent_gen == ino1_gen;
+		ino = parent;
+	}
+	return 0;
+}
+
+/*
+ * Check if ino ino1 is an ancestor of inode ino2 in the given root for any
+ * possible path (in case ino2 is not a directory and has multiple hard links).
  * Return 1 if true, 0 if false and < 0 on error.
  */
 static int is_ancestor(struct btrfs_root *root,
@@ -3536,36 +3569,91 @@  static int is_ancestor(struct btrfs_root *root,
 		       const u64 ino2,
 		       struct fs_path *fs_path)
 {
-	u64 ino = ino2;
-	bool free_path = false;
+	bool free_fs_path = false;
 	int ret = 0;
+	struct btrfs_path *path = NULL;
+	struct btrfs_key key;
 
 	if (!fs_path) {
 		fs_path = fs_path_alloc();
 		if (!fs_path)
 			return -ENOMEM;
-		free_path = true;
+		free_fs_path = true;
 	}
 
-	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
-		u64 parent;
-		u64 parent_gen;
+	path = alloc_path_for_send();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
-		fs_path_reset(fs_path);
-		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
-		if (ret < 0) {
-			if (ret == -ENOENT && ino == ino2)
-				ret = 0;
-			goto out;
+	key.objectid = ino2;
+	key.type = BTRFS_INODE_REF_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	while (true) {
+		struct extent_buffer *leaf = path->nodes[0];
+		int slot = path->slots[0];
+		u32 cur_offset = 0;
+		u32 item_size;
+
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+			continue;
 		}
-		if (parent == ino1) {
-			ret = parent_gen == ino1_gen ? 1 : 0;
-			goto out;
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != ino2)
+			break;
+		if (key.type != BTRFS_INODE_REF_KEY &&
+		    key.type != BTRFS_INODE_EXTREF_KEY)
+			break;
+
+		item_size = btrfs_item_size_nr(leaf, slot);
+		while (cur_offset < item_size) {
+			u64 parent;
+			u64 parent_gen;
+
+			if (key.type == BTRFS_INODE_EXTREF_KEY) {
+				unsigned long ptr;
+				struct btrfs_inode_extref *extref;
+
+				ptr = btrfs_item_ptr_offset(leaf, slot);
+				extref = (struct btrfs_inode_extref *)
+					(ptr + cur_offset);
+				parent = btrfs_inode_extref_parent(leaf,
+								   extref);
+				cur_offset += sizeof(*extref);
+				cur_offset += btrfs_inode_extref_name_len(leaf,
+								  extref);
+			} else {
+				parent = key.offset;
+				cur_offset = item_size;
+			}
+
+			ret = get_inode_info(root, parent, NULL, &parent_gen,
+					     NULL, NULL, NULL, NULL);
+			if (ret < 0)
+				goto out;
+			ret = check_ino_in_path(root, ino1, ino1_gen,
+						parent, parent_gen, fs_path);
+			if (ret)
+				goto out;
 		}
-		ino = parent;
+		path->slots[0]++;
 	}
+	ret = 0;
  out:
-	if (free_path)
+	btrfs_free_path(path);
+	if (free_fs_path)
 		fs_path_free(fs_path);
 	return ret;
 }