[1/2] btrfs-progs: btrfsck: verify qgroups above level 0
diff mbox

Message ID 1466195869-28109-2-git-send-email-mfasheh@suse.de
State New
Headers show

Commit Message

Mark Fasheh June 17, 2016, 8:37 p.m. UTC
At the moment we only check subvolume quota groups (level 0). With this
patch we can check groups above 0, thus verifying the entire qgroup
hierarchy on a file system.  The accounting portion of this patch is modeled
after the kernel - we are essentially reimplementing the 'quota rescan' case
here. Most other sections of this code went unchanged, in particular the
root counting works independently of the accounting.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
---
 qgroup-verify.c | 309 +++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 252 insertions(+), 57 deletions(-)

Patch
diff mbox

diff --git a/qgroup-verify.c b/qgroup-verify.c
index 7b78504..23f2961 100644
--- a/qgroup-verify.c
+++ b/qgroup-verify.c
@@ -35,7 +35,8 @@ 
 /*#define QGROUP_VERIFY_DEBUG*/
 static unsigned long tot_extents_scanned = 0;
 
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
+struct qgroup_count;
+static struct qgroup_count *find_count(u64 qgroupid);
 
 struct qgroup_info {
 	u64 referenced;
@@ -54,6 +55,16 @@  struct qgroup_count {
 	struct qgroup_info info;
 
 	struct rb_node rb_node;
+
+	struct list_head groups;  /* Parents when we are a child group */
+
+	/*
+	 * Children when we are a parent group (not currently used but
+	 * maintained to mirror kernel handling of qgroups)
+	 */
+	struct list_head members;
+
+	u64 cur_refcnt;
 };
 
 static struct counts_tree {
@@ -66,6 +77,39 @@  static struct counts_tree {
 static struct rb_root by_bytenr = RB_ROOT;
 
 /*
+ * Glue structure to represent the relations between qgroups. Mirrored
+ * from kernel.
+ */
+struct btrfs_qgroup_list {
+	struct list_head next_group;
+	struct list_head next_member;
+	struct qgroup_count *group; /* Parent group */
+	struct qgroup_count *member;
+};
+
+/* Allow us to reset ref counts during accounting without zeroing each group. */
+static u64 qgroup_seq = 1ULL;
+
+static inline void update_cur_refcnt(struct qgroup_count *c)
+{
+	if (c->cur_refcnt < qgroup_seq)
+		c->cur_refcnt = qgroup_seq;
+	c->cur_refcnt++;
+}
+
+static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
+{
+	if (c->cur_refcnt < qgroup_seq)
+		return 0;
+	return c->cur_refcnt - qgroup_seq;
+}
+
+static void inc_qgroup_seq(int root_count)
+{
+	qgroup_seq += root_count + 1;
+}
+
+/*
  * List of interior tree blocks. We walk this list after loading the
  * extent tree to resolve implied refs. For each interior node we'll
  * place a shared ref in the ref tree against each child object. This
@@ -296,9 +340,10 @@  static void find_parent_roots(struct ulist *roots, u64 parent)
 	}
 
 	do {
-		if (ref->root)
-			ulist_add(roots, ref->root, 0, 0);
-		else
+		if (ref->root) {
+			if (is_fstree(ref->root))
+				ulist_add(roots, ref->root, 0, 0);
+		} else
 			find_parent_roots(roots, ref->parent);
 
 		node = rb_next(node);
@@ -307,6 +352,114 @@  static void find_parent_roots(struct ulist *roots, u64 parent)
 	} while (node && ref->bytenr == parent);
 }
 
+static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
+{
+	int ret;
+	u64 id, nr_roots, nr_refs;
+	struct qgroup_count *count;
+	struct ulist *counts = ulist_alloc(0);
+	struct ulist *tmp = ulist_alloc(0);
+	struct ulist_iterator uiter;
+	struct ulist_iterator tmp_uiter;
+	struct ulist_node *unode;
+	struct ulist_node *tmp_unode;
+	struct btrfs_qgroup_list *glist;
+
+	if (!counts || !tmp) {
+		ulist_free(counts);
+		ulist_free(tmp);
+		return ENOMEM;
+	}
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		BUG_ON(unode->val == 0ULL);
+
+		/*
+		 * For each root, find their corresponding tracking group and
+		 * add it to our qgroups list.
+		 */
+		count = find_count(unode->val);
+		if (!count)
+			continue;
+
+		BUG_ON(!is_fstree(unode->val));
+		ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * Now we look for parents (and parents of those...). Use a tmp
+		 * ulist here to avoid re-walking (and re-incrementing) our
+		 * already added items on every loop iteration.
+		 */
+		ulist_reinit(tmp);
+		ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
+		if (ret < 0)
+			goto out;
+
+		ULIST_ITER_INIT(&tmp_uiter);
+		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+			/* Bump the refcount on a node every time we see it. */
+			count = u64_to_ptr(tmp_unode->aux);
+			update_cur_refcnt(count);
+
+			list_for_each_entry(glist, &count->groups, next_group) {
+				struct qgroup_count *parent;
+				parent = glist->group;
+				id = parent->qgroupid;
+
+				BUG_ON(!count);
+
+				ret = ulist_add(counts, id, ptr_to_u64(parent),
+						0);
+				if (ret < 0)
+					goto out;
+				ret = ulist_add(tmp, id, ptr_to_u64(parent),
+						0);
+				if (ret < 0)
+					goto out;
+			}
+		}
+	}
+
+	/*
+	 * Now that we have gathered up and counted all the groups, we
+	 * can add bytes for this ref.
+	 */
+	nr_roots = roots->nnodes;
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(counts, &uiter))) {
+		count = u64_to_ptr(unode->aux);
+
+		nr_refs = group_get_cur_refcnt(count);
+		if (nr_refs) {
+			count->info.referenced += num_bytes;
+			count->info.referenced_compressed += num_bytes;
+
+			if (nr_refs == nr_roots) {
+				count->info.exclusive += num_bytes;
+				count->info.exclusive_compressed += num_bytes;
+			}
+		}
+#ifdef QGROUP_VERIFY_DEBUG
+		printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
+		       " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
+		       btrfs_qgroup_level(count->qgroupid),
+		       btrfs_qgroup_subvid(count->qgroupid),
+		       count->info.referenced, count->info.exclusive, nr_refs,
+		       nr_roots);
+#endif
+	}
+
+	inc_qgroup_seq(roots->nnodes);
+	ret = 0;
+out:
+	ulist_free(counts);
+	ulist_free(tmp);
+	return ret;
+}
+
 static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
 			      struct ulist *roots);
 /*
@@ -318,18 +471,15 @@  static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
  * - resolve all possible roots for shared refs, insert each
  *   of those into ref_roots ulist (this is a recursive process)
  *
- * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
- *    cooresponds to a found root.
+ * - With all roots resolved we can account the ref - this is done in
+ *   account_one_extent().
  */
-static void account_all_refs(int do_qgroups, u64 search_subvol)
+static int account_all_refs(int do_qgroups, u64 search_subvol)
 {
-	int exclusive;
 	struct ref *ref;
 	struct rb_node *node;
 	u64 bytenr, num_bytes;
 	struct ulist *roots = ulist_alloc(0);
-	struct ulist_iterator uiter;
-	struct ulist_node *unode;
 
 	node = rb_first(&by_bytenr);
 	while (node) {
@@ -347,10 +497,14 @@  static void account_all_refs(int do_qgroups, u64 search_subvol)
 		do {
 			BUG_ON(ref->bytenr != bytenr);
 			BUG_ON(ref->num_bytes != num_bytes);
-			if (ref->root)
-				ulist_add(roots, ref->root, 0, 0);
-			else
+			if (ref->root) {
+				if (is_fstree(ref->root)) {
+					if (ulist_add(roots, ref->root, 0, 0) < 0)
+						goto enomem;
+				}
+			} else {
 				find_parent_roots(roots, ref->parent);
+			}
 
 			/*
 			 * When we leave this inner loop, node is set
@@ -362,29 +516,22 @@  static void account_all_refs(int do_qgroups, u64 search_subvol)
 				ref = rb_entry(node, struct ref, bytenr_node);
 		} while (node && ref->bytenr == bytenr);
 
-		/*
-		 * Now that we have all roots, we can properly account
-		 * this extent against the corresponding qgroups.
-		 */
-		if (roots->nnodes == 1)
-			exclusive = 1;
-		else
-			exclusive = 0;
-
 		if (search_subvol)
 			print_subvol_info(search_subvol, bytenr, num_bytes,
 					  roots);
 
-		ULIST_ITER_INIT(&uiter);
-		while ((unode = ulist_next(roots, &uiter))) {
-			BUG_ON(unode->val == 0ULL);
-			/* We only want to account fs trees */
-			if (is_fstree(unode->val) && do_qgroups)
-				add_bytes(unode->val, num_bytes, exclusive);
-		}
+		if (!do_qgroups)
+			continue;
+
+		if (account_one_extent(roots, bytenr, num_bytes))
+			goto enomem;
 	}
 
 	ulist_free(roots);
+	return 0;
+enomem:
+	error("Out of memory while accounting refs for qgroups!\n");
+	return -ENOMEM;
 }
 
 static u64 resolve_one_root(u64 bytenr)
@@ -668,6 +815,8 @@  static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
 		item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
 		item->exclusive_compressed =
 			btrfs_qgroup_info_exclusive_compressed(leaf, disk);
+		INIT_LIST_HEAD(&c->groups);
+		INIT_LIST_HEAD(&c->members);
 
 		if (insert_count(c)) {
 			free(c);
@@ -677,29 +826,30 @@  static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
 	return c;
 }
 
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
+static int add_qgroup_relation(u64 memberid, u64 parentid)
 {
-	struct qgroup_count *count = find_count(root_objectid);
-	struct qgroup_info *qg;
+	struct qgroup_count *member;
+	struct qgroup_count *parent;
+	struct btrfs_qgroup_list *list;
 
-	BUG_ON(num_bytes < 4096); /* Random sanity check. */
+	if (memberid > parentid)
+		return 0;
 
-	if (!count)
-		return;
+	member = find_count(memberid);
+	parent = find_count(parentid);
+	if (!member || !parent)
+		return -ENOENT;
 
-	qg = &count->info;
+	list = calloc(1, sizeof(*list));
+	if (!list)
+		return -ENOMEM;
 
-	qg->referenced += num_bytes;
-	/*
-	 * count of compressed bytes is unimplemented, so we do the
-	 * same as kernel.
-	 */
-	qg->referenced_compressed += num_bytes;
+	list->group = parent;
+	list->member = member;
+	list_add_tail(&list->next_group, &member->groups);
+	list_add_tail(&list->next_member, &parent->members);
 
-	if (exclusive) {
-		qg->exclusive += num_bytes;
-		qg->exclusive_compressed += num_bytes;
-	}
+	return 0;
 }
 
 static void read_qgroup_status(struct btrfs_path *path,
@@ -728,11 +878,18 @@  static int load_quota_info(struct btrfs_fs_info *info)
 	struct btrfs_qgroup_info_item *item;
 	struct qgroup_count *count;
 	int i, nr;
+	int search_relations = 0;
 
+loop:
+	/*
+	 * Do 2 passes, the first allocates group counts and reads status
+	 * items. The 2nd pass picks up relation items and glues them
+	 * to their respective count structures.
+	 */
 	btrfs_init_path(&path);
 
 	key.offset = 0;
-	key.objectid = 0;
+	key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
 	key.type = 0;
 
 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
@@ -749,17 +906,26 @@  static int load_quota_info(struct btrfs_fs_info *info)
 			btrfs_item_key(leaf, &disk_key, i);
 			btrfs_disk_key_to_cpu(&key, &disk_key);
 
+			if (search_relations) {
+				if (key.type == BTRFS_QGROUP_RELATION_KEY) {
+					ret = add_qgroup_relation(key.objectid,
+								  key.offset);
+					if (ret) {
+						error("out of memory\n");
+						goto out;
+					}
+				}
+				continue;
+			}
+
 			if (key.type == BTRFS_QGROUP_STATUS_KEY) {
 				read_qgroup_status(&path, &counts);
 				continue;
 			}
-			if (key.type == BTRFS_QGROUP_RELATION_KEY)
-				printf("Ignoring qgroup relation key %llu\n",
-				       key.objectid);
 
 			/*
-			 * Ignore: BTRFS_QGROUP_LIMIT_KEY,
-			 * 	   BTRFS_QGROUP_RELATION_KEY
+			 * At this point, we can ignore anything that
+			 * isn't a qgroup info.
 			 */
 			if (key.type != BTRFS_QGROUP_INFO_KEY)
 				continue;
@@ -791,6 +957,12 @@  static int load_quota_info(struct btrfs_fs_info *info)
 
 	ret = 0;
 	btrfs_release_path(&path);
+
+	if (!search_relations) {
+		search_relations = 1;
+		goto loop;
+	}
+
 out:
 	return ret;
 }
@@ -1035,6 +1207,11 @@  static void print_fields_signed(long long bytes,
 	       prefix, type, bytes, type, bytes_compressed);
 }
 
+static inline int qgroup_printable(struct qgroup_count *c)
+{
+	return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
+}
+
 static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 {
 	int is_different;
@@ -1045,9 +1222,10 @@  static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 
 	is_different = excl_diff || ref_diff;
 
-	if (verbose || (is_different && count->subvol_exists)) {
-		printf("Counts for qgroup id: %llu %s\n",
-		       (unsigned long long)count->qgroupid,
+	if (verbose || (is_different && qgroup_printable(count))) {
+		printf("Counts for qgroup id: %llu/%llu %s\n",
+		       btrfs_qgroup_level(count->qgroupid),
+		       btrfs_qgroup_subvid(count->qgroupid),
 		       is_different ? "are different" : "");
 
 		print_fields(info->referenced, info->referenced_compressed,
@@ -1099,10 +1277,27 @@  void free_qgroup_counts(void)
 {
 	struct rb_node *node;
 	struct qgroup_count *c;
+	struct btrfs_qgroup_list *glist, *tmpglist;
+
 	node = rb_first(&counts.root);
 	while (node) {
 		c = rb_entry(node, struct qgroup_count, rb_node);
+
+		list_for_each_entry_safe(glist, tmpglist, &c->groups,
+					 next_group) {
+			list_del(&glist->next_group);
+			list_del(&glist->next_member);
+			free(glist);
+		}
+		list_for_each_entry_safe(glist, tmpglist, &c->members,
+					 next_group) {
+			list_del(&glist->next_group);
+			list_del(&glist->next_member);
+			free(glist);
+		}
+
 		node = rb_next(node);
+
 		rb_erase(&c->rb_node, &counts.root);
 		free(c);
 	}
@@ -1143,7 +1338,7 @@  int qgroup_verify_all(struct btrfs_fs_info *info)
 		goto out;
 	}
 
-	account_all_refs(1, 0);
+	ret = account_all_refs(1, 0);
 
 out:
 	/*
@@ -1215,7 +1410,7 @@  int print_extent_state(struct btrfs_fs_info *info, u64 subvol)
 	}
 
 	printf("Offset\t\tLen\tRoot Refs\tRoots\n");
-	account_all_refs(0, subvol);
+	ret = account_all_refs(0, subvol);
 
 out:
 	free_tree_blocks();