diff mbox series

[v4,02/15] btrfs: combine device update operations during transaction commit

Message ID 20190327122418.24027-3-nborisov@suse.com (mailing list archive)
State New, archived
Headers show
Series FITRIM improvement | expand

Commit Message

Nikolay Borisov March 27, 2019, 12:24 p.m. UTC
We currently overload the pending_chunks list to handle updating
btrfs_device->commit_bytes used.  We don't actually care about
the extent mapping or even the device mapping for the chunk - we
just need the device, and we can end up processing it multiple
times.  The fs_devices->resized_list does more or less the same
thing, but with the disk size.  They are called consecutively
during commit and have more or less the same purpose.

We can combine the two lists into a single list that attaches
to the transaction and contains a list of devices that need
updating.  Since we always add the device to a list when we
change bytes_used or disk_total_size, there's no harm in
copying both values at once.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/dev-replace.c |  2 +-
 fs/btrfs/disk-io.c     |  7 ++++
 fs/btrfs/transaction.c |  5 ++-
 fs/btrfs/transaction.h |  1 +
 fs/btrfs/volumes.c     | 84 ++++++++++++++++++------------------------
 fs/btrfs/volumes.h     | 13 ++-----
 6 files changed, 51 insertions(+), 61 deletions(-)

Comments

David Sterba May 3, 2019, 10:23 a.m. UTC | #1
On Wed, Mar 27, 2019 at 02:24:05PM +0200, Nikolay Borisov wrote:
> --- a/fs/btrfs/dev-replace.c
> +++ b/fs/btrfs/dev-replace.c
> @@ -662,7 +662,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
>  	btrfs_device_set_disk_total_bytes(tgt_device,
>  					  src_device->disk_total_bytes);
>  	btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
> -	ASSERT(list_empty(&src_device->resized_list));
> +	ASSERT(list_empty(&src_device->post_commit_list));

I've just caught this assertion to fail at btrfs/071. It's for the first
time but this could also explain the infrequent failures of umount after
btrfs/011 test that caused other tests to fail.

btrfs/071               [10:10:22][ 2348.888263] run fstests btrfs/071 at 2019-05-03 10:10:22
[ 2349.018607] BTRFS info (device vda): disk space caching is enabled
[ 2349.021433] BTRFS info (device vda): has skinny extents
[ 2349.958749] BTRFS: device fsid 1e913c80-9d7f-4e42-95e6-55f207ef0c79 devid 1 transid 5 /dev/vdb
[ 2349.962961] BTRFS: device fsid 1e913c80-9d7f-4e42-95e6-55f207ef0c79 devid 2 transid 5 /dev/vdc
[ 2349.966961] BTRFS: device fsid 1e913c80-9d7f-4e42-95e6-55f207ef0c79 devid 3 transid 5 /dev/vdd
[ 2349.970983] BTRFS: device fsid 1e913c80-9d7f-4e42-95e6-55f207ef0c79 devid 4 transid 5 /dev/vde
[ 2349.974966] BTRFS: device fsid 1e913c80-9d7f-4e42-95e6-55f207ef0c79 devid 5 transid 5 /dev/vdf
[ 2349.978829] BTRFS: device fsid 1e913c80-9d7f-4e42-95e6-55f207ef0c79 devid 6 transid 5 /dev/vdg
[ 2349.993612] BTRFS info (device vdb): disk space caching is enabled
[ 2349.996386] BTRFS info (device vdb): has skinny extents
[ 2349.998780] BTRFS info (device vdb): flagging fs with big metadata feature
[ 2350.018398] BTRFS info (device vdb): checking UUID tree
[ 2350.123817] BTRFS info (device vdb): dev_replace from /dev/vdc (devid 2) to /dev/vdh started
[ 2350.275831] BTRFS info (device vdb): use no compression, level 0
[ 2350.279478] BTRFS info (device vdb): disk space caching is enabled
[ 2351.586031] BTRFS info (device vdb): use zlib compression, level 3
[ 2351.588935] BTRFS info (device vdb): disk space caching is enabled
[ 2351.757525] BTRFS info (device vdb): dev_replace from /dev/vdc (devid 2) to /dev/vdh finished
[ 2351.761606] assertion failed: list_empty(&src_device->post_commit_list), file: fs/btrfs/dev-replace.c, line: 665
[ 2351.766222] ------------[ cut here ]------------
[ 2351.768904] kernel BUG at fs/btrfs/ctree.h:3518!
[ 2351.771982] invalid opcode: 0000 [#1] PREEMPT SMP
[ 2351.774878] CPU: 6 PID: 26220 Comm: btrfs Not tainted 5.1.0-rc7-default+ #16
[ 2351.778992] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014
[ 2351.785773] RIP: 0010:assfail.constprop.14+0x18/0x1a [btrfs]
[ 2351.789143] Code: c6 80 1f 37 c0 48 89 d1 48 89 c2 e8 7f e8 ff ff 58 c3 89 f1 48 c7 c2 f0 7d 36 c0 48 89 fe 48 c7 c7 50 23 37 c0 e8 e2 25 d7 d6 <0f> 0b 89 f1 48 c7 c2 61 7e 36 c0 48 89 fe 48 c7 c7 80 28 37 c0 e8
[ 2351.800167] RSP: 0018:ffffb6af0c07fc58 EFLAGS: 00010282
[ 2351.803316] RAX: 0000000000000064 RBX: ffff8fbfa065c000 RCX: 0000000000000000
[ 2351.807407] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffff970c9e13
[ 2351.811676] RBP: ffffb6af0c07fcc0 R08: 0000000000000001 R09: 0000000000000000
[ 2351.815868] R10: ffff8fbfa1a57c00 R11: 0000000000000000 R12: ffff8fbfa065f658
[ 2351.820078] R13: ffff8fbfa065f5b8 R14: 0000000000000002 R15: ffff8fbfa1dc7000
[ 2351.824435] FS:  00007f07984a48c0(0000) GS:ffff8fbfb7800000(0000) knlGS:0000000000000000
[ 2351.829613] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2351.833253] CR2: 00007fb27c8e330c CR3: 000000021bfe7000 CR4: 00000000000006e0
[ 2351.836535] Call Trace:
[ 2351.837705]  btrfs_dev_replace_finishing+0x585/0x600 [btrfs]
[ 2351.840662]  ? btrfs_dev_replace_by_ioctl+0x502/0x7f0 [btrfs]
[ 2351.843173]  btrfs_dev_replace_by_ioctl+0x502/0x7f0 [btrfs]
[ 2351.846375]  btrfs_ioctl+0x24d9/0x2e40 [btrfs]
[ 2351.849004]  ? writeback_single_inode+0xbe/0x110
[ 2351.851664]  ? do_sigaction+0x63/0x250
[ 2351.853477]  ? do_vfs_ioctl+0xa2/0x6f0
[ 2351.855295]  do_vfs_ioctl+0xa2/0x6f0
[ 2351.857691]  ? do_sigaction+0xfc/0x250
[ 2351.860046]  ksys_ioctl+0x3a/0x70
[ 2351.862109]  __x64_sys_ioctl+0x16/0x20
[ 2351.864235]  do_syscall_64+0x54/0x190
[ 2351.865969]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 2351.868919] RIP: 0033:0x7f079859c607
[ 2351.871253] Code: 00 00 90 48 8b 05 91 88 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 61 88 0c 00 f7 d8 64 89 01 48
[ 2351.882057] RSP: 002b:00007ffe865db768 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 2351.886691] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f079859c607
[ 2351.890792] RDX: 00007ffe865dbba0 RSI: 00000000ca289435 RDI: 0000000000000003
[ 2351.894822] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[ 2351.898903] R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffe865df280
[ 2351.903036] R13: 0000000000000001 R14: 0000000000000004 R15: 00005622c7bc32a0
diff mbox series

Patch

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ee193c5222b2..dba43ada41d1 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -662,7 +662,7 @@  static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	btrfs_device_set_disk_total_bytes(tgt_device,
 					  src_device->disk_total_bytes);
 	btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
-	ASSERT(list_empty(&src_device->resized_list));
+	ASSERT(list_empty(&src_device->post_commit_list));
 	tgt_device->commit_total_bytes = src_device->commit_total_bytes;
 	tgt_device->commit_bytes_used = src_device->bytes_used;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5d58dd48342f..79800311b8e1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4497,10 +4497,17 @@  void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 				   struct btrfs_fs_info *fs_info)
 {
+	struct btrfs_device *dev, *tmp;
+
 	btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
 	ASSERT(list_empty(&cur_trans->dirty_bgs));
 	ASSERT(list_empty(&cur_trans->io_bgs));
 
+	list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
+				 post_commit_list) {
+		list_del_init(&dev->post_commit_list);
+	}
+
 	btrfs_destroy_delayed_refs(cur_trans, fs_info);
 
 	cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f1732b77a379..4aa827a2e951 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -75,6 +75,7 @@  void btrfs_put_transaction(struct btrfs_transaction *transaction)
 			btrfs_put_block_group_trimming(cache);
 			btrfs_put_block_group(cache);
 		}
+		WARN_ON(!list_empty(&transaction->dev_update_list));
 		kfree(transaction);
 	}
 }
@@ -264,6 +265,7 @@  static noinline int join_transaction(struct btrfs_fs_info *fs_info,
 
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	INIT_LIST_HEAD(&cur_trans->pending_chunks);
+	INIT_LIST_HEAD(&cur_trans->dev_update_list);
 	INIT_LIST_HEAD(&cur_trans->switch_commits);
 	INIT_LIST_HEAD(&cur_trans->dirty_bgs);
 	INIT_LIST_HEAD(&cur_trans->io_bgs);
@@ -2241,8 +2243,7 @@  int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	memcpy(fs_info->super_for_commit, fs_info->super_copy,
 	       sizeof(*fs_info->super_copy));
 
-	btrfs_update_commit_device_size(fs_info);
-	btrfs_update_commit_device_bytes_used(cur_trans);
+	btrfs_commit_device_sizes(cur_trans);
 
 	clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
 	clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index b34678e7968e..2bd76f681520 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -52,6 +52,7 @@  struct btrfs_transaction {
 	wait_queue_head_t commit_wait;
 	struct list_head pending_snapshots;
 	struct list_head pending_chunks;
+	struct list_head dev_update_list;
 	struct list_head switch_commits;
 	struct list_head dirty_bgs;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fcb0d3f34e09..66f4032dba13 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -318,7 +318,6 @@  static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
 	mutex_init(&fs_devs->device_list_mutex);
 
 	INIT_LIST_HEAD(&fs_devs->devices);
-	INIT_LIST_HEAD(&fs_devs->resized_devices);
 	INIT_LIST_HEAD(&fs_devs->alloc_list);
 	INIT_LIST_HEAD(&fs_devs->fs_list);
 	if (fsid)
@@ -334,6 +333,7 @@  static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
 
 void btrfs_free_device(struct btrfs_device *device)
 {
+	WARN_ON(!list_empty(&device->post_commit_list));
 	rcu_string_free(device->name);
 	bio_put(device->flush_bio);
 	kfree(device);
@@ -402,7 +402,7 @@  static struct btrfs_device *__alloc_device(void)
 
 	INIT_LIST_HEAD(&dev->dev_list);
 	INIT_LIST_HEAD(&dev->dev_alloc_list);
-	INIT_LIST_HEAD(&dev->resized_list);
+	INIT_LIST_HEAD(&dev->post_commit_list);
 
 	spin_lock_init(&dev->io_lock);
 
@@ -2880,9 +2880,9 @@  int btrfs_grow_device(struct btrfs_trans_handle *trans,
 	btrfs_device_set_total_bytes(device, new_size);
 	btrfs_device_set_disk_total_bytes(device, new_size);
 	btrfs_clear_space_info_full(device->fs_info);
-	if (list_empty(&device->resized_list))
-		list_add_tail(&device->resized_list,
-			      &fs_devices->resized_devices);
+	if (list_empty(&device->post_commit_list))
+		list_add_tail(&device->post_commit_list,
+			      &trans->transaction->dev_update_list);
 	mutex_unlock(&fs_info->chunk_mutex);
 
 	return btrfs_update_device(trans, device);
@@ -4871,9 +4871,9 @@  int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 	}
 
 	btrfs_device_set_disk_total_bytes(device, new_size);
-	if (list_empty(&device->resized_list))
-		list_add_tail(&device->resized_list,
-			      &fs_info->fs_devices->resized_devices);
+	if (list_empty(&device->post_commit_list))
+		list_add_tail(&device->post_commit_list,
+			      &trans->transaction->dev_update_list);
 
 	WARN_ON(diff > old_total);
 	btrfs_set_super_total_bytes(super_copy,
@@ -5222,9 +5222,14 @@  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto error_del_extent;
 
-	for (i = 0; i < map->num_stripes; i++)
-		btrfs_device_set_bytes_used(map->stripes[i].dev,
-				map->stripes[i].dev->bytes_used + stripe_size);
+	for (i = 0; i < map->num_stripes; i++) {
+		struct btrfs_device *dev = map->stripes[i].dev;
+
+		btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size);
+		if (list_empty(&dev->post_commit_list))
+			list_add_tail(&dev->post_commit_list,
+				      &trans->transaction->dev_update_list);
+	}
 
 	atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
 
@@ -7674,51 +7679,34 @@  void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_pat
 }
 
 /*
- * Update the size of all devices, which is used for writing out the
- * super blocks.
+ * Update the size and bytes used for each device where it changed.
+ * This is delayed since we would otherwise get errors while writing
+ * out the superblocks.
+ *
+ * Must be invoked during transaction commit.
  */
-void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
+void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
 {
-	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	struct btrfs_device *curr, *next;
 
-	if (list_empty(&fs_devices->resized_devices))
-		return;
-
-	mutex_lock(&fs_devices->device_list_mutex);
-	mutex_lock(&fs_info->chunk_mutex);
-	list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
-				 resized_list) {
-		list_del_init(&curr->resized_list);
-		curr->commit_total_bytes = curr->disk_total_bytes;
-	}
-	mutex_unlock(&fs_info->chunk_mutex);
-	mutex_unlock(&fs_devices->device_list_mutex);
-}
-
-/* Must be invoked during the transaction commit */
-void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
-{
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct extent_map *em;
-	struct map_lookup *map;
-	struct btrfs_device *dev;
-	int i;
+	ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
 
-	if (list_empty(&trans->pending_chunks))
+	if (list_empty(&trans->dev_update_list))
 		return;
 
-	/* In order to kick the device replace finish process */
-	mutex_lock(&fs_info->chunk_mutex);
-	list_for_each_entry(em, &trans->pending_chunks, list) {
-		map = em->map_lookup;
-
-		for (i = 0; i < map->num_stripes; i++) {
-			dev = map->stripes[i].dev;
-			dev->commit_bytes_used = dev->bytes_used;
-		}
+	/*
+	 * We don't need the device_list_mutex here.  This list is owned
+	 * by the transaction and the transaction must complete before
+	 * the device is released.
+	 */
+	mutex_lock(&trans->fs_info->chunk_mutex);
+	list_for_each_entry_safe(curr, next, &trans->dev_update_list,
+				 post_commit_list) {
+		list_del_init(&curr->post_commit_list);
+		curr->commit_total_bytes = curr->disk_total_bytes;
+		curr->commit_bytes_used = curr->bytes_used;
 	}
-	mutex_unlock(&fs_info->chunk_mutex);
+	mutex_unlock(&trans->fs_info->chunk_mutex);
 }
 
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3ad9d58d1b66..a0f09aad3770 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -45,6 +45,7 @@  struct btrfs_pending_bios {
 struct btrfs_device {
 	struct list_head dev_list;
 	struct list_head dev_alloc_list;
+	struct list_head post_commit_list; /* chunk mutex */
 	struct btrfs_fs_devices *fs_devices;
 	struct btrfs_fs_info *fs_info;
 
@@ -102,18 +103,12 @@  struct btrfs_device {
 	 * size of the device on the current transaction
 	 *
 	 * This variant is update when committing the transaction,
-	 * and protected by device_list_mutex
+	 * and protected by chunk mutex
 	 */
 	u64 commit_total_bytes;
 
 	/* bytes used on the current transaction */
 	u64 commit_bytes_used;
-	/*
-	 * used to manage the device which is resized
-	 *
-	 * It is protected by chunk_lock.
-	 */
-	struct list_head resized_list;
 
 	/* for sending down flush barriers */
 	struct bio *flush_bio;
@@ -235,7 +230,6 @@  struct btrfs_fs_devices {
 	struct mutex device_list_mutex;
 	struct list_head devices;
 
-	struct list_head resized_devices;
 	/* devices not currently being allocated */
 	struct list_head alloc_list;
 
@@ -558,8 +552,7 @@  static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
 
 const char *get_raid_name(enum btrfs_raid_types type);
 
-void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
-void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
+void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
 
 struct list_head *btrfs_get_fs_uuids(void);
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);