diff mbox series

[v3,12/15] btrfs-progs: redirty clean extent buffers in seq

Message ID 20190820045258.1571640-13-naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs-progs: zoned block device support | expand

Commit Message

Naohiro Aota Aug. 20, 2019, 4:52 a.m. UTC
Tree manipulating operations like merging nodes often release
once-allocated tree nodes. Btrfs cleans such nodes so that pages in the
node are not uselessly written out. On HMZONED drives, however, this
optimization blocks subsequent I/O, because cancelling the write-out
of the freed blocks breaks the sequential write order expected by the
device.

This patch checks whether the next dirty extent buffer is contiguous with
the previously written one. If not, it redirties the extent buffers between
the previous one and the next one, so that all dirty buffers are written
sequentially.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 common/hmzoned.c | 28 ++++++++++++++++++++++++++++
 common/hmzoned.h |  2 ++
 ctree.h          |  1 +
 transaction.c    |  7 +++++++
 4 files changed, 38 insertions(+)
diff mbox series

Patch

diff --git a/common/hmzoned.c b/common/hmzoned.c
index 0e54144259b7..1b3830b429ab 100644
--- a/common/hmzoned.c
+++ b/common/hmzoned.c
@@ -555,6 +555,51 @@  int btrfs_load_block_group_zone_info(struct btrfs_fs_info *fs_info,
 
 out:
 	cache->alloc_type = alloc_type;
+	cache->write_offset = cache->alloc_offset;
 	free(alloc_offsets);
 	return ret;
 }
+
+/*
+ * Keep writes to a BTRFS_ALLOC_SEQ block group sequential.
+ *
+ * @fs_info: filesystem used to look up the block group and tree block
+ * @start:   first byte of the dirty range about to be written
+ * @end:     last byte of that range, inclusive
+ *
+ * When the block group's write position is still below @start, the tree
+ * block at that position is marked dirty again and true is returned so the
+ * caller can restart its search for the first dirty range.  Otherwise the
+ * write position is advanced by the length of [@start, @end] and false is
+ * returned.  Block groups with any other alloc_type are left untouched (false).
+ */
+bool btrfs_redirty_extent_buffer_for_hmzoned(struct btrfs_fs_info *fs_info,
+					     u64 start, u64 end)
+{
+	u64 next;
+	struct btrfs_block_group_cache *cache;
+	struct extent_buffer *eb;
+
+	cache = btrfs_lookup_first_block_group(fs_info, start);
+	BUG_ON(!cache);
+
+	if (cache->alloc_type != BTRFS_ALLOC_SEQ)
+		return false;
+
+	/* Where the next write to this block group is expected to land. */
+	next = cache->key.objectid + cache->write_offset;
+	if (next < start) {
+		/* The gap has to be large enough for a whole tree block. */
+		BUG_ON(next + fs_info->nodesize > start);
+		eb = btrfs_find_create_tree_block(fs_info, next);
+		/* The bool return cannot carry an error, so stop here. */
+		BUG_ON(!eb);
+		btrfs_mark_buffer_dirty(eb);
+		free_extent_buffer(eb);
+		return true;
+	}
+
+	cache->write_offset += (end + 1 - start);
+
+	return false;
+}
diff --git a/common/hmzoned.h b/common/hmzoned.h
index dca7588f840b..bcbf6ea15c0b 100644
--- a/common/hmzoned.h
+++ b/common/hmzoned.h
@@ -55,6 +55,8 @@  int btrfs_get_zone_info(int fd, const char *file, bool hmzoned,
 			struct btrfs_zone_info *zinfo);
 bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
 				   u64 num_bytes);
+bool btrfs_redirty_extent_buffer_for_hmzoned(struct btrfs_fs_info *fs_info,
+					     u64 start, u64 end);
 
 #ifdef BTRFS_ZONED
 bool zone_is_sequential(struct btrfs_zone_info *zinfo, u64 bytenr);
diff --git a/ctree.h b/ctree.h
index d38708b8a6c5..cd315814614a 100644
--- a/ctree.h
+++ b/ctree.h
@@ -1125,6 +1125,7 @@  struct btrfs_block_group_cache {
 
 	enum btrfs_alloc_type alloc_type;
 	u64 alloc_offset;
+	u64 write_offset;
 };
 
 struct btrfs_device;
diff --git a/transaction.c b/transaction.c
index 45bb9e1f9de6..7b37f12f118f 100644
--- a/transaction.c
+++ b/transaction.c
@@ -18,6 +18,7 @@ 
 #include "disk-io.h"
 #include "transaction.h"
 #include "delayed-ref.h"
+#include "common/hmzoned.h"
 
 #include "common/messages.h"
 
@@ -136,10 +137,16 @@  int __commit_transaction(struct btrfs_trans_handle *trans,
 	int ret;
 
 	while(1) {
+again:
 		ret = find_first_extent_bit(tree, 0, &start, &end,
 					    EXTENT_DIRTY);
 		if (ret)
 			break;
+
+		if (btrfs_redirty_extent_buffer_for_hmzoned(fs_info, start,
+							    end))
+			goto again;
+
 		while(start <= end) {
 			eb = find_first_extent_buffer(tree, start);
 			BUG_ON(!eb || eb->start != start);