[09/14] Btrfs: raid56: add readahead for recovery

Message ID 20170801161439.13426-10-bo.li.liu@oracle.com (mailing list archive)
State New, archived

Commit Message

Liu Bo Aug. 1, 2017, 4:14 p.m. UTC
While doing recovery, blocks are read from the raid5/6 log device one
by one, so this patch adds readahead to let us read up to 256
contiguous blocks in a single read IO.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/raid56.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 110 insertions(+), 5 deletions(-)
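
The readahead window, in userspace terms: pre-allocate a fixed pool of
page-sized buffers, fill the whole pool with one large read, and serve
per-page requests out of the pool, refilling it only on a miss. The
sketch below illustrates that pattern and is not the kernel code
itself: ra_window, ra_fill and ra_read_page are made-up names, pread(2)
stands in for the bio, and offsets are assumed page-aligned, as the
patch asserts.

	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>

	#define RA_PAGE_SIZE	4096
	#define RA_POOL_PAGES	256	/* mirrors BTRFS_R5L_RECOVER_IO_POOL_SIZE */

	struct ra_window {
		int fd;			/* stands in for the log device */
		char *pool;		/* RA_POOL_PAGES contiguous page buffers */
		uint64_t start;		/* device offset cached at pool[0] */
		int valid;		/* pages currently holding device data */
	};

	/* Window miss: refill the whole pool with a single large read. */
	static int ra_fill(struct ra_window *w, uint64_t offset)
	{
		ssize_t n = pread(w->fd, w->pool,
				  (size_t)RA_POOL_PAGES * RA_PAGE_SIZE,
				  (off_t)offset);

		if (n < 0)
			return -1;
		w->start = offset;
		w->valid = n / RA_PAGE_SIZE;
		return w->valid ? 0 : -1;
	}

	/* Copy one page at @offset into @dst, refilling on a miss. */
	static int ra_read_page(struct ra_window *w, char *dst, uint64_t offset)
	{
		if (offset < w->start ||
		    offset >= w->start + (uint64_t)w->valid * RA_PAGE_SIZE) {
			if (ra_fill(w, offset))
				return -1;
		}
		memcpy(dst, w->pool + (offset - w->start), RA_PAGE_SIZE);
		return 0;
	}

Sequential page reads miss the window only once every 256 pages, which
is the point of the patch: one large IO instead of 256 page-sized ones.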

Patch

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dea33c4..24f7cbb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1530,15 +1530,81 @@  static int btrfs_r5l_write_empty_meta_block(struct btrfs_r5l_log *log, u64 pos,
 	return ret;
 }
 
+#define BTRFS_R5L_RECOVER_IO_POOL_SIZE BIO_MAX_PAGES
 struct btrfs_r5l_recover_ctx {
 	u64 pos;
 	u64 seq;
 	u64 total_size;
 	struct page *meta_page;
 	struct page *io_page;
+
+	struct page *ra_pages[BTRFS_R5L_RECOVER_IO_POOL_SIZE];
+	struct bio *ra_bio;
+	int total;
+	int valid;
+	u64 start_offset;
+
+	struct btrfs_r5l_log *log;
 };
 
-static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx)
+static int btrfs_r5l_recover_read_ra(struct btrfs_r5l_recover_ctx *ctx, u64 offset)
+{
+	bio_reset(ctx->ra_bio);
+	ctx->ra_bio->bi_bdev = ctx->log->dev->bdev;
+	ctx->ra_bio->bi_opf = REQ_OP_READ;
+	ctx->ra_bio->bi_iter.bi_sector = (ctx->log->data_offset + offset) >> 9;
+
+	ctx->valid = 0;
+	ctx->start_offset = offset;
+
+	while (ctx->valid < ctx->total) {
+		bio_add_page(ctx->ra_bio, ctx->ra_pages[ctx->valid++], PAGE_SIZE, 0);
+
+		offset = btrfs_r5l_ring_add(ctx->log, offset, PAGE_SIZE);
+		if (offset == 0)
+			break;
+	}
+
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("to read %d pages starting from 0x%llx\n", ctx->valid, ctx->log->data_offset + ctx->start_offset);
+#endif
+	return submit_bio_wait(ctx->ra_bio);
+}
+
+static int btrfs_r5l_recover_read_page(struct btrfs_r5l_recover_ctx *ctx, struct page *page, u64 offset)
+{
+	struct page *tmp;
+	int index;
+	char *src;
+	char *dst;
+	int ret;
+
+	if (offset < ctx->start_offset || offset >= (ctx->start_offset + ctx->valid * PAGE_SIZE)) {
+		ret = btrfs_r5l_recover_read_ra(ctx, offset);
+		if (ret)
+			return ret;
+	}
+
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("offset 0x%llx start->offset 0x%llx ctx->valid %d\n", offset, ctx->start_offset, ctx->valid);
+#endif
+
+	ASSERT(IS_ALIGNED(ctx->start_offset, PAGE_SIZE));
+	ASSERT(IS_ALIGNED(offset, PAGE_SIZE));
+
+	index = (offset - ctx->start_offset) >> PAGE_SHIFT;
+	ASSERT(index < ctx->valid);
+
+	tmp = ctx->ra_pages[index];
+	src = kmap(tmp);
+	dst = kmap(page);
+	memcpy(dst, src, PAGE_SIZE);
+	kunmap(page);
+	kunmap(tmp);
+	return 0;
+}
+
+static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx)
 {
 	struct btrfs_r5l_meta_block *mb;
 
@@ -1642,6 +1708,43 @@  static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx)
 	}
 
 	return ret;
 }
+
+static int btrfs_r5l_recover_allocate_ra(struct btrfs_r5l_recover_ctx *ctx)
+{
+	struct page *page;
+	ctx->ra_bio = btrfs_io_bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+
+	ctx->total = 0;
+	ctx->valid = 0;
+	while (ctx->total < BTRFS_R5L_RECOVER_IO_POOL_SIZE) {
+		page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!page)
+			break;
+
+		ctx->ra_pages[ctx->total++] = page;
+	}
+
+	if (ctx->total == 0) {
+		bio_put(ctx->ra_bio);
+		return -ENOMEM;
+	}
+
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("readahead: %d allocated pages\n", ctx->total);
+#endif
+	return 0;
+}
+
+static void btrfs_r5l_recover_free_ra(struct btrfs_r5l_recover_ctx *ctx)
+{
+	int i;
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("readahead: %d to free pages\n", ctx->total);
+#endif
+	for (i = 0; i < ctx->total; i++)
+		__free_page(ctx->ra_pages[i]);
+	bio_put(ctx->ra_bio);
+}
 
 static void btrfs_r5l_write_super(struct btrfs_fs_info *fs_info, u64 cp);
@@ -1655,6 +1758,7 @@  static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 	ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
 	ASSERT(ctx);
 
+	ctx->log = log;
 	ctx->pos = log->last_checkpoint;
 	ctx->seq = log->last_cp_seq;
 	ctx->meta_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
@@ -1662,10 +1766,10 @@  static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 	ctx->io_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 	ASSERT(ctx->io_page);
 
-	ret = btrfs_r5l_recover_flush_log(log, ctx);
-	if (ret) {
-		;
-	}
+	ret = btrfs_r5l_recover_allocate_ra(ctx);
+	ASSERT(ret == 0);
+
+	btrfs_r5l_recover_flush_log(ctx);
 
 	pos = ctx->pos;
 	log->next_checkpoint = ctx->pos;
@@ -1684,6 +1788,7 @@  static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 #endif
 	__free_page(ctx->meta_page);
 	__free_page(ctx->io_page);
+	btrfs_r5l_recover_free_ra(ctx);
 	kfree(ctx);
 	return 0;
 }
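
A note on the wrap handling in btrfs_r5l_recover_read_ra(): the log is
a ring, and btrfs_r5l_ring_add() (whose body is not part of this patch)
apparently wraps the relative offset back to 0 once it passes the end
of the log area, since the readahead loop treats "offset == 0" after an
advance as the point to stop growing the window. A plausible sketch of
such a helper, purely as an inference from the call site:

	/* Hypothetical sketch, kernel-style types: advance a relative
	 * ring offset by @inc within a log area of @size bytes. The
	 * real btrfs_r5l_ring_add() takes the log itself and is not
	 * shown in this patch. */
	static u64 btrfs_r5l_ring_add_sketch(u64 size, u64 off, u64 inc)
	{
		off += inc;
		if (off >= size)
			off -= size;	/* wrap to the head of the log area */
		return off;
	}

Under that reading, the "if (offset == 0) break;" above stops the
readahead window at the wrap point instead of letting a single bio span
the end and the beginning of the log device.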