diff mbox

[11/14] Btrfs: raid56: add csum support

Message ID 20170801161439.13426-12-bo.li.liu@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Liu Bo Aug. 1, 2017, 4:14 p.m. UTC
This adds checksums to the meta/data/parity blocks resident on the
raid5/6 log, so recovery can now verify the checksums to see whether
anything inside meta/data/parity has been changed.

If anything is wrong in a meta block, we stop replaying data/parity at
that position, while if anything is wrong in a data/parity block, we
just skip this meta/data/parity pair and move on to the next one.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/raid56.c | 235 ++++++++++++++++++++++++++++++++++++++++++++----------
 fs/btrfs/raid56.h |   4 +
 2 files changed, 197 insertions(+), 42 deletions(-)
diff mbox

Patch

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 8f47e56..8bc7ba4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -43,6 +43,7 @@ 
 #include "async-thread.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "hash.h"
 
 /* set when additional merges to this rbio are not allowed */
 #define RBIO_RMW_LOCKED_BIT	1
@@ -197,6 +198,7 @@  struct btrfs_r5l_log {
 	u64 last_cp_seq;
 	u64 seq;
 	u64 log_start;
+	u32 uuid_csum;
 	struct btrfs_r5l_io_unit *current_io;
 };
 
@@ -1309,7 +1311,7 @@  static int btrfs_r5l_get_meta(struct btrfs_r5l_log *log, struct btrfs_raid_bio *
 	return 0;
 }
 
-static void btrfs_r5l_append_payload_meta(struct btrfs_r5l_log *log, u16 type, u64 location, u64 devid)
+static void btrfs_r5l_append_payload_meta(struct btrfs_r5l_log *log, u16 type, u64 location, u64 devid, u32 csum)
 {
 	struct btrfs_r5l_io_unit *io = log->current_io;
 	struct btrfs_r5l_payload *payload;
@@ -1326,11 +1328,11 @@  static void btrfs_r5l_append_payload_meta(struct btrfs_r5l_log *log, u16 type, u
 		payload->size = cpu_to_le32(16); /* stripe_len / PAGE_SIZE */
 	payload->devid = cpu_to_le64(devid);
 	payload->location = cpu_to_le64(location);
+	payload->csum = cpu_to_le32(csum);
 	kunmap(io->meta_page);
 
-	/* XXX: add checksum later */
 	io->meta_offset += sizeof(*payload);
-	//io->meta_offset += sizeof(__le32);
+
 #ifdef BTRFS_DEBUG_R5LOG
 	trace_printk("io->meta_offset %d\n", io->meta_offset);
 #endif
@@ -1380,6 +1382,10 @@  static void btrfs_r5l_log_stripe(struct btrfs_r5l_log *log, int data_pages, int
 	int meta_size;
 	int stripe, pagenr;
 	struct page *page;
+	char *kaddr;
+	u32 csum;
+	u64 location;
+	u64 devid;
 
 	/*
 	 * parity pages are contiguous on disk, thus only one
@@ -1394,8 +1400,6 @@  static void btrfs_r5l_log_stripe(struct btrfs_r5l_log *log, int data_pages, int
 	/* add data blocks which need to be written */
 	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
 		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
-			u64 location;
-			u64 devid;
 			if (stripe < rbio->nr_data) {
 				page = page_in_rbio(rbio, stripe, pagenr, 1);
 				if (!page)
@@ -1406,7 +1410,11 @@  static void btrfs_r5l_log_stripe(struct btrfs_r5l_log *log, int data_pages, int
 #ifdef BTRFS_DEBUG_R5LOG
 				trace_printk("data: stripe %d pagenr %d location 0x%llx devid %llu\n", stripe, pagenr, location, devid);
 #endif
-				btrfs_r5l_append_payload_meta(log, R5LOG_PAYLOAD_DATA, location, devid);
+				kaddr = kmap(page);
+				csum = btrfs_crc32c(log->uuid_csum, kaddr, PAGE_SIZE);
+				kunmap(page);
+
+				btrfs_r5l_append_payload_meta(log, R5LOG_PAYLOAD_DATA, location, devid, csum);
 				btrfs_r5l_append_payload_page(log, page);
 			}
 		}
@@ -1414,17 +1422,26 @@  static void btrfs_r5l_log_stripe(struct btrfs_r5l_log *log, int data_pages, int
 
 	/* add the whole parity blocks */
 	for (; stripe < rbio->real_stripes; stripe++) {
-		u64 location = btrfs_compute_location(rbio, stripe, 0);
-		u64 devid = btrfs_compute_devid(rbio, stripe);
+		location = btrfs_compute_location(rbio, stripe, 0);
+		devid = btrfs_compute_devid(rbio, stripe);
 
 #ifdef BTRFS_DEBUG_R5LOG
 		trace_printk("parity: stripe %d location 0x%llx devid %llu\n", stripe, location, devid);
 #endif
-		btrfs_r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, location, devid);
 		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
 			page = rbio_stripe_page(rbio, stripe, pagenr);
+
+			kaddr = kmap(page);
+			if (pagenr == 0)
+				csum = btrfs_crc32c(log->uuid_csum, kaddr, PAGE_SIZE);
+			else
+				csum = btrfs_crc32c(csum, kaddr, PAGE_SIZE);
+			kunmap(page);
+
 			btrfs_r5l_append_payload_page(log, page);
 		}
+
+		btrfs_r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, location, devid, csum);
 	}
 }
 
@@ -1432,12 +1449,16 @@  static void btrfs_r5l_submit_current_io(struct btrfs_r5l_log *log)
 {
 	struct btrfs_r5l_io_unit *io = log->current_io;
 	struct btrfs_r5l_meta_block *mb;
+	u32 csum;
 
 	if (!io)
 		return;
 
 	mb = kmap(io->meta_page);
 	mb->meta_size = cpu_to_le32(io->meta_offset);
+	ASSERT(mb->csum == 0);
+	csum = btrfs_crc32c(log->uuid_csum, mb, PAGE_SIZE);
+	mb->csum = cpu_to_le32(csum);
 	kunmap(io->meta_page);
 
 	log->current_io = NULL;
@@ -1506,6 +1527,7 @@  static int btrfs_r5l_write_empty_meta_block(struct btrfs_r5l_log *log, u64 pos,
 {
 	struct page *page;
 	struct btrfs_r5l_meta_block *mb;
+	u32 csum;
 	int ret = 0;
 
 #ifdef BTRFS_DEBUG_R5LOG
@@ -1520,6 +1542,9 @@  static int btrfs_r5l_write_empty_meta_block(struct btrfs_r5l_log *log, u64 pos,
 	mb->meta_size = cpu_to_le32(sizeof(struct btrfs_r5l_meta_block));
 	mb->seq = cpu_to_le64(seq);
 	mb->position = cpu_to_le64(pos);
+
+	csum = btrfs_crc32c(log->uuid_csum, mb, PAGE_SIZE);
+	mb->csum = cpu_to_le32(csum);
 	kunmap(page);
 
 	if (!btrfs_r5l_sync_page_io(log, log->dev, (pos >> 9), PAGE_SIZE, page, REQ_OP_WRITE | REQ_FUA)) {
@@ -1607,6 +1632,9 @@  static int btrfs_r5l_recover_read_page(struct btrfs_r5l_recover_ctx *ctx, struct
 static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx)
 {
 	struct btrfs_r5l_meta_block *mb;
+	u32 csum;
+	u32 expected;
+	int ret = 0;
 
 	ret = btrfs_r5l_recover_read_page(ctx, ctx->meta_page, ctx->pos);
 	if (ret)
@@ -1623,25 +1651,131 @@  static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx)
 #ifdef BTRFS_DEBUG_R5LOG
 		trace_printk("%s: mismatch magic %llu default %llu\n", __func__, le32_to_cpu(mb->magic), BTRFS_R5LOG_MAGIC);
 #endif
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	ASSERT(le32_to_cpu(mb->meta_size) <= PAGE_SIZE);
-	kunmap(ctx->meta_page);
+	expected = le32_to_cpu(mb->csum);
+	/*
+	 * when we calculate mb->csum, it's zero, so we need to zero
+	 * it back.
+	 */
+	mb->csum = 0;
+	csum = btrfs_crc32c(ctx->log->uuid_csum, mb, PAGE_SIZE);
+	if (csum != expected) {
+#ifdef BTRFS_DEBUG_R5LOG
+		pr_info("%s: mismatch checksum for r5l meta block\n", __func__);
+#endif
+		ret = -EINVAL;
+		goto out;
+	}
 
+	ASSERT(le32_to_cpu(mb->meta_size) <= PAGE_SIZE);
 	/* meta_block */
 	ctx->total_size = PAGE_SIZE;
 
-	return 0;
+out:
+	kunmap(ctx->meta_page);
+
+	return ret;
+}
+
+static int btrfs_r5l_recover_verify_checksum(struct btrfs_r5l_recover_ctx *ctx)
+{
+	u64 offset;
+	u32 meta_size;
+	u64 csum_io_offset;
+	u64 read_pos;
+	char *kaddr;
+	u32 csum;
+	int type;
+	struct btrfs_r5l_meta_block *mb;
+	struct btrfs_r5l_payload *payload;
+	struct btrfs_r5l_log *log = ctx->log;
+	struct btrfs_device *dev;
+	int ret = 0;
+
+	mb = kmap(ctx->meta_page);
+	meta_size = le32_to_cpu(mb->meta_size);
+	csum_io_offset = PAGE_SIZE;
+
+	for (offset = sizeof(struct btrfs_r5l_meta_block);
+	     offset < meta_size;
+	     offset += sizeof(struct btrfs_r5l_payload)) {
+		payload = (void *)mb + offset;
+
+		/* check if there is any invalid device, if so, skip writing this mb. */
+		dev = btrfs_find_device(log->fs_info, le64_to_cpu(payload->devid), NULL, NULL);
+		if (!dev || dev->missing) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		type = le16_to_cpu(payload->type);
+		if (type == R5LOG_PAYLOAD_DATA) {
+			read_pos = btrfs_r5l_ring_add(log, ctx->pos, csum_io_offset);
+			csum_io_offset += PAGE_SIZE;
+
+			ASSERT(le32_to_cpu(payload->size) == 1);
+			ret = btrfs_r5l_recover_read_page(ctx, ctx->io_page, read_pos);
+			if (ret) {
+				ret = -EIO;
+				goto out;
+			}
+
+			kaddr = kmap(ctx->io_page);
+			csum = btrfs_crc32c(log->uuid_csum, kaddr, PAGE_SIZE);
+			kunmap(ctx->io_page);
+		} else if (type == R5LOG_PAYLOAD_PARITY) {
+			int i;
+			for (i = 0; i < le32_to_cpu(payload->size); i++) {
+				read_pos = btrfs_r5l_ring_add(log, ctx->pos, csum_io_offset);
+				csum_io_offset += PAGE_SIZE;
+
+				ret = btrfs_r5l_recover_read_page(ctx, ctx->io_page, read_pos);
+				if (ret) {
+					ret = -EIO;
+					goto out;
+				}
+
+				kaddr = kmap(ctx->io_page);
+				if (i == 0)
+					csum = btrfs_crc32c(log->uuid_csum, kaddr, PAGE_SIZE);
+				else
+					csum = btrfs_crc32c(csum, kaddr, PAGE_SIZE);
+				kunmap(ctx->io_page);
+			}
+		} else {
+			ASSERT(0);
+		}
+
+		if (csum != le32_to_cpu(payload->csum)) {
+			trace_printk("r5l data csum fails location 0x%llx devid %llu\n", le64_to_cpu(payload->location), le64_to_cpu(payload->devid));
+			ret = -EAGAIN;
+			goto out;
+		}
+	}
+out:
+	kunmap(ctx->meta_page);
+	return ret;
 }
 
-static int btrfs_r5l_recover_load_data(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx)
+static int btrfs_r5l_recover_load_data(struct btrfs_r5l_recover_ctx *ctx)
 {
 	u64 offset;
 	struct btrfs_r5l_meta_block *mb;
-	u64 meta_size;
+	u32 meta_size;
 	u64 io_offset;
+	u64 read_pos;
 	struct btrfs_device *dev;
+	struct btrfs_r5l_payload *payload;
+	struct btrfs_r5l_log *log = ctx->log;
+	int ret = 0;
+
+	/* if any checksum fails, skip writing this mb. */
+	ret = btrfs_r5l_recover_verify_checksum(ctx);
+	if (ret)
+		return ret;
 
 	mb = kmap(ctx->meta_page);
 
@@ -1649,67 +1783,81 @@  static int btrfs_r5l_recover_load_data(struct btrfs_r5l_log *log, struct btrfs_r
 	offset = sizeof(struct btrfs_r5l_meta_block);
 	meta_size = le32_to_cpu(mb->meta_size);
 
-	while (offset < meta_size) {
-		struct btrfs_r5l_payload *payload = (void *)mb + offset;
+	for (offset = sizeof(struct btrfs_r5l_meta_block);
+	     offset < meta_size;
+	     offset += sizeof(struct btrfs_r5l_payload)) {
+		payload = (void *)mb + offset;
 
 		/* read data from log disk and write to payload->location */
 #ifdef BTRFS_DEBUG_R5LOG
 		trace_printk("payload type %d flags %d size %d location 0x%llx devid %llu\n", le16_to_cpu(payload->type), le16_to_cpu(payload->flags), le32_to_cpu(payload->size), le64_to_cpu(payload->location), le64_to_cpu(payload->devid));
 #endif
 
+		/* liubo: how to handle the case where dev is suddenly off? */
 		dev = btrfs_find_device(log->fs_info, le64_to_cpu(payload->devid), NULL, NULL);
-		if (!dev || dev->missing) {
-			ASSERT(0);
-		}
+		ASSERT(dev && !dev->missing);
 
 		if (le16_to_cpu(payload->type) == R5LOG_PAYLOAD_DATA) {
-			ASSERT(le32_to_cpu(payload->size) == 1);
-			btrfs_r5l_sync_page_io(log, log->dev, (ctx->pos + io_offset) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_READ);
-			btrfs_r5l_sync_page_io(log, dev, le64_to_cpu(payload->location) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE);
+			read_pos = btrfs_r5l_ring_add(log, ctx->pos, io_offset);
 			io_offset += PAGE_SIZE;
+
+			ret = btrfs_r5l_recover_read_page(ctx, ctx->io_page, read_pos);
+			if (ret)
+				goto out;
+
+			if (!btrfs_r5l_sync_page_io(log, dev, le64_to_cpu(payload->location) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE)) {
+				ret = -EIO;
+				goto out;
+			}
 		} else if (le16_to_cpu(payload->type) == R5LOG_PAYLOAD_PARITY) {
 			int i;
-			ASSERT(le32_to_cpu(payload->size) == 16);
+
+			ASSERT(offset + sizeof(struct btrfs_r5l_payload) == meta_size);
+
 			for (i = 0; i < le32_to_cpu(payload->size); i++) {
-				/* liubo: parity are guaranteed to be
-				 * contiguous, use just one bio to
-				 * hold all pages and flush them. */
 				u64 parity_off = le64_to_cpu(payload->location) + i * PAGE_SIZE;
-				btrfs_r5l_sync_page_io(log, log->dev, (ctx->pos + io_offset) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_READ);
-				btrfs_r5l_sync_page_io(log, dev, parity_off >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE);
+				read_pos = btrfs_r5l_ring_add(log, ctx->pos, io_offset);
 				io_offset += PAGE_SIZE;
+
+				ret = btrfs_r5l_recover_read_page(ctx, ctx->io_page, read_pos);
+				if (ret)
+					goto out;
+
+				if (!btrfs_r5l_sync_page_io(log, dev, parity_off >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE)) {
+					ret = -EIO;
+					goto out;
+				}
 			}
 		} else {
 			ASSERT(0);
 		}
-
-		offset += sizeof(struct btrfs_r5l_payload);
 	}
-	kunmap(ctx->meta_page);
 
 	ctx->total_size += (io_offset - PAGE_SIZE);
-	return 0;
+out:
+	kunmap(ctx->meta_page);
+	return ret;
 }
 
-static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx)
+static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_recover_ctx *ctx)
 {
 	int ret;
 
 	while (1) {
-		ret = btrfs_r5l_recover_load_meta(log, ctx);
+		ret = btrfs_r5l_recover_load_meta(ctx);
 		if (ret)
 			break;
 
-		ret = btrfs_r5l_recover_load_data(log, ctx);
-		ASSERT(!ret || ret > 0);
-		if (ret)
+		ret = btrfs_r5l_recover_load_data(ctx);
+		if (ret && ret != -EAGAIN)
 			break;
 
 		ctx->seq++;
-		ctx->pos = btrfs_r5l_ring_add(log, ctx->pos, ctx->total_size);
+		ctx->pos = btrfs_r5l_ring_add(ctx->log, ctx->pos, ctx->total_size);
 	}
 
-	return ret;
+	return 0;
+}
 
 static int btrfs_r5l_recover_allocate_ra(struct btrfs_r5l_recover_ctx *ctx)
 {
@@ -1801,6 +1949,7 @@  int btrfs_r5l_load_log(struct btrfs_fs_info *fs_info, u64 cp)
 	struct page *page;
 	struct btrfs_r5l_meta_block *mb;
 	bool create_new = false;
+	int ret;
 
 	ASSERT(log);
 
@@ -1856,10 +2005,10 @@  int btrfs_r5l_load_log(struct btrfs_fs_info *fs_info, u64 cp)
 		log->seq = log->last_cp_seq + 1;
 		log->next_checkpoint = cp;
 	} else {
-		btrfs_r5l_recover_log(log);
+		ret = btrfs_r5l_recover_log(log);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -3576,6 +3725,8 @@  int btrfs_set_r5log(struct btrfs_fs_info *fs_info, struct btrfs_device *device)
 	log->device_size = round_down(log->device_size, PAGE_SIZE);
 	log->dev = device;
 	log->fs_info = fs_info;
+	ASSERT(sizeof(device->uuid) == BTRFS_UUID_SIZE);
+	log->uuid_csum = btrfs_crc32c(~0, device->uuid, sizeof(device->uuid));
 	mutex_init(&log->io_mutex);
 
 	cmpxchg(&fs_info->r5log, NULL, log);
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 314d299..569cec8 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -87,6 +87,8 @@  struct btrfs_r5l_payload {
 	/* data or parity */
 	__le64 location;
 	__le64 devid;
+
+	__le32 csum;
 };
 
 /* io unit starts from a meta block. */
@@ -96,6 +98,8 @@  struct btrfs_r5l_meta_block {
 	/* the whole size of the block */
 	__le32 meta_size;
 
+	__le32 csum;
+
 	__le64 seq;
 	__le64 position;