From patchwork Tue Apr 8 11:18:52 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wang Shilong X-Patchwork-Id: 3948251 Return-Path: X-Original-To: patchwork-linux-btrfs@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 9F71D9F369 for ; Tue, 8 Apr 2014 11:22:17 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 9E851203AE for ; Tue, 8 Apr 2014 11:22:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 6C9E9203AC for ; Tue, 8 Apr 2014 11:22:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756141AbaDHLWK (ORCPT ); Tue, 8 Apr 2014 07:22:10 -0400 Received: from cn.fujitsu.com ([59.151.112.132]:61534 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1755384AbaDHLWJ (ORCPT ); Tue, 8 Apr 2014 07:22:09 -0400 X-IronPort-AV: E=Sophos;i="4.97,817,1389715200"; d="scan'208";a="28997568" Received: from unknown (HELO edo.cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 Apr 2014 19:19:15 +0800 Received: from G08CNEXCHPEKD03.g08.fujitsu.local (localhost.localdomain [127.0.0.1]) by edo.cn.fujitsu.com (8.14.3/8.13.1) with ESMTP id s38BLJid031095; Tue, 8 Apr 2014 19:21:39 +0800 Received: from wangs.fnst.cn.fujitsu.com (10.167.226.104) by G08CNEXCHPEKD03.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.146.2; Tue, 8 Apr 2014 19:21:25 +0800 From: Wang Shilong To: , CC: , Subject: [PATCH v4 2/2] Btrfs: scrub raid56 stripes in the right way Date: Tue, 8 Apr 2014 19:18:52 +0800 Message-ID: <1396955932-4547-1-git-send-email-wangsl.fnst@cn.fujitsu.com> X-Mailer: git-send-email 1.9.0 MIME-Version: 1.0 X-Originating-IP: [10.167.226.104] Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Spam-Status: No, score=-7.2 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Steps to reproduce: # mkfs.btrfs -f /dev/sda[8-11] -m raid5 -d raid5 # mount /dev/sda8 /mnt # btrfs scrub start -BR /mnt # echo $? <--unverified errors make return value be 3 This is because we don't setup right mapping between physical and logical address for raid56, which makes checksum mismatch. But we will find everthing is fine later when rechecking using btrfs_map_block(). This patch fixed the problem by settuping right mappings and we only verify data stripes' checksums. Signed-off-by: Wang Shilong --- v3->v4: fix compile warnings on avr32 platform.(reported by 0-day LKP) v2->v3: fix wrong offset set plus some cleanups. v1->v2: on the right way to fix the problem. --- fs/btrfs/scrub.c | 108 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index db21a13..68a5a26 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2235,6 +2235,47 @@ behind_scrub_pages: return 0; } +/* + * Given a physical address, this will calculate it's + * logical offset. if this is a parity stripe, it will return + * the most left data stripe's logical offset. + * + * return 0 if it is a data stripe, 1 means parity stripe. + */ +static int get_raid56_logic_offset(u64 physical, int num, + struct map_lookup *map, u64 *offset) +{ + int i; + int j = 0; + u64 stripe_nr; + u64 last_offset; + int stripe_index; + int rot; + + last_offset = (physical - map->stripes[num].physical) * + nr_data_stripes(map); + *offset = last_offset; + for (i = 0; i < nr_data_stripes(map); i++) { + *offset = last_offset + i * map->stripe_len; + + stripe_nr = *offset; + do_div(stripe_nr, map->stripe_len); + do_div(stripe_nr, nr_data_stripes(map)); + + /* Work out the disk rotation on this stripe-set */ + rot = do_div(stripe_nr, map->num_stripes); + /* calculate which stripe this data locates */ + rot += i; + stripe_index = rot % map->num_stripes; + if (stripe_index == num) + return 0; + if (stripe_index < num) + j++; + } + *offset = last_offset + j * map->stripe_len; + return 1; +} + static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, struct map_lookup *map, struct btrfs_device *scrub_dev, @@ -2256,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, u64 physical; u64 logical; u64 logic_end; + u64 physical_end; u64 generation; int mirror_num; struct reada_control *reada1; @@ -2269,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, u64 extent_len; struct btrfs_device *extent_dev; int extent_mirror_num; - int stop_loop; - - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) { - if (num >= nr_data_stripes(map)) { - return 0; - } - } + int stop_loop = 0; nstripes = length; + physical = map->stripes[num].physical; offset = 0; do_div(nstripes, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { @@ -2296,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { increment = map->stripe_len; mirror_num = num % map->num_stripes + 1; + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + get_raid56_logic_offset(physical, num, map, &offset); + increment = map->stripe_len * nr_data_stripes(map); + mirror_num = 1; } else { increment = map->stripe_len; mirror_num = 1; @@ -2319,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, * to not hold off transaction commits */ logical = base + offset; - + physical_end = physical + nstripes * map->stripe_len; + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + get_raid56_logic_offset(physical_end, num, + map, &logic_end); + logic_end += base; + } else { + logic_end = logical + increment * nstripes; + } wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); scrub_blocked_if_needed(fs_info); @@ -2328,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, key_start.objectid = logical; key_start.type = BTRFS_EXTENT_ITEM_KEY; key_start.offset = (u64)0; - key_end.objectid = base + offset + nstripes * increment; + key_end.objectid = logic_end; key_end.type = BTRFS_METADATA_ITEM_KEY; key_end.offset = (u64)-1; reada1 = btrfs_reada_add(root, &key_start, &key_end); @@ -2338,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, key_start.offset = logical; key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key_end.type = BTRFS_EXTENT_CSUM_KEY; - key_end.offset = base + offset + nstripes * increment; + key_end.offset = logic_end; reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); if (!IS_ERR(reada1)) @@ -2356,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, /* * now find all extents for each stripe and scrub them */ - logical = base + offset; - physical = map->stripes[num].physical; - logic_end = logical + increment * nstripes; ret = 0; - while (logical < logic_end) { + while (physical < physical_end) { + /* for raid56, we skip parity stripe */ + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + ret = get_raid56_logic_offset(physical, num, + map, &logical); + logical += base; + if (ret) + goto skip; + } /* * canceled? */ @@ -2504,15 +2559,29 @@ again: scrub_free_csums(sctx); if (extent_logical + extent_len < key.objectid + bytes) { - logical += increment; - physical += map->stripe_len; - + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + /* + * loop until we find next data stripe + * or we have finished all stripes. + */ + do { + physical += map->stripe_len; + ret = get_raid56_logic_offset( + physical, num, + map, &logical); + logical += base; + } while (physical < physical_end && ret); + } else { + physical += map->stripe_len; + logical += increment; + } if (logical < key.objectid + bytes) { cond_resched(); goto again; } - if (logical >= logic_end) { + if (physical >= physical_end) { stop_loop = 1; break; } @@ -2521,6 +2590,7 @@ next: path->slots[0]++; } btrfs_release_path(path); +skip: logical += increment; physical += map->stripe_len; spin_lock(&sctx->stat_lock);