From patchwork Fri Mar 18 16:21:50 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ilya Dryomov X-Patchwork-Id: 644361 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p2IGM16f015015 for ; Fri, 18 Mar 2011 16:22:02 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756818Ab1CRQVy (ORCPT ); Fri, 18 Mar 2011 12:21:54 -0400 Received: from mail-bw0-f46.google.com ([209.85.214.46]:36429 "EHLO mail-bw0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756734Ab1CRQVy (ORCPT ); Fri, 18 Mar 2011 12:21:54 -0400 Received: by bwz15 with SMTP id 15so3482326bwz.19 for ; Fri, 18 Mar 2011 09:21:52 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:date:from:to:cc:subject:message-id:mime-version :content-type:content-disposition:user-agent; bh=NevluqwHw4g91tGhQ1eM+alAN7Vbft43FN5VFFIk7Yc=; b=VjKu5uCNyFaIrUE/CmzhmMaRdpWv+KJ/GEBdGvQqP6KeK2BtQRsrOSKufIgcOa1Q7i 1nI+07/HvO6SbS9MRzy4UxPZAJFmxNBT4cMB2zyMPbrIXNyO8UXJsDQJib0hXgZM25nQ xh+61sKInBlRql0QNLuxHRGyjwLkj5KBd3wnQ= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=date:from:to:cc:subject:message-id:mime-version:content-type :content-disposition:user-agent; b=iGuPWwzCIKix6xL1Eg9PT2y3ddJgU0p86+BxLIBzUlVaLR37X3nmdSCKSdSFpnaC4X zki5TMgfz5hQHvu3MNXlReHT4y1FHguqEZOrqQMLKw9SJYph3uSWN1BY3DAVT0uMh/yz 6hf3Vj2/dinLcMD/pdoMIvwuLTsEWsGUiH4y0= Received: by 10.204.230.194 with SMTP id jn2mr1154968bkb.133.1300465312327; Fri, 18 Mar 2011 09:21:52 -0700 (PDT) Received: from localhost ([46.35.239.144]) by mx.google.com with ESMTPS id t1sm2259364bkx.19.2011.03.18.09.21.50 (version=TLSv1/SSLv3 cipher=OTHER); Fri, 18 Mar 2011 09:21:51 -0700 (PDT) Date: Fri, 18 Mar 2011 18:21:50 +0200 From: Ilya Dryomov To: Arne Jansen Cc: linux-btrfs@vger.kernel.org Subject: [PATCH] btrfs scrub: make fixups sync, don't reuse fixup bios Message-ID: <20110318162150.GA31882@kwango.lan.net> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.18 (2008-05-17) Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Fri, 18 Mar 2011 16:22:02 +0000 (UTC) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 85a4d4b..f3fe5a5 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -69,9 +69,6 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev, struct scrub_page *spag, u64 logical, void *buffer); static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); -static void scrub_recheck_end_io(struct bio *bio, int err); -static void scrub_fixup_worker(scrub_work_t *work); -static void scrub_fixup(struct scrub_fixup *fixup); #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ @@ -117,13 +114,10 @@ struct scrub_dev { struct scrub_fixup { struct scrub_dev *sdev; - struct bio *bio; u64 logical; u64 physical; - struct scrub_page spag; - scrub_work_t work; - int err; - int recheck; + struct page *page; + struct scrub_page *spag; }; static void scrub_free_csums(struct scrub_dev *sdev) @@ -230,115 +224,19 @@ nomem: return ERR_PTR(-ENOMEM); } -/* - * scrub_recheck_error gets called when either verification of the page - * failed or the bio failed to read, e.g. with EIO. In the latter case, - * recheck_error gets called for every page in the bio, even though only - * one may be bad - */ -static void scrub_recheck_error(struct scrub_bio *sbio, int ix) -{ - struct scrub_dev *sdev = sbio->sdev; - struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; - struct bio *bio = NULL; - struct page *page = NULL; - struct scrub_fixup *fixup = NULL; - int ret; - - /* - * while we're in here we do not want the transaction to commit. - * To prevent it, we increment scrubs_running. scrub_pause will - * have to wait until we're finished - * we can safely increment scrubs_running here, because we're - * in the context of the original bio which is still marked in_flight - */ - atomic_inc(&fs_info->scrubs_running); - - fixup = kzalloc(sizeof(*fixup), GFP_NOFS); - if (!fixup) - goto malloc_error; - - fixup->logical = sbio->logical + ix * PAGE_SIZE; - fixup->physical = sbio->physical + ix * PAGE_SIZE; - fixup->spag = sbio->spag[ix]; - fixup->sdev = sdev; - - bio = bio_alloc(GFP_NOFS, 1); - if (!bio) - goto malloc_error; - bio->bi_private = fixup; - bio->bi_size = 0; - bio->bi_bdev = sdev->dev->bdev; - fixup->bio = bio; - fixup->recheck = 0; - - page = alloc_page(GFP_NOFS); - if (!page) - goto malloc_error; - - ret = bio_add_page(bio, page, PAGE_SIZE, 0); - if (!ret) - goto malloc_error; - - if (!sbio->err) { - /* - * shorter path: just a checksum error, go ahead and correct it - */ - scrub_fixup_worker(&fixup->work); - return; - } - - /* - * an I/O-error occured for one of the blocks in the bio, not - * necessarily for this one, so first try to read it separately - */ - SCRUB_INIT_WORK(&fixup->work, scrub_fixup_worker); - fixup->recheck = 1; - bio->bi_end_io = scrub_recheck_end_io; - bio->bi_sector = fixup->physical >> 9; - bio->bi_bdev = sdev->dev->bdev; - submit_bio(0, bio); - - return; - -malloc_error: - if (bio) - bio_put(bio); - if (page) - __free_page(page); - if (fixup) - kfree(fixup); - spin_lock(&sdev->stat_lock); - ++sdev->stat.malloc_errors; - spin_unlock(&sdev->stat_lock); - atomic_dec(&fs_info->scrubs_running); - wake_up(&fs_info->scrub_pause_wait); -} - -static void scrub_recheck_end_io(struct bio *bio, int err) -{ - struct scrub_fixup *fixup = bio->bi_private; - struct btrfs_fs_info *fs_info = fixup->sdev->dev->dev_root->fs_info; - - fixup->err = err; - SCRUB_QUEUE_WORK(fs_info->scrub_workers, &fixup->work); -} - static int scrub_fixup_check(struct scrub_fixup *fixup) { int ret = 1; - struct page *page; void *buffer; - u64 flags = fixup->spag.flags; + u64 flags = fixup->spag->flags; - page = fixup->bio->bi_io_vec[0].bv_page; - buffer = kmap_atomic(page, KM_USER0); + buffer = kmap_atomic(fixup->page, KM_USER0); if (flags & BTRFS_EXTENT_FLAG_DATA) { ret = scrub_checksum_data(fixup->sdev, - &fixup->spag, buffer); + fixup->spag, buffer); } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { ret = scrub_checksum_tree_block(fixup->sdev, - &fixup->spag, + fixup->spag, fixup->logical, buffer); } else { @@ -349,35 +247,34 @@ static int scrub_fixup_check(struct scrub_fixup *fixup) return ret; } -static void scrub_fixup_worker(scrub_work_t *work) +static void scrub_fixup_end_io(struct bio *bio, int err) { - struct scrub_fixup *fixup; - struct btrfs_fs_info *fs_info; - u64 flags; - int ret = 1; - - fixup = container_of(work, struct scrub_fixup, work); - fs_info = fixup->sdev->dev->dev_root->fs_info; - flags = fixup->spag.flags; - - if (fixup->recheck && fixup->err == 0) - ret = scrub_fixup_check(fixup); + complete((struct completion *)bio->bi_private); +} - if (ret || fixup->err) - scrub_fixup(fixup); +static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, + struct page *page) +{ + struct bio *bio = NULL; + int ret; + DECLARE_COMPLETION_ONSTACK(complete); - __free_page(fixup->bio->bi_io_vec[0].bv_page); - bio_put(fixup->bio); + /* we are going to wait on this IO */ + rw |= REQ_SYNC | REQ_UNPLUG; - atomic_dec(&fs_info->scrubs_running); - wake_up(&fs_info->scrub_pause_wait); + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = bdev; + bio->bi_sector = sector; + bio_add_page(bio, page, PAGE_SIZE, 0); + bio->bi_end_io = scrub_fixup_end_io; + bio->bi_private = &complete; + submit_bio(rw, bio); - kfree(fixup); -} + wait_for_completion(&complete); -static void scrub_fixup_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); + ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); + bio_put(bio); + return ret; } static void scrub_fixup(struct scrub_fixup *fixup) @@ -386,14 +283,13 @@ static void scrub_fixup(struct scrub_fixup *fixup) struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct btrfs_multi_bio *multi = NULL; - struct bio *bio = fixup->bio; u64 length; int i; int ret; DECLARE_COMPLETION_ONSTACK(complete); - if ((fixup->spag.flags & BTRFS_EXTENT_FLAG_DATA) && - (fixup->spag.have_csum == 0)) { + if ((fixup->spag->flags & BTRFS_EXTENT_FLAG_DATA) && + (fixup->spag->have_csum == 0)) { /* * nodatasum, don't try to fix anything * FIXME: we can do better, open the inode and trigger a @@ -413,61 +309,38 @@ static void scrub_fixup(struct scrub_fixup *fixup) return; } - if (multi->num_stripes == 1) { + if (multi->num_stripes == 1) /* there aren't any replicas */ goto uncorrectable; - } /* * first find a good copy */ for (i = 0; i < multi->num_stripes; ++i) { - if (i == fixup->spag.mirror_num) + if (i == fixup->spag->mirror_num) continue; - bio->bi_sector = multi->stripes[i].physical >> 9; - bio->bi_bdev = multi->stripes[i].dev->bdev; - bio->bi_size = PAGE_SIZE; - bio->bi_next = NULL; - bio->bi_flags |= 1 << BIO_UPTODATE; - bio->bi_comp_cpu = -1; - bio->bi_end_io = scrub_fixup_end_io; - bio->bi_private = &complete; - - submit_bio(0, bio); - - wait_for_completion(&complete); - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, + multi->stripes[i].physical >> 9, + fixup->page)) { /* I/O-error, this is not a good copy */ continue; + } - ret = scrub_fixup_check(fixup); - if (ret == 0) + if (scrub_fixup_check(fixup) == 0) break; } if (i == multi->num_stripes) goto uncorrectable; /* - * the bio now contains good data, write it back + * fixup->page now contains good data, write it back */ - bio->bi_sector = fixup->physical >> 9; - bio->bi_bdev = sdev->dev->bdev; - bio->bi_size = PAGE_SIZE; - bio->bi_next = NULL; - bio->bi_flags |= 1 << BIO_UPTODATE; - bio->bi_comp_cpu = -1; - bio->bi_end_io = scrub_fixup_end_io; - bio->bi_private = &complete; - - submit_bio(REQ_WRITE, bio); - - wait_for_completion(&complete); - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (scrub_fixup_io(WRITE, sdev->dev->bdev, + fixup->physical >> 9, fixup->page)) { /* I/O-error, writeback failed, give up */ goto uncorrectable; + } kfree(multi); spin_lock(&sdev->stat_lock); @@ -490,6 +363,60 @@ uncorrectable: (unsigned long long)fixup->logical); } +/* + * scrub_recheck_error gets called when either verification of the page + * failed or the bio failed to read, e.g. with EIO. In the latter case, + * recheck_error gets called for every page in the bio, even though only + * one may be bad + */ +static void scrub_recheck_error(struct scrub_bio *sbio, int ix) +{ + struct scrub_dev *sdev = sbio->sdev; + struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; + struct scrub_fixup *fixup = NULL; + + /* + * while we're in here we do not want the transaction to commit. + * To prevent it, we increment scrubs_running. scrub_pause will + * have to wait until we're finished + * we can safely increment scrubs_running here, because we're + * in the context of the original bio which is still marked in_flight + */ + atomic_inc(&fs_info->scrubs_running); + + fixup = kzalloc(sizeof(*fixup), GFP_NOFS); + if (!fixup) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.malloc_errors; + /* XXX - ++sdev->stat.uncorrectable_errors ? */ + spin_unlock(&sdev->stat_lock); + goto out; + } + + fixup->logical = sbio->logical + ix * PAGE_SIZE; + fixup->physical = sbio->physical + ix * PAGE_SIZE; + fixup->page = sbio->bio->bi_io_vec[ix].bv_page; + fixup->spag = sbio->spag + ix; + fixup->sdev = sdev; + + if (sbio->err) { + if (scrub_fixup_io(READ, sdev->dev->bdev, + fixup->physical >> 9, + fixup->page) == 0) { + if (scrub_fixup_check(fixup) == 0) + goto done; + } + } + + scrub_fixup(fixup); + +done: + kfree(fixup); +out: + atomic_dec(&fs_info->scrubs_running); + wake_up(&fs_info->scrub_pause_wait); +} + static void scrub_bio_end_io(struct bio *bio, int err) { struct scrub_bio *sbio = bio->bi_private; @@ -1296,6 +1223,23 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) mutex_unlock(&fs_info->scrub_lock); } +static void print_scrub_full(struct btrfs_scrub_progress *sp) +{ + printk(KERN_INFO "\tdata_extents_scrubbed: %lld\n", sp->data_extents_scrubbed); + printk(KERN_INFO "\ttree_extents_scrubbed: %lld\n", sp->tree_extents_scrubbed); + printk(KERN_INFO "\tdata_bytes_scrubbed: %lld\n", sp->data_bytes_scrubbed); + printk(KERN_INFO "\ttree_bytes_scrubbed: %lld\n", sp->tree_bytes_scrubbed); + printk(KERN_INFO "\tread_errors: %lld\n", sp->read_errors); + printk(KERN_INFO "\tcsum_errors: %lld\n", sp->csum_errors); + printk(KERN_INFO "\tverify_errors: %lld\n", sp->verify_errors); + printk(KERN_INFO "\tno_csum: %lld\n", sp->no_csum); + printk(KERN_INFO "\tcsum_discards: %lld\n", sp->csum_discards); + printk(KERN_INFO "\tsuper_errors: %lld\n", sp->super_errors); + printk(KERN_INFO "\tmalloc_errors: %lld\n", sp->malloc_errors); + printk(KERN_INFO "\tuncorrectable_errors: %lld\n", sp->uncorrectable_errors); + printk(KERN_INFO "\tcorrected_errors: %lld\n", sp->corrected_errors); + printk(KERN_INFO "\tlast_physical: %lld\n", sp->last_physical); +} int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress) @@ -1308,6 +1252,9 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, if (root->fs_info->closing) return -EINVAL; + printk(KERN_INFO "btrfs_scrub_dev start=%llu, end=%llu\n", + start, end); + /* * check some assumptions */ @@ -1360,8 +1307,10 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, atomic_dec(&fs_info->scrubs_running); wake_up(&fs_info->scrub_pause_wait); - if (progress) + if (progress) { memcpy(progress, &sdev->stat, sizeof(*progress)); + print_scrub_full(progress); + } mutex_lock(&fs_info->scrub_lock); dev->scrub_device = NULL;