From patchwork Wed May 21 09:41:15 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chandan Rajendra X-Patchwork-Id: 4215511 Return-Path: X-Original-To: patchwork-linux-btrfs@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 1DCCC9F23C for ; Wed, 21 May 2014 09:41:42 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id CD7E720148 for ; Wed, 21 May 2014 09:41:40 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 7A385203EC for ; Wed, 21 May 2014 09:41:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751930AbaEUJlh (ORCPT ); Wed, 21 May 2014 05:41:37 -0400 Received: from e28smtp02.in.ibm.com ([122.248.162.2]:35743 "EHLO e28smtp02.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751425AbaEUJld (ORCPT ); Wed, 21 May 2014 05:41:33 -0400 Received: from /spool/local by e28smtp02.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Wed, 21 May 2014 15:11:31 +0530 Received: from d28dlp01.in.ibm.com (9.184.220.126) by e28smtp02.in.ibm.com (192.168.1.132) with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted; Wed, 21 May 2014 15:11:29 +0530 Received: from d28relay03.in.ibm.com (d28relay03.in.ibm.com [9.184.220.60]) by d28dlp01.in.ibm.com (Postfix) with ESMTP id 05F4DE0044 for ; Wed, 21 May 2014 15:12:09 +0530 (IST) Received: from d28av05.in.ibm.com (d28av05.in.ibm.com [9.184.220.67]) by d28relay03.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4L9frvN8454524 for ; Wed, 21 May 2014 15:11:53 +0530 Received: from d28av05.in.ibm.com (localhost [127.0.0.1]) by d28av05.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4L9fSNF018410 for ; Wed, 21 May 2014 15:11:28 +0530 Received: from localhost.in.ibm.com ([9.124.35.251]) by d28av05.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVin) with ESMTP id s4L9fLuu017990; Wed, 21 May 2014 15:11:28 +0530 From: Chandan Rajendra To: linux-btrfs@vger.kernel.org, clm@fb.com, jbacik@fb.com Cc: Chandan Rajendra , aneesh.kumar@linux.vnet.ibm.com Subject: [RFC PATCH 5/8] Btrfs: subpagesize-blocksize: Read tree blocks whose size is < PAGE_CACHE_SIZE X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1400665278-4091-1-git-send-email-chandan@linux.vnet.ibm.com> References: <1400665278-4091-1-git-send-email-chandan@linux.vnet.ibm.com> X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 14052109-5816-0000-0000-00000E230D82 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Spam-Status: No, score=-7.5 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP In the case of subpagesize-blocksize, this patch makes it possible to read only a single metadata block from the disk instead of all the metadata blocks that map into a page. 
Signed-off-by: Chandan Rajendra --- fs/btrfs/disk-io.c | 45 ++++++++--------- fs/btrfs/disk-io.h | 3 ++ fs/btrfs/extent_io.c | 135 +++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 137 insertions(+), 46 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bda2157..b2c4e9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -413,7 +413,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, int mirror_num = 0; int failed_mirror = 0; - clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + clear_bit(EXTENT_BUFFER_CORRUPT, &eb->ebflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, @@ -432,7 +432,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, * there is no reason to read the other copies, they won't be * any less wrong. */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) + if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->ebflags)) break; num_copies = btrfs_num_copies(root->fs_info, @@ -564,12 +564,13 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, - u64 phy_offset, struct page *page, - u64 start, u64 end, int mirror) +int verify_extent_buffer_read(struct btrfs_io_bio *io_bio, + struct page *page, + u64 start, u64 end, int mirror) { u64 found_start; int found_level; + struct extent_buffer_head *ebh; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; int ret = 0; @@ -579,18 +580,26 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, goto out; eb = (struct extent_buffer *)page->private; + do { + if ((eb->start <= start) && (eb->start + eb->len - 1 > start)) + break; + } while ((eb = eb->eb_next) != NULL); + + BUG_ON(!eb); + + ebh = eb_head(eb); /* the pending IO might have been the only thing that kept this buffer * in memory. 
Make sure we have a ref for all this other checks */ extent_buffer_get(eb); - reads_done = atomic_dec_and_test(&eb->io_pages); + reads_done = atomic_dec_and_test(&ebh->io_bvecs); if (!reads_done) goto err; eb->read_mirror = mirror; - if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { + if (test_bit(EXTENT_BUFFER_IOERR, &eb->ebflags)) { ret = -EIO; goto err; } @@ -632,7 +641,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * return -EIO. */ if (found_level == 0 && check_leaf(root, eb)) { - set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + set_bit(EXTENT_BUFFER_CORRUPT, &eb->ebflags); ret = -EIO; } @@ -640,7 +649,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, set_extent_buffer_uptodate(eb); err: if (reads_done && - test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) + test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->ebflags)) btree_readahead_hook(root, eb, eb->start, ret); if (ret) { @@ -649,7 +658,7 @@ err: * again, we have to make sure it has something * to decrement */ - atomic_inc(&eb->io_pages); + atomic_inc(&eb_head(eb)->io_bvecs); clear_extent_buffer_uptodate(eb); } free_extent_buffer(eb); @@ -657,20 +666,6 @@ out: return ret; } -static int btree_io_failed_hook(struct page *page, int failed_mirror) -{ - struct extent_buffer *eb; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - - eb = (struct extent_buffer *)page->private; - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - eb->read_mirror = failed_mirror; - atomic_dec(&eb->io_pages); - if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, -EIO); - return -EIO; /* we fixed nothing */ -} - static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; @@ -4109,8 +4104,6 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) } static struct extent_io_ops btree_extent_io_ops = { - .readpage_end_io_hook = btree_readpage_end_io_hook, - 
.readpage_io_failed_hook = btree_io_failed_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 53059df..678a09b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -110,6 +110,9 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root) kfree(root); } +int verify_extent_buffer_read(struct btrfs_io_bio *io_bio, + struct page *page, + u64 start, u64 end, int mirror); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5bc7b9b..5d23935 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -14,6 +14,7 @@ #include "extent_io.h" #include "extent_map.h" #include "ctree.h" +#include "disk-io.h" #include "btrfs_inode.h" #include "volumes.h" #include "check-integrity.h" @@ -2120,7 +2121,7 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); - ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, + ret = repair_io_failure(root->fs_info, start, eb->len, start, p, mirror_num); if (ret) break; @@ -3551,17 +3552,88 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); +static void end_bio_extent_buffer_readpage(struct bio *bio, int err) +{ + struct address_space *mapping = bio->bi_io_vec->bv_page->mapping; + struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec = bio->bi_io_vec; + struct extent_buffer *eb; + struct page *page = bvec->bv_page; + struct 
btrfs_root *root; + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + u64 start; + u64 end; + int mirror; + int ret; - if (!trylock_page(p)) { - if (!flush) { - flush_write_bio(epd); - flush = 1; - } - lock_page(p); + root = BTRFS_I(page->mapping->host)->root; + + if (err) + uptodate = 0; + + do { + page = bvec->bv_page; + + if (!page->private) { + SetPageUptodate(page); + goto unlock; } - } - return ret; + eb = (struct extent_buffer *)page->private; + + start = io_bio->start_offset; + end = start + io_bio->len - 1; + + do { + /* + read_extent_buffer_pages() does not start + I/O on PG_uptodate pages. Hence the bio may + map only part of the extent buffer. + */ + if ((eb->start <= start) && (eb->start + eb->len - 1 > start)) + break; + } while ((eb = eb->eb_next) != NULL); + + BUG_ON(!eb); + + mirror = io_bio->mirror_num; + + if (uptodate) { + ret = verify_extent_buffer_read(io_bio, page, start, + end, mirror); + if (ret) + uptodate = 0; + } + + if (!uptodate) { + set_bit(EXTENT_BUFFER_IOERR, &eb->ebflags); + eb->read_mirror = mirror; + atomic_dec(&eb_head(eb)->io_bvecs); + if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, + &eb->ebflags)) + btree_readahead_hook(root, eb, eb->start, + -EIO); + ClearPageUptodate(page); + SetPageError(page); + goto unlock; + } + +unlock: + unlock_page(page); + ++bvec; + } while (bvec <= bvec_end); + + /* + We don't need to add a check to see if + extent_io_tree->track_uptodate is set or not, Since + this function only deals with extent buffers. 
+ */ + unlock_extent(tree, io_bio->start_offset, + io_bio->start_offset + io_bio->len - 1); + + bio_put(bio); } static void end_extent_buffer_writeback(struct extent_buffer *eb) @@ -5064,6 +5136,9 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, u64 start, int wait, get_extent_t *get_extent, int mirror_num) { + struct inode *inode = tree->mapping->host; + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct extent_state *cached_state = NULL; unsigned long i; unsigned long start_i; struct page *page; @@ -5076,7 +5151,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned long bio_flags = 0; - if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->ebflags)) return 0; if (start) { @@ -5104,21 +5179,34 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (all_uptodate) { if (start_i == 0) - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + set_bit(EXTENT_BUFFER_UPTODATE, &eb->ebflags); goto unlock_exit; } - clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); + clear_bit(EXTENT_BUFFER_IOERR, &eb->ebflags); eb->read_mirror = 0; - atomic_set(&eb->io_pages, num_reads); + atomic_set(&eb_head(eb)->io_bvecs, num_reads); + lock_extent_bits(tree, eb->start, eb->start + eb->len - 1, 0, + &cached_state); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (!PageUptodate(page)) { ClearPageError(page); - err = __extent_read_full_page(tree, page, - get_extent, &bio, - mirror_num, &bio_flags, - READ | REQ_META); + if (eb->len < PAGE_CACHE_SIZE) { + err = submit_extent_page(READ | REQ_META, tree, + page, eb->start >> 9, + eb->len, eb->start - page_offset(page), + fs_info->fs_devices->latest_bdev, + &bio, -1, end_bio_extent_buffer_readpage, + mirror_num, bio_flags, bio_flags); + } else { + err = submit_extent_page(READ | REQ_META, tree, + page, page_offset(page) >> 9, + PAGE_CACHE_SIZE, 0, + 
fs_info->fs_devices->latest_bdev, + &bio, -1, end_bio_extent_buffer_readpage, + mirror_num, bio_flags, bio_flags); + } if (err) ret = err; } else { @@ -5136,11 +5224,18 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (ret || wait != WAIT_COMPLETE) return ret; - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); + if (eb->len < PAGE_CACHE_SIZE) { + page = extent_buffer_page(eb, 0); wait_on_page_locked(page); - if (!PageUptodate(page)) + if (!extent_buffer_uptodate(eb)) ret = -EIO; + } else { + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + wait_on_page_locked(page); + if (!PageUptodate(page)) + ret = -EIO; + } } return ret;