From patchwork Thu May 6 19:01:01 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Josef Bacik X-Patchwork-Id: 97434 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o46IqZai015058 for ; Thu, 6 May 2010 18:52:36 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756287Ab0EFSwK (ORCPT ); Thu, 6 May 2010 14:52:10 -0400 Received: from mx1.redhat.com ([209.132.183.28]:20803 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755713Ab0EFSwI (ORCPT ); Thu, 6 May 2010 14:52:08 -0400 Received: from int-mx08.intmail.prod.int.phx2.redhat.com (int-mx08.intmail.prod.int.phx2.redhat.com [10.5.11.21]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o46Iq7Pl007919 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Thu, 6 May 2010 14:52:07 -0400 Received: from dhcp231-156.rdu.redhat.com (dhcp231-156.rdu.redhat.com [10.11.231.156]) by int-mx08.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o46Iq6LJ017459; Thu, 6 May 2010 14:52:06 -0400 Date: Thu, 6 May 2010 15:01:01 -0400 From: Josef Bacik To: linux-btrfs@vger.kernel.org, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 3/3] Btrfs: add basic DIO read support Message-ID: <20100506190101.GD13974@dhcp231-156.rdu.redhat.com> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.19 (2009-01-05) X-Scanned-By: MIMEDefang 2.67 on 10.5.11.21 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Thu, 06 May 2010 18:52:36 +0000 (UTC) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 746a724..36994e8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2257,6 +2257,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); +int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, + struct bio *bio, u64 logical_offset, u32 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54a2550..34ea718 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, } -int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u32 *dst) +static int __btrfs_lookup_bio_sums(struct btrfs_root *root, + struct inode *inode, struct bio *bio, + u64 logical_offset, u32 *dst, int dio) { u32 sum; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; - u64 offset; + u64 offset = 0; u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; @@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, WARN_ON(bio->bi_vcnt <= 0); disk_bytenr = (u64)bio->bi_sector << 9; + if (dio) + offset = logical_offset; while (bio_index < bio->bi_vcnt) { - offset = page_offset(bvec->bv_page) + bvec->bv_offset; + if (!dio) + offset = page_offset(bvec->bv_page) + bvec->bv_offset; ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); if (ret == 0) goto found; @@ -238,6 +242,7 @@ found: else set_state_private(io_tree, offset, sum); disk_bytenr += bvec->bv_len; + offset += bvec->bv_len; bio_index++; bvec++; } @@ -245,6 +250,18 @@ found: return 0; } +int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, + struct bio *bio, u32 *dst) +{ + return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); +} + +int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, + struct bio *bio, u64 offset, u32 *dst) +{ + return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); +} + int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bfdc64..ce7008a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4875,11 +4875,217 @@ out: return em; } +static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + struct extent_map *em; + u64 start = iblock << inode->i_blkbits; + u64 len = bh_result->b_size; + + + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + if (IS_ERR(em)) + return PTR_ERR(em); + + /* + * Ok for INLINE and COMPRESSED extents we need to fallback on buffered + * io. INLINE is special, and we could probably kludge it in here, but + * it's still buffered so for safety lets just fall back to the generic + * buffered path. + * + * For COMPRESSED we _have_ to read the entire extent in so we can + * decompress it, so there will be buffering required no matter what we + * do, so go ahead and fallback to buffered. + * + * We return -ENOTBLK because thats what makes DIO go ahead and go back + * to buffered IO. Don't blame me, this is the price we pay for using + * the generic code. + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || + em->block_start == EXTENT_MAP_INLINE) { + free_extent_map(em); + return -ENOTBLK; + } + + /* Just a good old fashioned hole, return */ + if (em->block_start == EXTENT_MAP_HOLE) { + free_extent_map(em); + return 0; + } + + /* + * We use the relative offset in the file so that our own submit bio + * routine will do the mapping to the real blocks. + */ + bh_result->b_blocknr = start >> inode->i_blkbits; + bh_result->b_size = em->len - (start - em->start); + bh_result->b_bdev = em->bdev; + set_buffer_mapped(bh_result); + set_buffer_boundary(bh_result); + + free_extent_map(em); + + return 0; +} + +struct btrfs_dio_private { + struct inode *inode; + u64 logical_offset; + u32 *csums; + void *private; +}; + +static void btrfs_endio_direct(struct bio *bio, int err) +{ + struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec = bio->bi_io_vec; + struct btrfs_dio_private *dip = bio->bi_private; + struct inode *inode = dip->inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 start; + u32 *private = dip->csums; + + start = dip->logical_offset; + do { + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { + struct page *page = bvec->bv_page; + char *kaddr; + u32 csum = ~(u32)0; + + kaddr = kmap_atomic(page, KM_USER0); + csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, + csum, bvec->bv_len); + btrfs_csum_final(csum, (char *)&csum); + kunmap_atomic(kaddr, KM_USER0); + + if (csum != *private) { + printk(KERN_ERR "btrfs csum failed ino %lu off" + " %llu csum %u private %u\n", + inode->i_ino, (unsigned long long)start, + csum, *private); + err = -EIO; + } + } + + start += bvec->bv_len; + private++; + bvec++; + } while (bvec <= bvec_end); + + bio->bi_private = dip->private; + + kfree(dip->csums); + kfree(dip); + dio_end_io(bio, err); +} + +static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map *em; + struct btrfs_dio_private *dip; + struct bio_vec *bvec = bio->bi_io_vec; + u64 start; + int skip_sum; + int ret = 0; + + dip = kmalloc(sizeof(*dip), GFP_NOFS); + if (!dip) { + bio_endio(bio, -ENOMEM); + return; + } + + dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); + if (!dip->csums) { + kfree(dip); + bio_endio(bio, -ENOMEM); + } + + dip->private = bio->bi_private; + dip->inode = inode; + dip->logical_offset = (u64)bio->bi_sector << 9; + + start = dip->logical_offset; + em = btrfs_get_extent(inode, NULL, 0, start, bvec->bv_len, 0); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out_err; + } + + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + printk(KERN_ERR "dio to inode resulted in a bad extent " + "(%llu) %llu\n", (unsigned long long)em->block_start, start); + ret = -EIO; + free_extent_map(em); + goto out_err; + } + + bio->bi_sector = + (em->block_start + (dip->logical_offset - em->start)) >> 9; + bio->bi_private = dip; + + free_extent_map(em); + skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; + + bio->bi_end_io = btrfs_endio_direct; + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + if (ret) + goto out_err; + + if (!skip_sum) + btrfs_lookup_bio_sums_dio(root, inode, bio, + dip->logical_offset, dip->csums); + + ret = btrfs_map_bio(root, rw, bio, 0, 0); + if (ret) + goto out_err; + return; +out_err: + kfree(dip->csums); + kfree(dip); + bio_endio(bio, ret); +} + static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { - return -EINVAL; + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct extent_state *cached_state = NULL; + struct btrfs_ordered_extent *ordered; + u64 len = 0; + ssize_t ret; + int i; + + if (rw == WRITE) + return 0; + + while (1) { + lock_extent_bits(&BTRFS_I(inode)->io_tree, offset, + offset + iov_length(iov, nr_segs) - 1, 0, + &cached_state, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, offset); + if (!ordered) + break; + unlock_extent_cached(&BTRFS_I(inode)->io_tree, offset, + offset + iov_length(iov, nr_segs) - 1, + &cached_state, GFP_NOFS); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + cond_resched(); + } + + ret = blockdev_direct_IO_own_submit(rw, iocb, inode, NULL, iov, + offset, nr_segs, + btrfs_get_blocks_direct, + btrfs_submit_direct); + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, offset, + offset + iov_length(iov, nr_segs) - 1, + &cached_state, GFP_NOFS); + return ret; } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,