diff mbox

[02/12] Btrfs: load checksum data once when submitting a direct read io

Message ID 1403955302-22396-3-git-send-email-miaox@cn.fujitsu.com (mailing list archive)
State Superseded
Headers show

Commit Message

Miao Xie June 28, 2014, 11:34 a.m. UTC
The current code loads the checksum data several times when a whole direct
read io is split because of the raid stripe limit, which makes us search the
csum tree several times. This just wastes time and makes the contention on
the csum tree root more serious. This patch fixes the problem by loading all
of the checksum data at once.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
 fs/btrfs/btrfs_inode.h |  1 -
 fs/btrfs/ctree.h       |  3 +--
 fs/btrfs/file-item.c   | 14 ++------------
 fs/btrfs/inode.c       | 40 ++++++++++++++++++++++------------------
 4 files changed, 25 insertions(+), 33 deletions(-)

Comments

Filipe Manana July 28, 2014, 5:24 p.m. UTC | #1
On Sat, Jun 28, 2014 at 12:34 PM, Miao Xie <miaox@cn.fujitsu.com> wrote:
> The current code would load checksum data for several times when we split
> a whole direct read io because of the limit of the raid stripe, it would
> make us search the csum tree for several times. In fact, it just wasted time,
> and made the contention of the csum tree root be more serious. This patch
> improves this problem by loading the data at once.
>
> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
> ---
>  fs/btrfs/btrfs_inode.h |  1 -
>  fs/btrfs/ctree.h       |  3 +--
>  fs/btrfs/file-item.c   | 14 ++------------
>  fs/btrfs/inode.c       | 40 ++++++++++++++++++++++------------------
>  4 files changed, 25 insertions(+), 33 deletions(-)
>
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index 4794923..7e9f53b 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -263,7 +263,6 @@ struct btrfs_dio_private {
>
>         /* dio_bio came from fs/direct-io.c */
>         struct bio *dio_bio;
> -       u8 csum[0];
>  };
>
>  /*
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index be91397..40e9938 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3739,8 +3739,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
>  int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>                           struct bio *bio, u32 *dst);
>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
> -                             struct btrfs_dio_private *dip, struct bio *bio,
> -                             u64 logical_offset);
> +                             struct bio *bio, u64 logical_offset);
>  int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
>                              struct btrfs_root *root,
>                              u64 objectid, u64 pos,
> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> index f46cfe4..cf1b94f 100644
> --- a/fs/btrfs/file-item.c
> +++ b/fs/btrfs/file-item.c
> @@ -299,19 +299,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>  }
>
>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
> -                             struct btrfs_dio_private *dip, struct bio *bio,
> -                             u64 offset)
> +                             struct bio *bio, u64 offset)
>  {
> -       int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
> -       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
> -       int ret;
> -
> -       len >>= inode->i_sb->s_blocksize_bits;
> -       len *= csum_size;
> -
> -       ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
> -                                     (u32 *)(dip->csum + len), 1);
> -       return ret;
> +       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
>  }
>
>  int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a3f102f..969fb22 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -7081,7 +7081,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>         struct inode *inode = dip->inode;
>         struct btrfs_root *root = BTRFS_I(inode)->root;
>         struct bio *dio_bio;
> -       u32 *csums = (u32 *)dip->csum;
> +       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
> +       u32 *csums = (u32 *)io_bio->csum;
>         u64 start;
>         int i;
>
> @@ -7123,6 +7124,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>         if (err)
>                 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
>         dio_end_io(dio_bio, err);
> +
> +       if (io_bio->end_io)
> +               io_bio->end_io(io_bio, err);
>         bio_put(bio);
>  }
>
> @@ -7261,13 +7265,20 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
>                 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
>                 if (ret)
>                         goto err;
> -       } else if (!skip_sum) {
> -               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
> +       } else {
> +               /*
> +                * We have loaded all the csum data we need when we submit
> +                * the first bio, so skip it.
> +                */
> +               if (dip->logical_offset != file_offset)
> +                       goto map;
> +
> +               /* Load all csum data at once. */
> +               ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
>                                                 file_offset);
>                 if (ret)
>                         goto err;
>         }
> -
>  map:
>         ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
>  err:
> @@ -7288,7 +7299,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
>         u64 submit_len = 0;
>         u64 map_length;
>         int nr_pages = 0;
> -       int ret = 0;
> +       int ret;
>         int async_submit = 0;
>
>         map_length = orig_bio->bi_iter.bi_size;
> @@ -7392,30 +7403,20 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>         struct btrfs_root *root = BTRFS_I(inode)->root;
>         struct btrfs_dio_private *dip;
>         struct bio *io_bio;
> +       struct btrfs_io_bio *btrfs_bio;
>         int skip_sum;
> -       int sum_len;
>         int write = rw & REQ_WRITE;
>         int ret = 0;
> -       u16 csum_size;
>
>         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
>
> -       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
> +       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS | __GFP_ZERO);

Hi Miao,

With this change (adding the __GFP_ZERO flag), I once ran into the
following warning while running xfstests (I don't know exactly which test
case triggered it; likely one of those that run fsstress):

[ 3941.856860] ------------[ cut here ]------------
[ 3941.856871] WARNING: CPU: 0 PID: 4154 at mm/mempool.c:205
mempool_alloc+0xc8/0x1c0()
[ 3941.856873] Modules linked in: btrfs xor raid6_pq binfmt_misc nfsd
auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4
i2c_core pcspkr evbug psmouse serio_raw e1000 [
last unloaded: btrfs]
[ 3941.856886] CPU: 0 PID: 4154 Comm: xfs_io Tainted: G        W
3.16.0-rc6-fdm-btrfs-next-37+ #1
[ 3941.856887] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 3941.856889]  0000000000000009 ffff8800d569f778 ffffffff8169a687
00000000000077b0
[ 3941.856892]  0000000000000000 ffff8800d569f7b8 ffffffff8104fb4c
00000000ffffffff
[ 3941.856894]  0000000000008050 0000000000000001 0000000000008050
ffff88004f921918
[ 3941.856896] Call Trace:
[ 3941.856901]  [<ffffffff8169a687>] dump_stack+0x4e/0x68
[ 3941.856904]  [<ffffffff8104fb4c>] warn_slowpath_common+0x8c/0xc0
[ 3941.856905]  [<ffffffff8104fb9a>] warn_slowpath_null+0x1a/0x20
[ 3941.856907]  [<ffffffff81151fc8>] mempool_alloc+0xc8/0x1c0
[ 3941.856911]  [<ffffffff810129cf>] ? save_stack_trace+0x2f/0x50
[ 3941.856918]  [<ffffffff8131331a>] bio_alloc_bioset+0x10a/0x1c0
[ 3941.856921]  [<ffffffff81314c68>] bio_clone_bioset+0x88/0x310
[ 3941.856923]  [<ffffffff81151a65>] ? mempool_alloc_slab+0x15/0x20
[ 3941.856936]  [<ffffffffa0209385>] btrfs_bio_clone+0x15/0x20 [btrfs]
[ 3941.856944]  [<ffffffffa01ed47f>] btrfs_submit_direct+0x4f/0x7b0 [btrfs]
[ 3941.856948]  [<ffffffff811fc10a>] ? do_blockdev_direct_IO+0x17ea/0x1f60
[ 3941.856952]  [<ffffffff810afb35>] ? mark_held_locks+0x75/0xa0
[ 3941.856955]  [<ffffffff816a383f>] ? _raw_spin_unlock_irqrestore+0x3f/0x70
[ 3941.856956]  [<ffffffff811fc13e>] do_blockdev_direct_IO+0x181e/0x1f60
[ 3941.856965]  [<ffffffffa01f86d0>] ?
btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
[ 3941.856972]  [<ffffffffa01ed430>] ?
btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
[ 3941.856974]  [<ffffffff811fc8cc>] __blockdev_direct_IO+0x4c/0x50
[ 3941.856981]  [<ffffffffa01f86d0>] ?
btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
[ 3941.856987]  [<ffffffffa01ed430>] ?
btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
[ 3941.856993]  [<ffffffffa01eb591>] btrfs_direct_IO+0x1a1/0x340 [btrfs]
[ 3941.856999]  [<ffffffffa01f86d0>] ?
btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
[ 3941.857005]  [<ffffffffa01ed430>] ?
btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
[ 3941.857007]  [<ffffffff81150210>] generic_file_direct_write+0xb0/0x180
[ 3941.857014]  [<ffffffffa01fc4a1>] btrfs_file_write_iter+0x411/0x560 [btrfs]
[ 3941.857017]  [<ffffffff811ba541>] new_sync_write+0x81/0xb0
[ 3941.857019]  [<ffffffff811bb342>] vfs_write+0xc2/0x1f0
[ 3941.857020]  [<ffffffff811bba2a>] SyS_pwrite64+0x9a/0xb0
[ 3941.857022]  [<ffffffff816a3d92>] system_call_fastpath+0x16/0x1b
[ 3941.857024] ---[ end trace c1dfd29523250709 ]---

Thanks.


>         if (!io_bio) {
>                 ret = -ENOMEM;
>                 goto free_ordered;
>         }
>
> -       if (!skip_sum && !write) {
> -               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
> -               sum_len = dio_bio->bi_iter.bi_size >>
> -                       inode->i_sb->s_blocksize_bits;
> -               sum_len *= csum_size;
> -       } else {
> -               sum_len = 0;
> -       }
> -
> -       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
> +       dip = kmalloc(sizeof(*dip), GFP_NOFS);
>         if (!dip) {
>                 ret = -ENOMEM;
>                 goto free_io_bio;
> @@ -7441,6 +7442,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>         if (!ret)
>                 return;
>
> +       btrfs_bio = btrfs_io_bio(io_bio);
> +       if (btrfs_bio->end_io)
> +               btrfs_bio->end_io(btrfs_bio, ret);
>  free_io_bio:
>         bio_put(io_bio);
>
> --
> 1.9.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Miao Xie July 29, 2014, 1:56 a.m. UTC | #2
On Mon, 28 Jul 2014 18:24:47 +0100, Filipe David Manana wrote:
> On Sat, Jun 28, 2014 at 12:34 PM, Miao Xie <miaox@cn.fujitsu.com> wrote:
>> The current code would load checksum data for several times when we split
>> a whole direct read io because of the limit of the raid stripe, it would
>> make us search the csum tree for several times. In fact, it just wasted time,
>> and made the contention of the csum tree root be more serious. This patch
>> improves this problem by loading the data at once.
>>
>> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
>> ---
>>  fs/btrfs/btrfs_inode.h |  1 -
>>  fs/btrfs/ctree.h       |  3 +--
>>  fs/btrfs/file-item.c   | 14 ++------------
>>  fs/btrfs/inode.c       | 40 ++++++++++++++++++++++------------------
>>  4 files changed, 25 insertions(+), 33 deletions(-)
>>
>> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
>> index 4794923..7e9f53b 100644
>> --- a/fs/btrfs/btrfs_inode.h
>> +++ b/fs/btrfs/btrfs_inode.h
>> @@ -263,7 +263,6 @@ struct btrfs_dio_private {
>>
>>         /* dio_bio came from fs/direct-io.c */
>>         struct bio *dio_bio;
>> -       u8 csum[0];
>>  };
>>
>>  /*
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index be91397..40e9938 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -3739,8 +3739,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
>>  int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>>                           struct bio *bio, u32 *dst);
>>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
>> -                             struct btrfs_dio_private *dip, struct bio *bio,
>> -                             u64 logical_offset);
>> +                             struct bio *bio, u64 logical_offset);
>>  int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
>>                              struct btrfs_root *root,
>>                              u64 objectid, u64 pos,
>> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
>> index f46cfe4..cf1b94f 100644
>> --- a/fs/btrfs/file-item.c
>> +++ b/fs/btrfs/file-item.c
>> @@ -299,19 +299,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>>  }
>>
>>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
>> -                             struct btrfs_dio_private *dip, struct bio *bio,
>> -                             u64 offset)
>> +                             struct bio *bio, u64 offset)
>>  {
>> -       int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
>> -       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
>> -       int ret;
>> -
>> -       len >>= inode->i_sb->s_blocksize_bits;
>> -       len *= csum_size;
>> -
>> -       ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
>> -                                     (u32 *)(dip->csum + len), 1);
>> -       return ret;
>> +       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
>>  }
>>
>>  int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>> index a3f102f..969fb22 100644
>> --- a/fs/btrfs/inode.c
>> +++ b/fs/btrfs/inode.c
>> @@ -7081,7 +7081,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>>         struct inode *inode = dip->inode;
>>         struct btrfs_root *root = BTRFS_I(inode)->root;
>>         struct bio *dio_bio;
>> -       u32 *csums = (u32 *)dip->csum;
>> +       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
>> +       u32 *csums = (u32 *)io_bio->csum;
>>         u64 start;
>>         int i;
>>
>> @@ -7123,6 +7124,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>>         if (err)
>>                 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
>>         dio_end_io(dio_bio, err);
>> +
>> +       if (io_bio->end_io)
>> +               io_bio->end_io(io_bio, err);
>>         bio_put(bio);
>>  }
>>
>> @@ -7261,13 +7265,20 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
>>                 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
>>                 if (ret)
>>                         goto err;
>> -       } else if (!skip_sum) {
>> -               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
>> +       } else {
>> +               /*
>> +                * We have loaded all the csum data we need when we submit
>> +                * the first bio, so skip it.
>> +                */
>> +               if (dip->logical_offset != file_offset)
>> +                       goto map;
>> +
>> +               /* Load all csum data at once. */
>> +               ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
>>                                                 file_offset);
>>                 if (ret)
>>                         goto err;
>>         }
>> -
>>  map:
>>         ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
>>  err:
>> @@ -7288,7 +7299,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
>>         u64 submit_len = 0;
>>         u64 map_length;
>>         int nr_pages = 0;
>> -       int ret = 0;
>> +       int ret;
>>         int async_submit = 0;
>>
>>         map_length = orig_bio->bi_iter.bi_size;
>> @@ -7392,30 +7403,20 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>>         struct btrfs_root *root = BTRFS_I(inode)->root;
>>         struct btrfs_dio_private *dip;
>>         struct bio *io_bio;
>> +       struct btrfs_io_bio *btrfs_bio;
>>         int skip_sum;
>> -       int sum_len;
>>         int write = rw & REQ_WRITE;
>>         int ret = 0;
>> -       u16 csum_size;
>>
>>         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
>>
>> -       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
>> +       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS | __GFP_ZERO);
> 
> Hi Miao,
> 
> With this change (adding the __GFP_ZERO flag), I ran once into the
> following warning while running xfstests (dunno exactly which test
> case triggered it, likely one of those that run fsstress):

Thanks for testing.
I'll fix it.

Miao

> 
> [ 3941.856860] ------------[ cut here ]------------
> [ 3941.856871] WARNING: CPU: 0 PID: 4154 at mm/mempool.c:205
> mempool_alloc+0xc8/0x1c0()
> [ 3941.856873] Modules linked in: btrfs xor raid6_pq binfmt_misc nfsd
> auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4
> i2c_core pcspkr evbug psmouse serio_raw e1000 [
> last unloaded: btrfs]
> [ 3941.856886] CPU: 0 PID: 4154 Comm: xfs_io Tainted: G        W
> 3.16.0-rc6-fdm-btrfs-next-37+ #1
> [ 3941.856887] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> [ 3941.856889]  0000000000000009 ffff8800d569f778 ffffffff8169a687
> 00000000000077b0
> [ 3941.856892]  0000000000000000 ffff8800d569f7b8 ffffffff8104fb4c
> 00000000ffffffff
> [ 3941.856894]  0000000000008050 0000000000000001 0000000000008050
> ffff88004f921918
> [ 3941.856896] Call Trace:
> [ 3941.856901]  [<ffffffff8169a687>] dump_stack+0x4e/0x68
> [ 3941.856904]  [<ffffffff8104fb4c>] warn_slowpath_common+0x8c/0xc0
> [ 3941.856905]  [<ffffffff8104fb9a>] warn_slowpath_null+0x1a/0x20
> [ 3941.856907]  [<ffffffff81151fc8>] mempool_alloc+0xc8/0x1c0
> [ 3941.856911]  [<ffffffff810129cf>] ? save_stack_trace+0x2f/0x50
> [ 3941.856918]  [<ffffffff8131331a>] bio_alloc_bioset+0x10a/0x1c0
> [ 3941.856921]  [<ffffffff81314c68>] bio_clone_bioset+0x88/0x310
> [ 3941.856923]  [<ffffffff81151a65>] ? mempool_alloc_slab+0x15/0x20
> [ 3941.856936]  [<ffffffffa0209385>] btrfs_bio_clone+0x15/0x20 [btrfs]
> [ 3941.856944]  [<ffffffffa01ed47f>] btrfs_submit_direct+0x4f/0x7b0 [btrfs]
> [ 3941.856948]  [<ffffffff811fc10a>] ? do_blockdev_direct_IO+0x17ea/0x1f60
> [ 3941.856952]  [<ffffffff810afb35>] ? mark_held_locks+0x75/0xa0
> [ 3941.856955]  [<ffffffff816a383f>] ? _raw_spin_unlock_irqrestore+0x3f/0x70
> [ 3941.856956]  [<ffffffff811fc13e>] do_blockdev_direct_IO+0x181e/0x1f60
> [ 3941.856965]  [<ffffffffa01f86d0>] ?
> btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.856972]  [<ffffffffa01ed430>] ?
> btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.856974]  [<ffffffff811fc8cc>] __blockdev_direct_IO+0x4c/0x50
> [ 3941.856981]  [<ffffffffa01f86d0>] ?
> btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.856987]  [<ffffffffa01ed430>] ?
> btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.856993]  [<ffffffffa01eb591>] btrfs_direct_IO+0x1a1/0x340 [btrfs]
> [ 3941.856999]  [<ffffffffa01f86d0>] ?
> btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.857005]  [<ffffffffa01ed430>] ?
> btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.857007]  [<ffffffff81150210>] generic_file_direct_write+0xb0/0x180
> [ 3941.857014]  [<ffffffffa01fc4a1>] btrfs_file_write_iter+0x411/0x560 [btrfs]
> [ 3941.857017]  [<ffffffff811ba541>] new_sync_write+0x81/0xb0
> [ 3941.857019]  [<ffffffff811bb342>] vfs_write+0xc2/0x1f0
> [ 3941.857020]  [<ffffffff811bba2a>] SyS_pwrite64+0x9a/0xb0
> [ 3941.857022]  [<ffffffff816a3d92>] system_call_fastpath+0x16/0x1b
> [ 3941.857024] ---[ end trace c1dfd29523250709 ]---
> 
> Thanks.
> 
> 
>>         if (!io_bio) {
>>                 ret = -ENOMEM;
>>                 goto free_ordered;
>>         }
>>
>> -       if (!skip_sum && !write) {
>> -               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
>> -               sum_len = dio_bio->bi_iter.bi_size >>
>> -                       inode->i_sb->s_blocksize_bits;
>> -               sum_len *= csum_size;
>> -       } else {
>> -               sum_len = 0;
>> -       }
>> -
>> -       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
>> +       dip = kmalloc(sizeof(*dip), GFP_NOFS);
>>         if (!dip) {
>>                 ret = -ENOMEM;
>>                 goto free_io_bio;
>> @@ -7441,6 +7442,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>>         if (!ret)
>>                 return;
>>
>> +       btrfs_bio = btrfs_io_bio(io_bio);
>> +       if (btrfs_bio->end_io)
>> +               btrfs_bio->end_io(btrfs_bio, ret);
>>  free_io_bio:
>>         bio_put(io_bio);
>>
>> --
>> 1.9.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923..7e9f53b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -263,7 +263,6 @@  struct btrfs_dio_private {
 
 	/* dio_bio came from fs/direct-io.c */
 	struct bio *dio_bio;
-	u8 csum[0];
 };
 
 /*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397..40e9938 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3739,8 +3739,7 @@  int btrfs_del_csums(struct btrfs_trans_handle *trans,
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 			  struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct btrfs_dio_private *dip, struct bio *bio,
-			      u64 logical_offset);
+			      struct bio *bio, u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 objectid, u64 pos,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe4..cf1b94f 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -299,19 +299,9 @@  int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct btrfs_dio_private *dip, struct bio *bio,
-			      u64 offset)
+			      struct bio *bio, u64 offset)
 {
-	int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
-	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-	int ret;
-
-	len >>= inode->i_sb->s_blocksize_bits;
-	len *= csum_size;
-
-	ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
-				      (u32 *)(dip->csum + len), 1);
-	return ret;
+	return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a3f102f..969fb22 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7081,7 +7081,8 @@  static void btrfs_endio_direct_read(struct bio *bio, int err)
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct bio *dio_bio;
-	u32 *csums = (u32 *)dip->csum;
+	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+	u32 *csums = (u32 *)io_bio->csum;
 	u64 start;
 	int i;
 
@@ -7123,6 +7124,9 @@  static void btrfs_endio_direct_read(struct bio *bio, int err)
 	if (err)
 		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
 	dio_end_io(dio_bio, err);
+
+	if (io_bio->end_io)
+		io_bio->end_io(io_bio, err);
 	bio_put(bio);
 }
 
@@ -7261,13 +7265,20 @@  static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 		ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
 		if (ret)
 			goto err;
-	} else if (!skip_sum) {
-		ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
+	} else {
+		/*
+		 * We have loaded all the csum data we need when we submit
+		 * the first bio, so skip it.
+		 */
+		if (dip->logical_offset != file_offset)
+			goto map;
+
+		/* Load all csum data at once. */
+		ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
 						file_offset);
 		if (ret)
 			goto err;
 	}
-
 map:
 	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 err:
@@ -7288,7 +7299,7 @@  static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	u64 submit_len = 0;
 	u64 map_length;
 	int nr_pages = 0;
-	int ret = 0;
+	int ret;
 	int async_submit = 0;
 
 	map_length = orig_bio->bi_iter.bi_size;
@@ -7392,30 +7403,20 @@  static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_dio_private *dip;
 	struct bio *io_bio;
+	struct btrfs_io_bio *btrfs_bio;
 	int skip_sum;
-	int sum_len;
 	int write = rw & REQ_WRITE;
 	int ret = 0;
-	u16 csum_size;
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
+	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS | __GFP_ZERO);
 	if (!io_bio) {
 		ret = -ENOMEM;
 		goto free_ordered;
 	}
 
-	if (!skip_sum && !write) {
-		csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-		sum_len = dio_bio->bi_iter.bi_size >>
-			inode->i_sb->s_blocksize_bits;
-		sum_len *= csum_size;
-	} else {
-		sum_len = 0;
-	}
-
-	dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
+	dip = kmalloc(sizeof(*dip), GFP_NOFS);
 	if (!dip) {
 		ret = -ENOMEM;
 		goto free_io_bio;
@@ -7441,6 +7442,9 @@  static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 	if (!ret)
 		return;
 
+	btrfs_bio = btrfs_io_bio(io_bio);
+	if (btrfs_bio->end_io)
+		btrfs_bio->end_io(btrfs_bio, ret);
 free_io_bio:
 	bio_put(io_bio);