@@ -3201,6 +3201,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 extent_offset;
u64 extent_end;
u64 iosize;
+ u64 blk, nr_blks;
+ u64 blk_submitted;
sector_t sector;
struct extent_state *cached_state = NULL;
struct block_device *bdev;
@@ -3267,11 +3269,26 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
iosize = min(extent_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize);
+ blk = extent_offset >> inode->i_sb->s_blocksize_bits;
+ nr_blks = iosize >> inode->i_sb->s_blocksize_bits;
+
+ blk_submitted = find_next_bit(ordered->blocks_submitted,
+ ordered->len >> inode->i_sb->s_blocksize_bits,
+ blk);
+ if (blk_submitted < blk + nr_blks) {
+ if (blk_submitted == blk) {
+ cur += blocksize;
+ btrfs_put_ordered_extent(ordered);
+ continue;
+ }
+ iosize = (blk_submitted - blk)
+ << inode->i_sb->s_blocksize_bits;
+ nr_blks = iosize >> inode->i_sb->s_blocksize_bits;
+ }
+
sector = (ordered->start + extent_offset) >> 9;
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
compressed = test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags);
- btrfs_put_ordered_extent(ordered);
- ordered = NULL;
/*
* compressed and inline extents are written through other
@@ -3284,6 +3301,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
*/
nr++;
cur += iosize;
+ btrfs_put_ordered_extent(ordered);
continue;
}
@@ -3298,6 +3316,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
} else {
unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
+ bitmap_set(ordered->blocks_submitted, blk, nr_blks);
+ btrfs_put_ordered_extent(ordered);
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
@@ -199,13 +199,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
nr_longs = BITS_TO_LONGS(len >> inode->i_sb->s_blocksize_bits);
if (nr_longs == 1) {
entry->blocks_done = &entry->blocks_bitmap;
+ entry->blocks_submitted = &entry->blocks_submitted_bitmap;
} else {
- entry->blocks_done = kzalloc(nr_longs * sizeof(unsigned long),
+ entry->blocks_done = kzalloc(2 * nr_longs * sizeof(unsigned long),
GFP_NOFS);
if (!entry->blocks_done) {
kmem_cache_free(btrfs_ordered_extent_cache, entry);
return -ENOMEM;
}
+ entry->blocks_submitted = entry->blocks_done + nr_longs;
}
entry->file_offset = file_offset;
@@ -139,6 +139,10 @@ struct btrfs_ordered_extent {
/* bitmap to track the blocks that have been written to disk */
unsigned long *blocks_done;
unsigned long blocks_bitmap;
+
+ /* bitmap to track the blocks that have been submitted for write i/o */
+ unsigned long *blocks_submitted;
+ unsigned long blocks_submitted_bitmap;
};
/*
In the subpagesize-blocksize scenario, the following command (with 4k as
the PAGE_SIZE and 2k as the block size) can cause incorrect accounting of
which blocks of an ordered extent have been written to disk:

$ xfs_io -f -c "pwrite 0 10240" \
            -c "sync_range 0 4096" \
            -c "sync_range 8192 2048" \
            -c "pwrite 10240 2048" \
            -c "sync_range 10240 2048" \
            /mnt/btrfs/file.bin

To fix this, explicitly track the blocks of an ordered extent that have
already been submitted for write I/O.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/btrfs/extent_io.c    | 24 ++++++++++++++++++++++--
 fs/btrfs/ordered-data.c |  4 +++-
 fs/btrfs/ordered-data.h |  4 ++++
 3 files changed, 29 insertions(+), 3 deletions(-)