Message ID | 20240102123918.799062-3-yi.zhang@huaweicloud.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | ext4: use iomap for regular file's buffered IO path and enable large folio | expand |
On Tue 02-01-24 20:38:55, Zhang Yi wrote: > From: Zhang Yi <yi.zhang@huawei.com> > > ext4_da_map_blocks() only holds i_data_sem in shared mode and i_rwsem > when inserting delalloc extents, so it could race with another querying > path of ext4_map_blocks() without i_rwsem, e.g. the buffered read path. > Suppose we do a buffered read of a file containing just a hole, without any > cached extent tree, and it races with another delayed buffered write > to the same area or a nearby area belonging to the same hole; the new > delalloc extent could then be overwritten by a hole extent. > > pread() pwrite() > filemap_read_folio() > ext4_mpage_readpages() > ext4_map_blocks() > down_read(i_data_sem) > ext4_ext_determine_hole() > //find hole > ext4_ext_put_gap_in_cache() > ext4_es_find_extent_range() > //no delalloc extent > ext4_da_map_blocks() > down_read(i_data_sem) > ext4_insert_delayed_block() > //insert delalloc extent > ext4_es_insert_extent() > //overwrite delalloc extent to hole > > This race could lead to an inconsistent delalloc extent tree and an > incorrect reserved space counter. Fix this by holding > i_data_sem in exclusive mode when adding a new delalloc extent in > ext4_da_map_blocks(). > > Cc: stable@vger.kernel.org > Signed-off-by: Zhang Yi <yi.zhang@huawei.com> > Suggested-by: Jan Kara <jack@suse.cz> Looks good to me! 
Feel free to add: Reviewed-by: Jan Kara <jack@suse.cz> Honza > --- > fs/ext4/inode.c | 25 +++++++++++-------------- > 1 file changed, 11 insertions(+), 14 deletions(-) > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 5b0d3075be12..142c67f5c7fc 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -1703,10 +1703,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, > > /* Lookup extent status tree firstly */ > if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { > - if (ext4_es_is_hole(&es)) { > - down_read(&EXT4_I(inode)->i_data_sem); > + if (ext4_es_is_hole(&es)) > goto add_delayed; > - } > > /* > * Delayed extent could be allocated by fallocate. > @@ -1748,8 +1746,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, > retval = ext4_ext_map_blocks(NULL, inode, map, 0); > else > retval = ext4_ind_map_blocks(NULL, inode, map, 0); > - if (retval < 0) > - goto out_unlock; > + if (retval < 0) { > + up_read(&EXT4_I(inode)->i_data_sem); > + return retval; > + } > if (retval > 0) { > unsigned int status; > > @@ -1765,24 +1765,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, > EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; > ext4_es_insert_extent(inode, map->m_lblk, map->m_len, > map->m_pblk, status); > - goto out_unlock; > + up_read(&EXT4_I(inode)->i_data_sem); > + return retval; > } > + up_read(&EXT4_I(inode)->i_data_sem); > > add_delayed: > - /* > - * XXX: __block_prepare_write() unmaps passed block, > - * is it OK? > - */ > + down_write(&EXT4_I(inode)->i_data_sem); > retval = ext4_insert_delayed_block(inode, map->m_lblk); > + up_write(&EXT4_I(inode)->i_data_sem); > if (retval) > - goto out_unlock; > + return retval; > > map_bh(bh, inode->i_sb, invalid_block); > set_buffer_new(bh); > set_buffer_delay(bh); > - > -out_unlock: > - up_read((&EXT4_I(inode)->i_data_sem)); > return retval; > } > > -- > 2.39.2 >
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5b0d3075be12..142c67f5c7fc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1703,10 +1703,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1748,8 +1746,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - if (retval < 0) - goto out_unlock; + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } if (retval > 0) { unsigned int status; @@ -1765,24 +1765,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); - goto out_unlock; + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); add_delayed: - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ + down_write(&EXT4_I(inode)->i_data_sem); retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); if (retval) - goto out_unlock; + return retval; map_bh(bh, inode->i_sb, invalid_block); set_buffer_new(bh); set_buffer_delay(bh); - -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); return retval; }