@@ -2728,7 +2728,8 @@ int btrfs_check_data_free_space(struct inode *inode,
void btrfs_free_reserved_data_space(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len);
+ struct extent_changeset *reserved,
+ u64 start, u64 len, bool qgroup_free);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -2743,10 +2744,12 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free);
int btrfs_delalloc_reserve_space(struct inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
@@ -5761,6 +5761,9 @@ int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
if (num_bytes == 0)
return 0;
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+ if (ret)
+ return ret;
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
@@ -5773,11 +5776,16 @@ int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
/**
* btrfs_inode_rsv_release - release any excessive reservation.
* @inode - the inode we need to release from.
+ * @qgroup_free - free or convert qgroup meta.
+ * Unlike normal operation, qgroup meta reservation needs to know if
+ * we are freeing qgroup reservation or just convert them into per-trans.
+ * Normally @qgroup_free is true for error handler, and false for normal
+ * release.
*
* This is the same as btrfs_block_rsv_release, except that it handles the
* tracepoint for the reservation.
*/
-void btrfs_inode_rsv_release(struct btrfs_inode *inode)
+void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@@ -5793,6 +5801,10 @@ void btrfs_inode_rsv_release(struct btrfs_inode *inode)
if (released > 0)
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), released, 0);
+ if (qgroup_free)
+ btrfs_qgroup_free_meta_prealloc(inode->root, released);
+ else
+ btrfs_qgroup_convert_reserved_meta(inode->root, released);
}
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -6052,7 +6064,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
@@ -6090,19 +6101,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
- if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
- ret = btrfs_qgroup_reserve_meta_prealloc(root,
- nr_extents * fs_info->nodesize, true);
- if (ret)
- goto out_fail;
- }
-
ret = btrfs_inode_rsv_refill(inode, flush);
- if (unlikely(ret)) {
- btrfs_qgroup_free_meta_prealloc(root,
- nr_extents * fs_info->nodesize);
+ if (unlikely(ret))
goto out_fail;
- }
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
@@ -6116,7 +6117,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
- btrfs_inode_rsv_release(inode);
+ btrfs_inode_rsv_release(inode, true);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return ret;
@@ -6126,12 +6127,14 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
* btrfs_delalloc_release_metadata - release a metadata reservation for an inode
* @inode: the inode to release the reservation for.
* @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
*
* This will release the metadata reservation for an inode. This can be called
* once we complete IO for a given set of bytes to release their metadata
* reservations, or on error for the same reason.
*/
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
@@ -6144,13 +6147,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
if (btrfs_is_testing(fs_info))
return;
- btrfs_inode_rsv_release(inode);
+ btrfs_inode_rsv_release(inode, qgroup_free);
}
/**
* btrfs_delalloc_release_extents - release our outstanding_extents
* @inode: the inode to balance the reservation for.
* @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
*
* When we reserve space we increase outstanding_extents for the extents we may
* add. Once we've set the range as delalloc or created our ordered extents we
@@ -6158,7 +6162,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
* temporarily tracked outstanding_extents. This _must_ be used in conjunction
* with btrfs_delalloc_reserve_metadata.
*/
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+ bool qgroup_free)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
unsigned num_extents;
@@ -6172,7 +6177,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
if (btrfs_is_testing(fs_info))
return;
- btrfs_inode_rsv_release(inode);
+ btrfs_inode_rsv_release(inode, qgroup_free);
}
/**
@@ -6228,9 +6233,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
*/
void btrfs_delalloc_release_space(struct inode *inode,
struct extent_changeset *reserved,
- u64 start, u64 len)
+ u64 start, u64 len, bool qgroup_free)
{
- btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
btrfs_free_reserved_data_space(inode, reserved, start, len);
}
@@ -1690,7 +1690,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
force_page_uptodate);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes);
+ reserve_bytes, true);
break;
}
@@ -1702,7 +1702,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (extents_locked == -EAGAIN)
goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes);
+ reserve_bytes, true);
ret = extents_locked;
break;
}
@@ -1737,7 +1737,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
fs_info->sb->s_blocksize_bits;
if (only_release_metadata) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
- release_bytes);
+ release_bytes, true);
} else {
u64 __pos;
@@ -1746,7 +1746,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
(dirty_pages << PAGE_SHIFT);
btrfs_delalloc_release_space(inode,
data_reserved, __pos,
- release_bytes);
+ release_bytes, true);
}
}
@@ -1760,7 +1760,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
GFP_NOFS);
- btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
+ (ret != 0));
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
@@ -1800,11 +1801,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (only_release_metadata) {
btrfs_end_write_no_snapshotting(root);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
- release_bytes);
+ release_bytes, true);
} else {
btrfs_delalloc_release_space(inode, data_reserved,
round_down(pos, fs_info->sectorsize),
- release_bytes);
+ release_bytes, true);
}
}
@@ -3548,7 +3548,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (ret) {
if (release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
- inode->i_size);
+ inode->i_size, true);
#ifdef DEBUG
btrfs_err(fs_info,
"failed to write free ino cache for root %llu",
@@ -500,12 +500,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
- btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
out_put:
iput(inode);
out_release:
@@ -1839,7 +1839,7 @@ static void btrfs_clear_bit_hook(void *private_data,
*/
if (*bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, len);
+ btrfs_delalloc_release_metadata(inode, len, false);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
@@ -2102,7 +2102,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
0);
ClearPageChecked(page);
set_page_dirty(page);
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -4758,8 +4758,8 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved,
- block_start, blocksize);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+ block_start, blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
ret = -ENOMEM;
goto out;
}
@@ -4827,8 +4827,8 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
out_unlock:
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start,
- blocksize);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+ blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
unlock_page(page);
put_page(page);
out:
@@ -8812,7 +8812,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
btrfs_delalloc_release_space(inode, data_reserved,
- offset, dio_data.reserve);
+ offset, dio_data.reserve, true);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
@@ -8828,8 +8828,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
false);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved,
- offset, count - (size_t)ret);
- btrfs_delalloc_release_extents(BTRFS_I(inode), count);
+ offset, count - (size_t)ret, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
}
out:
if (wakeup)
@@ -9148,7 +9148,8 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
if (reserved_space < PAGE_SIZE) {
end = page_start + reserved_space - 1;
btrfs_delalloc_release_space(inode, data_reserved,
- page_start, PAGE_SIZE - reserved_space);
+ page_start, PAGE_SIZE - reserved_space,
+ true);
}
}
@@ -9198,16 +9199,16 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
out_unlock:
if (!ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
}
unlock_page(page);
out:
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
btrfs_delalloc_release_space(inode, data_reserved, page_start,
- reserved_space);
+ reserved_space, (ret != 0));
out_noreserve:
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
@@ -1198,7 +1198,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
- (page_cnt - i_done) << PAGE_SHIFT);
+ (page_cnt - i_done) << PAGE_SHIFT, true);
}
@@ -1217,7 +1217,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+ false);
extent_changeset_free(data_reserved);
return i_done;
out:
@@ -1227,8 +1228,9 @@ static int cluster_pages_for_defrag(struct inode *inode,
}
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
- page_cnt << PAGE_SHIFT);
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+ page_cnt << PAGE_SHIFT, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+ true);
extent_changeset_free(data_reserved);
return ret;
@@ -610,7 +610,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
+ btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
@@ -3226,7 +3226,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
mask);
if (!page) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE);
+ PAGE_SIZE, true);
ret = -ENOMEM;
goto out;
}
@@ -3245,9 +3245,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
unlock_page(page);
put_page(page);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE);
+ PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
+ PAGE_SIZE, true);
ret = -EIO;
goto out;
}
@@ -3278,7 +3278,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
put_page(page);
index++;
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
+ false);
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(fs_info);
}
Before this patch, btrfs qgroup is mixing per-transcation meta rsv with preallocated meta rsv, making it quite easy to underflow qgroup meta reservation. Since we have the new qgroup meta rsv types, apply it to delalloc reservation. Now for delalloc, most of its reserved space will use META_PREALLOC qgroup rsv type. And for callers reducing outstanding extent like btrfs_finish_ordered_io(), they will convert corresponding META_PREALLOC reservation to META_PERTRANS. This is mainly due to the fact that current qgroup numbers will only be updated in btrfs_commit_transaction(), that's to say if we don't keep such placeholder reservation, we can exceed qgroup limitation. And for callers freeing outstanding extent in error handler, we will just free META_PREALLOC bytes. This behavior makes callers of btrfs_qgroup_release_meta() or btrfs_qgroup_convert_meta() to be aware of which type they are. So in this patch, btrfs_delalloc_release_metadata() and its callers get an extra parameter to info qgroup to do correct meta convert/release. The good news is, even we use the wrong type (convert or free), it won't cause obvious bug, as prealloc type is always in good shape, and the type only affects how per-trans meta is increased or not. So the worst case will be at most metadata limitation can be sometimes exceeded (no convert at all) or metadata limitation is reached too soon (no free at all). Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/ctree.h | 9 ++++++--- fs/btrfs/extent-tree.c | 45 +++++++++++++++++++++++++-------------------- fs/btrfs/file.c | 15 ++++++++------- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/inode-map.c | 4 ++-- fs/btrfs/inode.c | 27 ++++++++++++++------------- fs/btrfs/ioctl.c | 10 ++++++---- fs/btrfs/ordered-data.c | 2 +- fs/btrfs/relocation.c | 9 +++++---- 9 files changed, 68 insertions(+), 55 deletions(-)