diff mbox series

[8/8] btrfs: reserve extra space during evict()

Message ID 20181203152459.21630-9-josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series Enospc cleanups and fixes | expand

Commit Message

Josef Bacik Dec. 3, 2018, 3:24 p.m. UTC
We could generate a lot of delayed refs in evict but never have any left
over space from our block rsv to make up for that fact.  So reserve some
extra space and give it to the transaction so it can be used to refill
the delayed refs rsv every loop through the truncate path.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/inode.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

Comments

Nikolay Borisov Dec. 14, 2018, 8:20 a.m. UTC | #1
On 3.12.18 г. 17:24 ч., Josef Bacik wrote:
> We could generate a lot of delayed refs in evict but never have any left
> over space from our block rsv to make up for that fact.  So reserve some
> extra space and give it to the transaction so it can be used to refill
> the delayed refs rsv every loop through the truncate path.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/inode.c | 25 +++++++++++++++++++++++--
>  1 file changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 623a71d871d4..8ac7abe2ae9b 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -5258,13 +5258,15 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
>  {
>  	struct btrfs_fs_info *fs_info = root->fs_info;
>  	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
> +	u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
>  	int failures = 0;
>  
>  	for (;;) {
>  		struct btrfs_trans_handle *trans;
>  		int ret;
>  
> -		ret = btrfs_block_rsv_refill(root, rsv, rsv->size,
> +		ret = btrfs_block_rsv_refill(root, rsv,
> +					     rsv->size + delayed_refs_extra,
>  					     BTRFS_RESERVE_FLUSH_LIMIT);

Rather than having to play those tricks, why not just modify the call in
btrfs_evict_inode, from:

rsv->size = btrfs_calc_trunc_metadata_size(fs_info, 1);

to

rsv->size = btrfs_calc_trunc_metadata_size(fs_info, 2);

and add a comment explaining what the number 2 means, of course.

>  
>  		if (ret && ++failures > 2) {
> @@ -5273,9 +5275,28 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
>  			return ERR_PTR(-ENOSPC);
>  		}
>  
> +		/*
> +		 * Evict can generate a large amount of delayed refs without
> +		 * having a way to add space back since we exhaust our temporary
> +		 * block rsv.  We aren't allowed to do FLUSH_ALL in this case
> +		 * because we could deadlock with so many things in the flushing
> +		 * code, so we have to try and hold some extra space to
> +		 * compensate for our delayed ref generation.  If we can't get
> +		 * that space then we need see if we can steal our minimum from
> +		 * the global reserve.  We will be ratelimited by the amount of
> +		 * space we have for the delayed refs rsv, so we'll end up
> +		 * committing and trying again.
> +		 */
>  		trans = btrfs_join_transaction(root);
> -		if (IS_ERR(trans) || !ret)
> +		if (IS_ERR(trans) || !ret) {
> +			if (!IS_ERR(trans)) {
> +				trans->block_rsv = &fs_info->trans_block_rsv;

This line is redundant since evict_refill_and_join is called before
trans->block_rsv is modified.

> +				trans->bytes_reserved = delayed_refs_extra;

Is this even correct? Since we join a transaction, it might already have
had some bytes reserved. So in any case, shouldn't the line here say:
trans->bytes_reserved += delayed_refs_extra ?

> +				btrfs_block_rsv_migrate(rsv, trans->block_rsv,
> +							delayed_refs_extra, 1);



> +			}
>  			return trans;
> +		}
>  
>  		/*
>  		 * Try to steal from the global reserve if there is space for
>
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 623a71d871d4..8ac7abe2ae9b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5258,13 +5258,15 @@  static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+	u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
 	int failures = 0;
 
 	for (;;) {
 		struct btrfs_trans_handle *trans;
 		int ret;
 
-		ret = btrfs_block_rsv_refill(root, rsv, rsv->size,
+		ret = btrfs_block_rsv_refill(root, rsv,
+					     rsv->size + delayed_refs_extra,
 					     BTRFS_RESERVE_FLUSH_LIMIT);
 
 		if (ret && ++failures > 2) {
@@ -5273,9 +5275,28 @@  static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
 			return ERR_PTR(-ENOSPC);
 		}
 
+		/*
+		 * Evict can generate a large amount of delayed refs without
+		 * having a way to add space back since we exhaust our temporary
+		 * block rsv.  We aren't allowed to do FLUSH_ALL in this case
+		 * because we could deadlock with so many things in the flushing
+		 * code, so we have to try and hold some extra space to
+		 * compensate for our delayed ref generation.  If we can't get
+		 * that space then we need see if we can steal our minimum from
+		 * the global reserve.  We will be ratelimited by the amount of
+		 * space we have for the delayed refs rsv, so we'll end up
+		 * committing and trying again.
+		 */
 		trans = btrfs_join_transaction(root);
-		if (IS_ERR(trans) || !ret)
+		if (IS_ERR(trans) || !ret) {
+			if (!IS_ERR(trans)) {
+				trans->block_rsv = &fs_info->trans_block_rsv;
+				trans->bytes_reserved = delayed_refs_extra;
+				btrfs_block_rsv_migrate(rsv, trans->block_rsv,
+							delayed_refs_extra, 1);
+			}
 			return trans;
+		}
 
 		/*
 		 * Try to steal from the global reserve if there is space for