From patchwork Fri Oct 15 21:28:35 2010
X-Patchwork-Submitter: Josef Bacik
X-Patchwork-Id: 257941
From: Josef Bacik
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 4/4] Btrfs: rework how we reserve metadata bytes
Date: Fri, 15 Oct 2010 17:28:35 -0400
Message-Id: <1287178115-18229-5-git-send-email-josef@redhat.com>
In-Reply-To: <1287178115-18229-1-git-send-email-josef@redhat.com>
References: <1287178115-18229-1-git-send-email-josef@redhat.com>

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 72f5e1a..9e923c1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2144,7 +2144,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
-				 int num_items, int *retries);
+				 int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2165,7 +2165,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries);
+			u64 num_bytes);
 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct btrfs_block_rsv *block_rsv,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 265d8e0..13f723a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3400,79 +3400,162 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_block_rsv *block_rsv,
+				  u64 orig_bytes)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
-	int ret;
-
-	if ((*retries) > 2)
-		return -ENOSPC;
+	u64 unused;
+	u64 num_bytes = orig_bytes;
+	int retries = 0;
+	int ret = 0;
+	bool reserved = false;
 
-	ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-	if (ret)
-		return 1;
+again:
+	ret = -ENOSPC;
+	if (reserved)
+		num_bytes = 0;
 
-	if (trans && trans->transaction->in_commit)
-		return -ENOSPC;
+	spin_lock(&space_info->lock);
+	unused = space_info->bytes_used + space_info->bytes_reserved +
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
 
-	ret = shrink_delalloc(trans, root, num_bytes, 0);
-	if (ret)
-		return ret;
+	/*
+	 * The idea here is that we've not already over-reserved the block group
+	 * then we can go ahead and save our reservation first and then start
+	 * flushing if we need to.  Otherwise if we've already overcommitted
+	 * lets start flushing stuff first and then come back and try to make
+	 * our reservation.
+	 */
+	if (unused <= space_info->total_bytes) {
+		unused -= space_info->total_bytes;
+		if (unused >= num_bytes) {
+			if (!reserved)
+				space_info->bytes_reserved += orig_bytes;
+			reserved = true;
+			ret = 0;
+		} else if (!reserved) {
+			/*
+			 * Don't have enough space, reserve what we need and try
+			 * to flush.
+			 */
+			space_info->bytes_reserved += orig_bytes;
+			reserved = true;
+		}
+	} else {
+		/*
+		 * Ok we're over committed, set num_bytes to some huge number so
+		 * that the flushing stuff will give us plenty of slack space.
+		 */
+		num_bytes = orig_bytes * 2;
+	}
 
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < num_bytes)
-		ret = 1;
 	spin_unlock(&space_info->lock);
-	if (ret)
-		return -ENOSPC;
 
-	(*retries)++;
+	if (!ret)
+		return 0;
 
-	if (trans)
-		return -EAGAIN;
+	/* We haven't retried yet, try and allocate a chunk. */
+	if (retries == 0) {
+		retries++;
+		ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
+		if (ret) {
+			if (reserved)
+				return 0;
+			goto again;
+		}
+	}
 
-	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(IS_ERR(trans));
-	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
+	/*
+	 * So for the first round of retries (retries == 1) we don't want to be
+	 * synchronously writing out inodes.  For retries == 2 we want to do
+	 * sync, and if that still doesn't work out move on.
+	 */
+	if (retries < 3) {
+		/*
+		 * We do this because sometimes we get trans from
+		 * start_trans_handle, which will deal with shrink_delalloc
+		 * returning -EAGAIN.  HOWEVER, there are cases that don't deal
+		 * with -EAGAIN, so we'll get things erroring out that really
+		 * shouldn't have.  So instead just force us to use synchronous
+		 * delalloc flushing if we have a trans to give us the best
+		 * possible chance of actually reclaiming space.
+		 */
+		if (trans)
+			retries = 2;
 
-	return 1;
-}
+		ret = shrink_delalloc(trans, root, num_bytes, retries == 2);
+		if (ret > 0) {
+			/*
+			 * Ok we reclaimed enough space and already reserved,
+			 * we're good to go.
+			 */
+			if (reserved)
+				return 0;
+			/*
+			 * We got enough space but didn't actually reserve
+			 * already, which means we're quite low on space, so try
+			 * and reserve again and if it fails lets just skip to
+			 * doing synchronous writeout.
+			 */
+			retries = 2;
+			goto again;
+		} else if (ret < 0) {
+			goto out;
+		}
 
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes)
-{
-	struct btrfs_space_info *space_info = block_rsv->space_info;
-	u64 unused;
-	int ret = -ENOSPC;
+		/*
+		 * Ok shrink_delalloc didn't reclaim enough, but we may not have
+		 * done a synchronous writeout either, so bump retries and try
+		 * again.
+		 */
+		retries++;
+		goto again;
+	}
 
 	spin_lock(&space_info->lock);
-	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly +
-		 space_info->bytes_may_use;
+	/*
+	 * Not enough space to be reclaimed, don't bother committing the
+	 * transaction.
+	 */
+	if (space_info->bytes_pinned < orig_bytes)
+		ret = -ENOSPC;
+	spin_unlock(&space_info->lock);
+	if (ret)
+		goto out;
 
-	if (unused < space_info->total_bytes)
-		unused = space_info->total_bytes - unused;
-	else
-		unused = 0;
+	ret = -EAGAIN;
+	if (trans)
+		goto out;
 
-	if (unused >= num_bytes) {
-		if (block_rsv->priority >= 10) {
-			space_info->bytes_reserved += num_bytes;
-			ret = 0;
-		} else {
-			if ((unused + block_rsv->reserved) *
-			    block_rsv->priority >=
-			    (num_bytes + block_rsv->reserved) * 10) {
-				space_info->bytes_reserved += num_bytes;
-				ret = 0;
-			}
-		}
+
+	ret = -ENOSPC;
+	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		goto out;
+	ret = btrfs_commit_transaction(trans, root);
+	if (!ret)
+		goto again;
+
+out:
+	if (reserved) {
+		spin_lock(&space_info->lock);
+		space_info->bytes_reserved -= orig_bytes;
+		spin_unlock(&space_info->lock);
 	}
-	spin_unlock(&space_info->lock);
 
 	return ret;
 }
@@ -3616,23 +3699,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries)
+			u64 num_bytes)
 {
 	int ret;
 
 	if (num_bytes == 0)
 		return 0;
-again:
-	ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
 		return 0;
 	}
 
-	ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-	if (ret > 0)
-		goto again;
-
 	return ret;
 }
 
@@ -3667,7 +3746,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (block_rsv->refill_used) {
-		ret = reserve_metadata_bytes(block_rsv, num_bytes);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     num_bytes);
 		if (!ret) {
 			block_rsv_add_bytes(block_rsv, num_bytes, 0);
 			return 0;
@@ -3847,7 +3927,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
-				  int num_items, int *retries)
+				  int num_items)
 {
 	u64 num_bytes;
 	int ret;
@@ -3857,7 +3937,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 
 	num_bytes = calc_trans_metadata_size(root, num_items);
 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes, retries);
+				  num_bytes);
 	if (!ret) {
 		trans->bytes_reserved += num_bytes;
 		trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3931,14 +4011,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve;
 	int nr_extents;
-	int retries = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
 	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3948,18 +4027,14 @@ again:
 		nr_extents = 0;
 		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(block_rsv, to_reserve);
-	if (ret) {
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-					   &retries);
-		if (ret > 0)
-			goto again;
+	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve);
+	if (ret)
 		return ret;
-	}
 
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -5586,7 +5661,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(block_rsv, blocksize);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     blocksize);
 		if (ret)
 			return ERR_PTR(ret);
 		return block_rsv;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index af339ee..fd07144 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -179,8 +179,6 @@ struct reloc_control {
 	u64 search_start;
 	u64 extents_found;
 
-	int block_rsv_retries;
-
 	unsigned int stage:8;
 	unsigned int create_reloc_tree:1;
 	unsigned int merge_reloc_tree:1;
@@ -2134,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
 	LIST_HEAD(reloc_roots);
 	u64 num_bytes = 0;
 	int ret;
-	int retries = 0;
 
 	mutex_lock(&root->fs_info->trans_mutex);
 	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2144,7 +2141,7 @@ again:
 	if (!err) {
 		num_bytes = rc->merging_rsv_size;
 		ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
-					  num_bytes, &retries);
+					  num_bytes);
 		if (ret)
			err = ret;
 	}
@@ -2156,7 +2153,6 @@ again:
 			btrfs_end_transaction(trans, rc->extent_root);
 			btrfs_block_rsv_release(rc->extent_root,
						rc->block_rsv, num_bytes);
-			retries = 0;
 			goto again;
 		}
 	}
@@ -2406,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
 	num_bytes = calcu_metadata_size(rc, node, 1) * 2;
 
 	trans->block_rsv = rc->block_rsv;
-	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
-				  &rc->block_rsv_retries);
+	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
 	if (ret) {
 		if (ret == -EAGAIN)
			rc->commit_transaction = 1;
 		return ret;
 	}
 
-	rc->block_rsv_retries = 0;
 	return 0;
 }
 
@@ -3615,8 +3609,7 @@ int prepare_to_relocate(struct reloc_control *rc)
 	 * is no reservation in transaction handle.
 	 */
 	ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
-				  rc->extent_root->nodesize * 256,
-				  &rc->block_rsv_retries);
+				  rc->extent_root->nodesize * 256);
 	if (ret)
 		return ret;
 
@@ -3628,7 +3621,6 @@ int prepare_to_relocate(struct reloc_control *rc)
 	rc->extents_found = 0;
 	rc->nodes_relocated = 0;
 	rc->merging_rsv_size = 0;
-	rc->block_rsv_retries = 0;
 
 	rc->create_reloc_tree = 1;
 	set_reloc_control(rc);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index db0da35..0af647c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -180,7 +180,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
-	int retries = 0;
 	int ret;
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
@@ -215,8 +214,7 @@ again:
 	}
 
 	if (num_items > 0) {
-		ret = btrfs_trans_reserve_metadata(h, root, num_items,
-						   &retries);
+		ret = btrfs_trans_reserve_metadata(h, root, num_items);
 		if (ret == -EAGAIN) {
 			btrfs_commit_transaction(h, root);
 			goto again;
@@ -855,7 +853,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	struct extent_buffer *tmp;
 	struct extent_buffer *old;
 	int ret;
-	int retries = 0;
 	u64 to_reserve = 0;
 	u64 index = 0;
 	u64 objectid;
@@ -877,7 +874,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
 	if (to_reserve > 0) {
 		ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
-					  to_reserve, &retries);
+					  to_reserve);
 		if (ret) {
 			pending->error = ret;
 			goto fail;