From patchwork Fri Mar 12 19:29:44 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Josef Bacik X-Patchwork-Id: 85349 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o2CJTmoa032167 for ; Fri, 12 Mar 2010 19:29:48 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S934522Ab0CLT3q (ORCPT ); Fri, 12 Mar 2010 14:29:46 -0500 Received: from mx1.redhat.com ([209.132.183.28]:13674 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S934475Ab0CLT3p (ORCPT ); Fri, 12 Mar 2010 14:29:45 -0500 Received: from int-mx02.intmail.prod.int.phx2.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o2CJTjDC027477 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Fri, 12 Mar 2010 14:29:45 -0500 Received: from localhost.localdomain (vpn-10-159.rdu.redhat.com [10.11.10.159]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o2CJTi4u017727 for ; Fri, 12 Mar 2010 14:29:44 -0500 Date: Fri, 12 Mar 2010 14:29:44 -0500 From: Josef Bacik To: linux-btrfs@vger.kernel.org Subject: [PATCH] Btrfs: fix ENOSPC accounting when max_extent is not maxed out Message-ID: <20100312192943.GB2332@localhost.localdomain> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.19 (2009-01-05) X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Fri, 12 Mar 2010 19:29:48 +0000 (UTC) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f12fe00..2f5c01f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1965,6 +1965,14 @@ static inline struct dentry *fdentry(struct file *file) return file->f_path.dentry; } +static inline int calculate_extents(struct btrfs_root *root, u64 bytes) +{ + if (bytes <= root->fs_info->max_extent) + return 1; + return (int)div64_u64(bytes + root->fs_info->max_extent -1, + root->fs_info->max_extent); +} + /* extent-tree.c */ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 73ac69b..0085dcb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2837,7 +2837,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, spin_unlock(&BTRFS_I(inode)->accounting_lock); return 0; } - BTRFS_I(inode)->reserved_extents--; + BTRFS_I(inode)->reserved_extents -= num_items; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_space(root, num_items); @@ -3059,7 +3059,7 @@ again: if (realloc_bytes >= num_bytes) { pool->total_bytes += realloc_bytes; spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->reserved_extents++; + BTRFS_I(inode)->reserved_extents += num_items; spin_unlock(&BTRFS_I(inode)->accounting_lock); spin_unlock(&pool->lock); return 0; @@ -3074,7 +3074,7 @@ again: */ if (pool->reserved_bytes + pool->used_bytes <= pool->total_bytes) { spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->reserved_extents++; + BTRFS_I(inode)->reserved_extents += num_items; spin_unlock(&BTRFS_I(inode)->accounting_lock); spin_unlock(&pool->lock); return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d146dde..a457a94 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -838,6 +838,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unsigned long first_index; unsigned long last_index; int will_write; + int reserved = calculate_extents(root, count); will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || (file->f_flags & O_DIRECT)); @@ -855,7 +856,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, /* do the reserve before the mutex lock in case we have to do some * flushing. We wouldn't deadlock, but this is more polite. */ - err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + err = btrfs_reserve_metadata_for_delalloc(root, inode, reserved); if (err) goto out_nolock; @@ -975,7 +976,7 @@ out: mutex_unlock(&inode->i_mutex); if (ret) err = ret; - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + btrfs_unreserve_metadata_for_delalloc(root, inode, reserved); out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b4056ca..09f18b9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1231,19 +1231,37 @@ static int btrfs_split_extent_hook(struct inode *inode, size = orig->end - orig->start + 1; if (size > root->fs_info->max_extent) { - u64 num_extents; - u64 new_size; + u64 left_extents, right_extents; + u64 orig_extents; + u64 left_size, right_size; - new_size = orig->end - split + 1; - num_extents = div64_u64(size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); + left_size = orig->end - split + 1; + right_size = split - orig->start; + left_extents = calculate_extents(root, left_size); + right_extents = calculate_extents(root, right_size); + orig_extents = calculate_extents(root, size); /* - * if we break a large extent up then leave oustanding_extents - * be, since we've already accounted for the large extent. + * If we are splitting off a max_extent multiple of space, then + * we don't need to account for another extent. However if we + * take off less than a max_extent, we need to add another + * outstanding_extent, because we will likely still have the + * original amount of extents needed for the big chunk left + * over. + * + * Think of it this way. We have 4 outstanding extents for the + * original giant chunk of data. If we split off a max_extent + * sized chunk, then the remaining chunk has 3 outstanding + * extens, and the new little chunk has 1 outstanding extents + * worth, so we're even. + * + * But if we carve off < max_extent, thats its own extent still. + * So the left over chunk still needs 4 extents to describe it + * on disk, and the chunk we just split off needs 1 extent, so + * thats 5 total extents, so we need to add 1 to + * outstanding_extents. */ - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) < num_extents) + if (left_extents + right_extents == orig_extents) return 0; } @@ -1265,18 +1283,16 @@ static int btrfs_merge_extent_hook(struct inode *inode, struct extent_state *other) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 new_size, old_size; - u64 num_extents; + u64 new_size, size1, size2; + u64 extents1, extents2; /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) return 0; - old_size = other->end - other->start + 1; - if (new->start < other->start) - new_size = other->end - new->start + 1; - else - new_size = new->end - other->start + 1; + size1 = other->end - other->start + 1; + size2 = new->end - new->start + 1; + new_size = size1 + size2; /* we're not bigger than the max, unreserve the space and go */ if (new_size <= root->fs_info->max_extent) { @@ -1286,14 +1302,20 @@ static int btrfs_merge_extent_hook(struct inode *inode, return 0; } + extents1 = calculate_extents(root, size1); + extents2 = calculate_extents(root, size2); + /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. + * So if the number of extents required for the two chunks of space we + * are merging equals the number of extents we need for the entire space + * we now have, just return. + * + * The idea here is if max extent is say 1M, and the first chunk was + * exactly 1M and the second chunk was 4k, we now need 2 extents to + * cover that area, so we keep the reservation we got from adding the 4k + * extent. */ - num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) > num_extents) + if (calculate_extents(root, new_size) == extents1 + extents2) return 0; spin_lock(&BTRFS_I(inode)->accounting_lock); @@ -1319,11 +1341,13 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + int extents = calculate_extents(root, end - start + 1); spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents++; + BTRFS_I(inode)->outstanding_extents += extents; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); + spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1351,11 +1375,18 @@ static int btrfs_clear_bit_hook(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; if (bits & EXTENT_DO_ACCOUNTING) { + int extents = calculate_extents(root, state->end - + state->start + 1); + int bug = 0; + spin_lock(&BTRFS_I(inode)->accounting_lock); - BUG_ON(!BTRFS_I(inode)->outstanding_extents); - BTRFS_I(inode)->outstanding_extents--; + if (BTRFS_I(inode)->outstanding_extents >= extents) + BTRFS_I(inode)->outstanding_extents -= extents; + else + bug = 1; spin_unlock(&BTRFS_I(inode)->accounting_lock); - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + BUG_ON(bug); + btrfs_unreserve_metadata_for_delalloc(root, inode, extents); } spin_lock(&root->fs_info->delalloc_lock); @@ -3103,7 +3134,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if (ret) goto out; - ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + ret = btrfs_reserve_metadata_for_delalloc(root, inode, + calculate_extents(root, PAGE_CACHE_SIZE)); if (ret) goto out; @@ -3112,7 +3144,8 @@ again: page = grab_cache_page(mapping, index); if (!page) { btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + btrfs_unreserve_metadata_for_delalloc(root, inode, + calculate_extents(root, PAGE_CACHE_SIZE)); goto out; } @@ -3176,7 +3209,8 @@ again: out_unlock: if (ret) btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + btrfs_unreserve_metadata_for_delalloc(root, inode, + calculate_extents(root, PAGE_CACHE_SIZE)); unlock_page(page); page_cache_release(page); out: @@ -5092,7 +5126,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } - ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + ret = btrfs_reserve_metadata_for_delalloc(root, inode, + calculate_extents(root, PAGE_CACHE_SIZE)); if (ret) { btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); ret = VM_FAULT_SIGBUS; @@ -5176,7 +5211,8 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); out_unlock: - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + btrfs_unreserve_metadata_for_delalloc(root, inode, + calculate_extents(root, PAGE_CACHE_SIZE)); if (!ret) return VM_FAULT_LOCKED; unlock_page(page); @@ -5378,7 +5414,6 @@ free: void btrfs_drop_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) generic_delete_inode(inode); else diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2aace81..fd366da 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -303,7 +303,9 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; + struct btrfs_root *root = BTRFS_I(inode)->root; struct rb_node *node; + int bug = 0; tree = &BTRFS_I(inode)->ordered_tree; node = &entry->rb_node; @@ -312,13 +314,16 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); spin_lock(&BTRFS_I(inode)->accounting_lock); - BUG_ON(!BTRFS_I(inode)->outstanding_extents); - BTRFS_I(inode)->outstanding_extents--; + if (BTRFS_I(inode)->outstanding_extents) + BTRFS_I(inode)->outstanding_extents--; + else + bug = 1; spin_unlock(&BTRFS_I(inode)->accounting_lock); + BUG_ON(bug); btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, inode, 1); - spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); /* @@ -330,7 +335,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { list_del_init(&BTRFS_I(inode)->ordered_operations); } - spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_unlock(&root->fs_info->ordered_extent_lock); return 0; }