From patchwork Thu Nov 12 09:34:40 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Yan, Zheng" X-Patchwork-Id: 59546 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id nAC9YtXd019640 for ; Thu, 12 Nov 2009 09:34:56 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752368AbZKLJeq (ORCPT ); Thu, 12 Nov 2009 04:34:46 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752278AbZKLJeq (ORCPT ); Thu, 12 Nov 2009 04:34:46 -0500 Received: from rcsinet12.oracle.com ([148.87.113.124]:20064 "EHLO rgminet12.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752368AbZKLJeo (ORCPT ); Thu, 12 Nov 2009 04:34:44 -0500 Received: from rgminet13.oracle.com (rcsinet13.oracle.com [148.87.113.125]) by rgminet12.oracle.com (Switch-3.3.1/Switch-3.3.1) with ESMTP id nAC9YlFC003393 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Thu, 12 Nov 2009 09:34:48 GMT Received: from acsmt357.oracle.com (acsmt357.oracle.com [141.146.40.157]) by rgminet13.oracle.com (Switch-3.3.1/Switch-3.3.1) with ESMTP id nABDGKNb006151 for ; Thu, 12 Nov 2009 09:34:48 GMT Received: from abhmt010.oracle.com by acsmt356.oracle.com with ESMTP id 265464471258018483; Thu, 12 Nov 2009 01:34:43 -0800 Received: from [10.182.120.236] (/10.182.120.236) by default (Oracle Beehive Gateway v4.0) with ESMTP ; Thu, 12 Nov 2009 01:34:43 -0800 Message-ID: <4AFBD6B0.3040001@oracle.com> Date: Thu, 12 Nov 2009 17:34:40 +0800 From: "Yan, Zheng" User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.4pre) Gecko/20091014 Fedora/3.0-2.8.b4.fc11 Thunderbird/3.0b4 MIME-Version: 1.0 To: linux-btrfs@vger.kernel.org, chris Mason Subject: [PATCH 05/12] Btrfs: Avoid orphan inodes cleanup during replaying log X-Source-IP: acsmt357.oracle.com [141.146.40.157] X-Auth-Type: Internal IP X-CT-RefId: 
str=0001.0A090202.4AFBD6B6.024A:SCFMA4539814,ss=1,fgs=0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f6ad2d5..ec7884c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -859,8 +859,9 @@ struct btrfs_fs_info { struct mutex ordered_operations_mutex; struct rw_semaphore extent_commit_sem; - struct rw_semaphore subvol_sem; + struct rw_semaphore cleanup_work_sem; + struct rw_semaphore subvol_sem; struct srcu_struct subvol_srcu; struct list_head trans_list; @@ -1034,12 +1035,12 @@ struct btrfs_root { int ref_cows; int track_dirty; int in_radix; + int clean_orphans; u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; - int defrag_level; char *name; int in_sysfs; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0cf1781..e2ebc47 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -894,6 +894,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->stripesize = stripesize; root->ref_cows = 0; root->track_dirty = 0; + root->in_radix = 0; + root->clean_orphans = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -930,7 +932,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; - root->defrag_level = 0; root->root_key.objectid = objectid; root->anon_super.s_root = NULL; root->anon_super.s_dev = 0; @@ -1212,8 +1213,10 @@ again: ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); - if (ret == 0) + if (ret == 0) { root->in_radix = 1; + root->clean_orphans = 1; + } spin_unlock(&fs_info->fs_roots_radix_lock); radix_tree_preload_end(); if (ret) { @@ -1227,10 +1230,6 @@ again: ret = btrfs_find_dead_roots(fs_info->tree_root, root->root_key.objectid); WARN_ON(ret); - - if 
(!(fs_info->sb->s_flags & MS_RDONLY)) - btrfs_orphan_cleanup(root); - return root; fail: free_fs_root(root); @@ -1689,6 +1688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); @@ -2388,6 +2388,11 @@ int btrfs_commit_super(struct btrfs_root *root) mutex_lock(&root->fs_info->cleaner_mutex); btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); + + /* wait until ongoing cleanup work done */ + down_write(&root->fs_info->cleanup_work_sem); + up_write(&root->fs_info->cleanup_work_sem); + trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a824372..75a6647 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2088,16 +2088,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - path = btrfs_alloc_path(); - if (!path) + if (!xchg(&root->clean_orphans, 0)) return; + + path = btrfs_alloc_path(); + BUG_ON(!path); path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); key.offset = (u64)-1; - while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { @@ -3293,6 +3294,11 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (root->fs_info->log_root_recovering) { + BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + goto no_delete; + } + if (inode->i_nlink > 0) { BUG_ON(btrfs_root_refs(&root->root_item) != 0); goto no_delete; @@ -3700,6 +3706,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) } srcu_read_unlock(&root->fs_info->subvol_srcu, index); + if (root != sub_root) { + 
down_read(&root->fs_info->cleanup_work_sem); + if (!(inode->i_sb->s_flags & MS_RDONLY)) + btrfs_orphan_cleanup(sub_root); + up_read(&root->fs_info->cleanup_work_sem); + } + return inode; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfcc93c..975fdd3 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3755,6 +3755,7 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); + btrfs_orphan_cleanup(fs_root); } return err; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ce39fe6..9c0dc10 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -930,6 +930,17 @@ out_nowrite: return 0; } +static int insert_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset) +{ + int ret; + ret = btrfs_find_orphan_item(root, offset); + if (ret > 0) + ret = btrfs_insert_orphan_item(trans, root, offset); + return ret; +} + + /* * There are a few corners where the link count of the file can't * be properly maintained during replay. 
So, instead of adding @@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, } BTRFS_I(inode)->index_cnt = (u64)-1; - if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { - ret = replay_dir_deletes(trans, root, NULL, path, - inode->i_ino, 1); + if (inode->i_nlink == 0) { + if (S_ISDIR(inode->i_mode)) { + ret = replay_dir_deletes(trans, root, NULL, path, + inode->i_ino, 1); + BUG_ON(ret); + } + ret = insert_orphan_item(trans, root, inode->i_ino); BUG_ON(ret); } btrfs_free_path(path); @@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* inode keys are done during the first stage */ if (key.type == BTRFS_INODE_ITEM_KEY && wc->stage == LOG_WALK_REPLAY_INODES) { - struct inode *inode; struct btrfs_inode_item *inode_item; u32 mode; @@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, eb, i, &key); BUG_ON(ret); - /* for regular files, truncate away - * extents past the new EOF + /* for regular files, make sure corresponding + * orphan item exists. extents past the new EOF + * will be truncated later by orphan cleanup. */ if (S_ISREG(mode)) { - inode = read_one_inode(root, - key.objectid); - BUG_ON(!inode); - - ret = btrfs_truncate_inode_items(wc->trans, - root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); + ret = insert_orphan_item(wc->trans, root, + key.objectid); BUG_ON(ret); - - /* if the nlink count is zero here, the iput - * will free the inode. We bump it to make - * sure it doesn't get freed until the link - * count fixup is done - */ - if (inode->i_nlink == 0) { - btrfs_inc_nlink(inode); - btrfs_update_inode(wc->trans, - root, inode); - } - iput(inode); } + ret = link_to_fixup_dir(wc->trans, root, path, key.objectid); BUG_ON(ret);