Message ID | a0b43dd0428248dc1ff98e0986b142ad9eb418e4.1525997057.git.osandov@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 11.05.2018 03:11, Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > Currently, we keep space reserved for all inode orphan items until the > inode is evicted (i.e., all references to it are dropped). We hit an > issue where an application would keep a bunch of deleted files open (by > design) and thus keep a large amount of space reserved, causing ENOSPC > errors when other operations tried to reserve space. This long-standing > reservation isn't absolutely necessary for a couple of reasons: > > - We can almost always make the reservation we need or steal from the > global reserve for the orphan item > - If we can't, it's not the end of the world if we drop the orphan item > on the floor and let the next mount clean it up > > So, get rid of persistent reservation and just reserve space in > btrfs_evict_inode(). > > Signed-off-by: Omar Sandoval <osandov@fb.com> > --- > fs/btrfs/btrfs_inode.h | 19 +++--- > fs/btrfs/inode.c | 142 ++++++++++++----------------------------- > 2 files changed, 50 insertions(+), 111 deletions(-) > > diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h > index 234bae55b85d..2f466cf55790 100644 > --- a/fs/btrfs/btrfs_inode.h > +++ b/fs/btrfs/btrfs_inode.h > @@ -20,16 +20,15 @@ > * new data the application may have written before commit. > */ > #define BTRFS_INODE_ORDERED_DATA_CLOSE 0 > -#define BTRFS_INODE_ORPHAN_META_RESERVED 1 > -#define BTRFS_INODE_DUMMY 2 > -#define BTRFS_INODE_IN_DEFRAG 3 > -#define BTRFS_INODE_HAS_ORPHAN_ITEM 4 > -#define BTRFS_INODE_HAS_ASYNC_EXTENT 5 > -#define BTRFS_INODE_NEEDS_FULL_SYNC 6 > -#define BTRFS_INODE_COPY_EVERYTHING 7 > -#define BTRFS_INODE_IN_DELALLOC_LIST 8 > -#define BTRFS_INODE_READDIO_NEED_LOCK 9 > -#define BTRFS_INODE_HAS_PROPS 10 > +#define BTRFS_INODE_DUMMY 1 > +#define BTRFS_INODE_IN_DEFRAG 2 > +#define BTRFS_INODE_HAS_ORPHAN_ITEM 3 > +#define BTRFS_INODE_HAS_ASYNC_EXTENT 4 > +#define BTRFS_INODE_NEEDS_FULL_SYNC 5 > +#define BTRFS_INODE_COPY_EVERYTHING 6 > +#define BTRFS_INODE_IN_DELALLOC_LIST 7 > +#define BTRFS_INODE_READDIO_NEED_LOCK 8 > +#define BTRFS_INODE_HAS_PROPS 9 > > /* in memory btrfs inode */ > struct btrfs_inode { > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 348dc57920f5..b9a046b8c72c 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -3343,88 +3343,25 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, > /* > * This creates an orphan entry for the given inode in case something goes wrong > * in the middle of an unlink. > - * > - * NOTE: caller of this function should reserve 5 units of metadata for > - * this function. > */ > int btrfs_orphan_add(struct btrfs_trans_handle *trans, > struct btrfs_inode *inode) > { > - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); > struct btrfs_root *root = inode->root; > - struct btrfs_block_rsv *block_rsv = NULL; > - int reserve = 0; > - bool insert = false; > int ret; > > - if (!root->orphan_block_rsv) { > - block_rsv = btrfs_alloc_block_rsv(fs_info, > - BTRFS_BLOCK_RSV_TEMP); > - if (!block_rsv) > - return -ENOMEM; > - } > - > - if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > - &inode->runtime_flags)) > - insert = true; > - > - if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > - &inode->runtime_flags)) > - reserve = 1; > - > - spin_lock(&root->orphan_lock); > - /* If someone has created ->orphan_block_rsv, be happy to use it. */ > - if (!root->orphan_block_rsv) { > - root->orphan_block_rsv = block_rsv; > - } else if (block_rsv) { > - btrfs_free_block_rsv(fs_info, block_rsv); > - block_rsv = NULL; > - } > + if (test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > + &inode->runtime_flags)) > + return 0; How come this can return true? Shouldn't btrfs_orphan_add always be called for an inode which doesn't have an orphan item? Having this check seems to indicate there is some non-determinism in the lifetime of orphan items. > > - if (insert) > - atomic_inc(&root->orphan_inodes); > - spin_unlock(&root->orphan_lock); > + atomic_inc(&root->orphan_inodes); > > - /* grab metadata reservation from transaction handle */ > - if (reserve) { > - ret = btrfs_orphan_reserve_metadata(trans, inode); > - ASSERT(!ret); > - if (ret) { > - /* > - * dec doesn't need spin_lock as ->orphan_block_rsv > - * would be released only if ->orphan_inodes is > - * zero. > - */ > - atomic_dec(&root->orphan_inodes); > - clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > - &inode->runtime_flags); > - if (insert) > - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > - &inode->runtime_flags); > - return ret; > - } > - } > - > - /* insert an orphan item to track this unlinked file */ > - if (insert) { > - ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); > - if (ret && ret != -EEXIST) { > - if (reserve) { > - clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > - &inode->runtime_flags); > - btrfs_orphan_release_metadata(inode); > - } > - /* > - * btrfs_orphan_commit_root may race with us and set > - * ->orphan_block_rsv to zero, in order to avoid that, > - * decrease ->orphan_inodes after everything is done. > - */ > - atomic_dec(&root->orphan_inodes); > - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > - &inode->runtime_flags); > - btrfs_abort_transaction(trans, ret); > - return ret; > - } > + ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); > + if (ret && ret != -EEXIST) { > + atomic_dec(&root->orphan_inodes); > + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &inode->runtime_flags); > + btrfs_abort_transaction(trans, ret); > + return ret; > } > > return 0; > @@ -3438,27 +3375,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, > struct btrfs_inode *inode) > { > struct btrfs_root *root = inode->root; > - int delete_item = 0; > int ret = 0; > > - if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > - &inode->runtime_flags)) > - delete_item = 1; > + if (!test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > + &inode->runtime_flags)) > + return 0; Similar comment as in btrfs_orphan_del. Shouldn't this always follow a successful btrfs_orphan_add, guaranteeing there is an item for this inode? Are there some "benign" races that could happen in the mean time which could trigger this check ? > > - if (delete_item && trans) > + if (trans) > ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); > > - if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > - &inode->runtime_flags)) > - btrfs_orphan_release_metadata(inode); > - > - /* > - * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv > - * to zero, in order to avoid that, decrease ->orphan_inodes after > - * everything is done. > - */ > - if (delete_item) > - atomic_dec(&root->orphan_inodes); > + atomic_dec(&root->orphan_inodes); > > return ret; > } > @@ -5339,10 +5265,10 @@ void btrfs_evict_inode(struct inode *inode) > trans->block_rsv = rsv; > > ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); > + trans->block_rsv = &fs_info->trans_block_rsv; > + btrfs_end_transaction(trans); > + btrfs_btree_balance_dirty(fs_info); > if (ret) { > - trans->block_rsv = &fs_info->trans_block_rsv; > - btrfs_end_transaction(trans); > - btrfs_btree_balance_dirty(fs_info); > if (ret != -ENOSPC && ret != -EAGAIN) { > btrfs_orphan_del(NULL, BTRFS_I(inode)); > btrfs_free_block_rsv(fs_info, rsv); > @@ -5353,22 +5279,36 @@ void btrfs_evict_inode(struct inode *inode) > } > } > > - btrfs_free_block_rsv(fs_info, rsv); > - > /* > - * Errors here aren't a big deal, it just means we leave orphan items > - * in the tree. They will be cleaned up on the next mount. > + * Errors here aren't a big deal, it just means we leave orphan items in > + * the tree. They will be cleaned up on the next mount. If the inode > + * number gets reused, cleanup deletes the orphan item without doing > + * anything, and unlink reuses the existing orphan item. > + * > + * If it turns out that we are dropping too many of these, we might want > + * to add a mechanism for retrying these after a commit. > */ > - trans->block_rsv = root->orphan_block_rsv; > - btrfs_orphan_del(trans, BTRFS_I(inode)); > + trans = evict_refill_and_join(root, rsv, min_size); > + if (IS_ERR(trans)) { > + btrfs_orphan_del(NULL, BTRFS_I(inode)); > + } else { > + trans->block_rsv = rsv; > + btrfs_orphan_del(trans, BTRFS_I(inode)); > + trans->block_rsv = &fs_info->trans_block_rsv; > + btrfs_end_transaction(trans); > + } > + > + btrfs_free_block_rsv(fs_info, rsv); > > - trans->block_rsv = &fs_info->trans_block_rsv; > if (!(root == fs_info->tree_root || > root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) > btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode))); > > - btrfs_end_transaction(trans); > - btrfs_btree_balance_dirty(fs_info); > + /* > + * If we didn't successfully delete, the orphan item will still be in > + * the tree and we'll retry on the next mount. Again, we might also want > + * to retry these periodically in the future. > + */ > no_delete: > btrfs_remove_delayed_node(BTRFS_I(inode)); > clear_inode(inode); > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 11, 2018 at 09:38:15AM +0300, Nikolay Borisov wrote: > > > On 11.05.2018 03:11, Omar Sandoval wrote: > > From: Omar Sandoval <osandov@fb.com> > > > > Currently, we keep space reserved for all inode orphan items until the > > inode is evicted (i.e., all references to it are dropped). We hit an > > issue where an application would keep a bunch of deleted files open (by > > design) and thus keep a large amount of space reserved, causing ENOSPC > > errors when other operations tried to reserve space. This long-standing > > reservation isn't absolutely necessary for a couple of reasons: > > > > - We can almost always make the reservation we need or steal from the > > global reserve for the orphan item > > - If we can't, it's not the end of the world if we drop the orphan item > > on the floor and let the next mount clean it up > > > > So, get rid of persistent reservation and just reserve space in > > btrfs_evict_inode(). > > > > Signed-off-by: Omar Sandoval <osandov@fb.com> > > --- > > fs/btrfs/btrfs_inode.h | 19 +++--- > > fs/btrfs/inode.c | 142 ++++++++++++----------------------------- > > 2 files changed, 50 insertions(+), 111 deletions(-) > > > > diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h > > index 234bae55b85d..2f466cf55790 100644 > > --- a/fs/btrfs/btrfs_inode.h > > +++ b/fs/btrfs/btrfs_inode.h > > @@ -20,16 +20,15 @@ > > * new data the application may have written before commit. > > */ > > #define BTRFS_INODE_ORDERED_DATA_CLOSE 0 > > -#define BTRFS_INODE_ORPHAN_META_RESERVED 1 > > -#define BTRFS_INODE_DUMMY 2 > > -#define BTRFS_INODE_IN_DEFRAG 3 > > -#define BTRFS_INODE_HAS_ORPHAN_ITEM 4 > > -#define BTRFS_INODE_HAS_ASYNC_EXTENT 5 > > -#define BTRFS_INODE_NEEDS_FULL_SYNC 6 > > -#define BTRFS_INODE_COPY_EVERYTHING 7 > > -#define BTRFS_INODE_IN_DELALLOC_LIST 8 > > -#define BTRFS_INODE_READDIO_NEED_LOCK 9 > > -#define BTRFS_INODE_HAS_PROPS 10 > > +#define BTRFS_INODE_DUMMY 1 > > +#define BTRFS_INODE_IN_DEFRAG 2 > > +#define BTRFS_INODE_HAS_ORPHAN_ITEM 3 > > +#define BTRFS_INODE_HAS_ASYNC_EXTENT 4 > > +#define BTRFS_INODE_NEEDS_FULL_SYNC 5 > > +#define BTRFS_INODE_COPY_EVERYTHING 6 > > +#define BTRFS_INODE_IN_DELALLOC_LIST 7 > > +#define BTRFS_INODE_READDIO_NEED_LOCK 8 > > +#define BTRFS_INODE_HAS_PROPS 9 > > > > /* in memory btrfs inode */ > > struct btrfs_inode { > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > > index 348dc57920f5..b9a046b8c72c 100644 > > --- a/fs/btrfs/inode.c > > +++ b/fs/btrfs/inode.c > > @@ -3343,88 +3343,25 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, > > /* > > * This creates an orphan entry for the given inode in case something goes wrong > > * in the middle of an unlink. > > - * > > - * NOTE: caller of this function should reserve 5 units of metadata for > > - * this function. > > */ > > int btrfs_orphan_add(struct btrfs_trans_handle *trans, > > struct btrfs_inode *inode) > > { > > - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); > > struct btrfs_root *root = inode->root; > > - struct btrfs_block_rsv *block_rsv = NULL; > > - int reserve = 0; > > - bool insert = false; > > int ret; > > > > - if (!root->orphan_block_rsv) { > > - block_rsv = btrfs_alloc_block_rsv(fs_info, > > - BTRFS_BLOCK_RSV_TEMP); > > - if (!block_rsv) > > - return -ENOMEM; > > - } > > - > > - if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > > - &inode->runtime_flags)) > > - insert = true; > > - > > - if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > > - &inode->runtime_flags)) > > - reserve = 1; > > - > > - spin_lock(&root->orphan_lock); > > - /* If someone has created ->orphan_block_rsv, be happy to use it. */ > > - if (!root->orphan_block_rsv) { > > - root->orphan_block_rsv = block_rsv; > > - } else if (block_rsv) { > > - btrfs_free_block_rsv(fs_info, block_rsv); > > - block_rsv = NULL; > > - } > > + if (test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > > + &inode->runtime_flags)) > > + return 0; > > How come this can return true? Shouldn't btrfs_orphan_add always be > called for an inode which doesn't have an orphan item? Having this check > seems to indicate there is some non-determinism in the lifetime of > orphan items. Another great point, this was needed when we also inserted orphan items for truncate, but it's not needed anymore. I'll clean that up, too. > > - if (insert) > > - atomic_inc(&root->orphan_inodes); > > - spin_unlock(&root->orphan_lock); > > + atomic_inc(&root->orphan_inodes); > > > > - /* grab metadata reservation from transaction handle */ > > - if (reserve) { > > - ret = btrfs_orphan_reserve_metadata(trans, inode); > > - ASSERT(!ret); > > - if (ret) { > > - /* > > - * dec doesn't need spin_lock as ->orphan_block_rsv > > - * would be released only if ->orphan_inodes is > > - * zero. > > - */ > > - atomic_dec(&root->orphan_inodes); > > - clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > > - &inode->runtime_flags); > > - if (insert) > > - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > > - &inode->runtime_flags); > > - return ret; > > - } > > - } > > - > > - /* insert an orphan item to track this unlinked file */ > > - if (insert) { > > - ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); > > - if (ret && ret != -EEXIST) { > > - if (reserve) { > > - clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, > > - &inode->runtime_flags); > > - btrfs_orphan_release_metadata(inode); > > - } > > - /* > > - * btrfs_orphan_commit_root may race with us and set > > - * ->orphan_block_rsv to zero, in order to avoid that, > > - * decrease ->orphan_inodes after everything is done. > > - */ > > - atomic_dec(&root->orphan_inodes); > > - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > > - &inode->runtime_flags); > > - btrfs_abort_transaction(trans, ret); > > - return ret; > > - } > > + ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); > > + if (ret && ret != -EEXIST) { > > + atomic_dec(&root->orphan_inodes); > > + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &inode->runtime_flags); > > + btrfs_abort_transaction(trans, ret); > > + return ret; > > } > > > > return 0; > > @@ -3438,27 +3375,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, > > struct btrfs_inode *inode) > > { > > struct btrfs_root *root = inode->root; > > - int delete_item = 0; > > int ret = 0; > > > > - if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > > - &inode->runtime_flags)) > > - delete_item = 1; > > + if (!test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, > > + &inode->runtime_flags)) > > + return 0; > > Similar comment as in btrfs_orphan_del. Shouldn't this always follow a > successful btrfs_orphan_add, guaranteeing there is an item for this > inode? Are there some "benign" races that could happen in the mean time > which could trigger this check ? Same thing here, and it's even easier to convince myself here that it's unnecessary: btrfs_evict_inode() ignores errors if something magical did happen, and we really expect the orphan item to be there for an O_TMPFILE in btrfs_link(). -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 234bae55b85d..2f466cf55790 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -20,16 +20,15 @@ * new data the application may have written before commit. */ #define BTRFS_INODE_ORDERED_DATA_CLOSE 0 -#define BTRFS_INODE_ORPHAN_META_RESERVED 1 -#define BTRFS_INODE_DUMMY 2 -#define BTRFS_INODE_IN_DEFRAG 3 -#define BTRFS_INODE_HAS_ORPHAN_ITEM 4 -#define BTRFS_INODE_HAS_ASYNC_EXTENT 5 -#define BTRFS_INODE_NEEDS_FULL_SYNC 6 -#define BTRFS_INODE_COPY_EVERYTHING 7 -#define BTRFS_INODE_IN_DELALLOC_LIST 8 -#define BTRFS_INODE_READDIO_NEED_LOCK 9 -#define BTRFS_INODE_HAS_PROPS 10 +#define BTRFS_INODE_DUMMY 1 +#define BTRFS_INODE_IN_DEFRAG 2 +#define BTRFS_INODE_HAS_ORPHAN_ITEM 3 +#define BTRFS_INODE_HAS_ASYNC_EXTENT 4 +#define BTRFS_INODE_NEEDS_FULL_SYNC 5 +#define BTRFS_INODE_COPY_EVERYTHING 6 +#define BTRFS_INODE_IN_DELALLOC_LIST 7 +#define BTRFS_INODE_READDIO_NEED_LOCK 8 +#define BTRFS_INODE_HAS_PROPS 9 /* in memory btrfs inode */ struct btrfs_inode { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 348dc57920f5..b9a046b8c72c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3343,88 +3343,25 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, /* * This creates an orphan entry for the given inode in case something goes wrong * in the middle of an unlink. - * - * NOTE: caller of this function should reserve 5 units of metadata for - * this function. */ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct btrfs_root *root = inode->root; - struct btrfs_block_rsv *block_rsv = NULL; - int reserve = 0; - bool insert = false; int ret; - if (!root->orphan_block_rsv) { - block_rsv = btrfs_alloc_block_rsv(fs_info, - BTRFS_BLOCK_RSV_TEMP); - if (!block_rsv) - return -ENOMEM; - } - - if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &inode->runtime_flags)) - insert = true; - - if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &inode->runtime_flags)) - reserve = 1; - - spin_lock(&root->orphan_lock); - /* If someone has created ->orphan_block_rsv, be happy to use it. */ - if (!root->orphan_block_rsv) { - root->orphan_block_rsv = block_rsv; - } else if (block_rsv) { - btrfs_free_block_rsv(fs_info, block_rsv); - block_rsv = NULL; - } + if (test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &inode->runtime_flags)) + return 0; - if (insert) - atomic_inc(&root->orphan_inodes); - spin_unlock(&root->orphan_lock); + atomic_inc(&root->orphan_inodes); - /* grab metadata reservation from transaction handle */ - if (reserve) { - ret = btrfs_orphan_reserve_metadata(trans, inode); - ASSERT(!ret); - if (ret) { - /* - * dec doesn't need spin_lock as ->orphan_block_rsv - * would be released only if ->orphan_inodes is - * zero. - */ - atomic_dec(&root->orphan_inodes); - clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &inode->runtime_flags); - if (insert) - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &inode->runtime_flags); - return ret; - } - } - - /* insert an orphan item to track this unlinked file */ - if (insert) { - ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - if (ret && ret != -EEXIST) { - if (reserve) { - clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &inode->runtime_flags); - btrfs_orphan_release_metadata(inode); - } - /* - * btrfs_orphan_commit_root may race with us and set - * ->orphan_block_rsv to zero, in order to avoid that, - * decrease ->orphan_inodes after everything is done. - */ - atomic_dec(&root->orphan_inodes); - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &inode->runtime_flags); - btrfs_abort_transaction(trans, ret); - return ret; - } + ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); + if (ret && ret != -EEXIST) { + atomic_dec(&root->orphan_inodes); + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &inode->runtime_flags); + btrfs_abort_transaction(trans, ret); + return ret; } return 0; @@ -3438,27 +3375,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { struct btrfs_root *root = inode->root; - int delete_item = 0; int ret = 0; - if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &inode->runtime_flags)) - delete_item = 1; + if (!test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &inode->runtime_flags)) + return 0; - if (delete_item && trans) + if (trans) ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &inode->runtime_flags)) - btrfs_orphan_release_metadata(inode); - - /* - * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv - * to zero, in order to avoid that, decrease ->orphan_inodes after - * everything is done. - */ - if (delete_item) - atomic_dec(&root->orphan_inodes); + atomic_dec(&root->orphan_inodes); return ret; } @@ -5339,10 +5265,10 @@ void btrfs_evict_inode(struct inode *inode) trans->block_rsv = rsv; ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); + trans->block_rsv = &fs_info->trans_block_rsv; + btrfs_end_transaction(trans); + btrfs_btree_balance_dirty(fs_info); if (ret) { - trans->block_rsv = &fs_info->trans_block_rsv; - btrfs_end_transaction(trans); - btrfs_btree_balance_dirty(fs_info); if (ret != -ENOSPC && ret != -EAGAIN) { btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); @@ -5353,22 +5279,36 @@ void btrfs_evict_inode(struct inode *inode) } } - btrfs_free_block_rsv(fs_info, rsv); - /* - * Errors here aren't a big deal, it just means we leave orphan items - * in the tree. They will be cleaned up on the next mount. + * Errors here aren't a big deal, it just means we leave orphan items in + * the tree. They will be cleaned up on the next mount. If the inode + * number gets reused, cleanup deletes the orphan item without doing + * anything, and unlink reuses the existing orphan item. + * + * If it turns out that we are dropping too many of these, we might want + * to add a mechanism for retrying these after a commit. */ - trans->block_rsv = root->orphan_block_rsv; - btrfs_orphan_del(trans, BTRFS_I(inode)); + trans = evict_refill_and_join(root, rsv, min_size); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, BTRFS_I(inode)); + } else { + trans->block_rsv = rsv; + btrfs_orphan_del(trans, BTRFS_I(inode)); + trans->block_rsv = &fs_info->trans_block_rsv; + btrfs_end_transaction(trans); + } + + btrfs_free_block_rsv(fs_info, rsv); - trans->block_rsv = &fs_info->trans_block_rsv; if (!(root == fs_info->tree_root || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode))); - btrfs_end_transaction(trans); - btrfs_btree_balance_dirty(fs_info); + /* + * If we didn't successfully delete, the orphan item will still be in + * the tree and we'll retry on the next mount. Again, we might also want + * to retry these periodically in the future. + */ no_delete: btrfs_remove_delayed_node(BTRFS_I(inode)); clear_inode(inode);