Message ID | 154897672012.26065.1375987197453969157.stgit@magnolia (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xfs: incore unlinked list | expand |
On Thu, Jan 31, 2019 at 03:18:40PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Use a rhashtable to cache the unlinked list incore. This should speed > up unlinked processing considerably when there are a lot of inodes on > the unlinked list because iunlink_remove no longer has to traverse an > entire bucket list to find which inode points to the one being removed. This sounds pretty reasonable and a real review will follow. But can you quantify the considerable speedups for real life workloads?
On Thu, Jan 31, 2019 at 03:18:40PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Use a rhashtable to cache the unlinked list incore. This should speed > up unlinked processing considerably when there are a lot of inodes on > the unlinked list because iunlink_remove no longer has to traverse an > entire bucket list to find which inode points to the one being removed. > > The incore list structure records "X.next_unlinked = Y" relations, with > the rhashtable using Y to index the records. This makes finding the > inode X that points to a inode Y very quick. If our cache fails to find > anything we can always fall back on the old method. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > --- I still need to take a closer look at this one, but I want to make sure I grok what's happening from the unlinked list perspective... > fs/xfs/xfs_inode.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_inode.h | 10 ++ > fs/xfs/xfs_log_recover.c | 12 ++- > fs/xfs/xfs_mount.c | 7 + > fs/xfs/xfs_mount.h | 1 > 5 files changed, 245 insertions(+), 1 deletion(-) > > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index 56349497d75b..eec3c6109fc6 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c ... > @@ -2072,6 +2255,9 @@ xfs_iunlink( > if (error) > goto out_unlock; > ASSERT(old_agino == NULLAGINO); > + error = xfs_iunlink_add_backref(pag, agino, next_agino); > + if (error) > + goto out_unlock; agino has been unlinked, add a backref from the next inode (i.e., we point to) back to agino. > } > > /* Point the head of the list to point to this inode. */ > @@ -2144,6 +2330,17 @@ xfs_iunlink_map_prev( > > ASSERT(head_agino != target_agino); > > + /* See if our backref cache can find it faster. 
*/ > + error = xfs_iunlink_lookup_backref(pag, target_agino, &next_agino); > + if (error == 0) { > + next_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); > + error = xfs_iunlink_map_ino(tp, next_ino, &last_imap, > + &last_dip, &last_ibp); > + if (error) > + return error; > + goto out; > + } > + > next_agino = head_agino; > while (next_agino != target_agino) { > xfs_agino_t unlinked_agino; > @@ -2169,6 +2366,7 @@ xfs_iunlink_map_prev( > next_agino = unlinked_agino; > } > > +out: > /* Should never happen, but don't return garbage. */ > if (next_ino == NULLFSINO) > return -EFSCORRUPTED; > @@ -2243,6 +2441,12 @@ xfs_iunlink_remove( > if (error) > goto out_unlock; > > + if (next_agino != NULLAGINO) { > + error = xfs_iunlink_forget_backref(pag, next_agino); > + if (error) > + goto out_unlock; > + } Removed from the head. We thus had no inode pointing to us, but the next record had a ref that we pointed to it. Drop that since we're gone and that inode is now the head. > + > /* Point the head of the list to the next unlinked inode. */ > error = xfs_iunlink_update_bucket(tp, agibp, bucket_index, > next_agino); > @@ -2265,10 +2469,22 @@ xfs_iunlink_remove( > &next_agino); > if (error) > goto out_unlock; > + if (next_agino != NULLAGINO) { > + error = xfs_iunlink_forget_backref(pag, next_agino); > + if (error) > + goto out_unlock; > + } Removed from the middle. next_agino had a backref to us, so we drop that since we're going away. > > /* Point the previous inode on the list to the next inode. */ > xfs_iunlink_update_dinode(tp, last_ibp, last_dip, &imap, > next_ino, next_agino); > + if (next_agino == NULLAGINO) > + error = xfs_iunlink_forget_backref(pag, agino); > + else > + error = xfs_iunlink_change_backref(pag, agino, > + next_agino); Now we deal with the backref for us (i.e., to whatever pointed at us and now points at something else). If it now points at a real inode, change the ref that pointed to us to point to our old next. 
If it doesn't, delete the ref that pointed to us. Am I following that correctly? Hmm, I realize this may be what's happening behind these wrappers, but I'm wondering if the logic at this level would be easier to grok if we just dropped the backref that points to our ip (IIUC we should always have one here), if we point to something, drop the backref for that and then add a new one for the prev->next link we've just created. But I still need to take a closer look at all of this... it might be more clear with some comments. Brian > + if (error) > + goto out_unlock; > } > pag->pagi_unlinked_count--; > > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > index be2014520155..9690f0d32e33 100644 > --- a/fs/xfs/xfs_inode.h > +++ b/fs/xfs/xfs_inode.h > @@ -500,4 +500,14 @@ extern struct kmem_zone *xfs_inode_zone; > > bool xfs_inode_verify_forks(struct xfs_inode *ip); > > +int xfs_iunlink_init(struct xfs_perag *pag); > +void xfs_iunlink_destroy(struct xfs_perag *pag); > +int xfs_iunlink_lookup_backref(struct xfs_perag *pag, xfs_agino_t agino, > + xfs_agino_t *prev_agino); > +int xfs_iunlink_add_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, > + xfs_agino_t this_agino); > +int xfs_iunlink_forget_backref(struct xfs_perag *pag, xfs_agino_t agino); > +int xfs_iunlink_change_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, > + xfs_agino_t this_agino); > + > #endif /* __XFS_INODE_H__ */ > diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c > index c920b8aeba01..909b70550aa8 100644 > --- a/fs/xfs/xfs_log_recover.c > +++ b/fs/xfs/xfs_log_recover.c > @@ -5078,12 +5078,22 @@ xlog_recover_process_one_iunlink( > agino = be32_to_cpu(dip->di_next_unlinked); > xfs_buf_relse(ibp); > > - /* Make sure the in-core data knows about this unlinked inode. */ > + /* > + * Make sure the in-core data knows about this unlinked inode. 
Since > + * our iunlinks recovery basically just deletes the head of a bucket > + * list until the bucket is empty, we need only to add the backref from > + * the current list item to the next one, if this isn't the list tail. > + */ > pag = xfs_perag_get(mp, agno); > mutex_lock(&pag->pagi_unlinked_lock); > pag->pagi_unlinked_count++; > + if (agino != NULLAGINO) > + error = xfs_iunlink_add_backref(pag, XFS_INO_TO_AGINO(mp, ino), > + agino); > mutex_unlock(&pag->pagi_unlinked_lock); > xfs_perag_put(pag); > + if (error) > + goto fail_iput; > > /* > * Prevent any DMAPI event from being sent when the reference on > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > index 6bfc985669e0..4eb97ddc915e 100644 > --- a/fs/xfs/xfs_mount.c > +++ b/fs/xfs/xfs_mount.c > @@ -151,6 +151,7 @@ xfs_free_perag( > ASSERT(atomic_read(&pag->pag_ref) == 0); > ASSERT(pag->pagi_unlinked_count == 0 || > XFS_FORCED_SHUTDOWN(mp)); > + xfs_iunlink_destroy(pag); > mutex_destroy(&pag->pagi_unlinked_lock); > xfs_buf_hash_destroy(pag); > mutex_destroy(&pag->pag_ici_reclaim_lock); > @@ -231,6 +232,9 @@ xfs_initialize_perag( > if (first_initialised == NULLAGNUMBER) > first_initialised = index; > mutex_init(&pag->pagi_unlinked_lock); > + error = xfs_iunlink_init(pag); > + if (error) > + goto out_iunlink_mutex; > } > > index = xfs_set_inode_alloc(mp, agcount); > @@ -241,6 +245,8 @@ xfs_initialize_perag( > mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); > return 0; > > +out_iunlink_mutex: > + mutex_destroy(&pag->pagi_unlinked_lock); > out_hash_destroy: > xfs_buf_hash_destroy(pag); > out_free_pag: > @@ -253,6 +259,7 @@ xfs_initialize_perag( > if (!pag) > break; > xfs_buf_hash_destroy(pag); > + xfs_iunlink_destroy(pag); > mutex_destroy(&pag->pagi_unlinked_lock); > mutex_destroy(&pag->pag_ici_reclaim_lock); > kmem_free(pag); > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > index 0fcc6b6a4f67..27a433e93d32 100644 > --- a/fs/xfs/xfs_mount.h > +++ b/fs/xfs/xfs_mount.h > @@ -392,6 +392,7 @@ 
typedef struct xfs_perag { > /* unlinked inodes */ > struct mutex pagi_unlinked_lock; > uint32_t pagi_unlinked_count; > + struct rhashtable pagi_unlinked_hash; > } xfs_perag_t; > > static inline struct xfs_ag_resv * >
On Fri, Feb 01, 2019 at 02:29:02PM -0500, Brian Foster wrote: > On Thu, Jan 31, 2019 at 03:18:40PM -0800, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Use a rhashtable to cache the unlinked list incore. This should speed > > up unlinked processing considerably when there are a lot of inodes on > > the unlinked list because iunlink_remove no longer has to traverse an > > entire bucket list to find which inode points to the one being removed. > > > > The incore list structure records "X.next_unlinked = Y" relations, with > > the rhashtable using Y to index the records. This makes finding the > > inode X that points to a inode Y very quick. If our cache fails to find > > anything we can always fall back on the old method. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > --- > > I still need to take a closer look at this one, but I want to make sure > I grok what's happening from the unlinked list perspective... > > > fs/xfs/xfs_inode.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++ > > fs/xfs/xfs_inode.h | 10 ++ > > fs/xfs/xfs_log_recover.c | 12 ++- > > fs/xfs/xfs_mount.c | 7 + > > fs/xfs/xfs_mount.h | 1 > > 5 files changed, 245 insertions(+), 1 deletion(-) > > > > > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > index 56349497d75b..eec3c6109fc6 100644 > > --- a/fs/xfs/xfs_inode.c > > +++ b/fs/xfs/xfs_inode.c > ... > > @@ -2072,6 +2255,9 @@ xfs_iunlink( > > if (error) > > goto out_unlock; > > ASSERT(old_agino == NULLAGINO); > > + error = xfs_iunlink_add_backref(pag, agino, next_agino); > > + if (error) > > + goto out_unlock; > > agino has been unlinked, add a backref from the next inode (i.e., we > point to) back to agino. > > > } > > > > /* Point the head of the list to point to this inode. */ > > @@ -2144,6 +2330,17 @@ xfs_iunlink_map_prev( > > > > ASSERT(head_agino != target_agino); > > > > + /* See if our backref cache can find it faster. 
*/ > > + error = xfs_iunlink_lookup_backref(pag, target_agino, &next_agino); > > + if (error == 0) { > > + next_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); > > + error = xfs_iunlink_map_ino(tp, next_ino, &last_imap, > > + &last_dip, &last_ibp); > > + if (error) > > + return error; > > + goto out; > > + } > > + > > next_agino = head_agino; > > while (next_agino != target_agino) { > > xfs_agino_t unlinked_agino; > > @@ -2169,6 +2366,7 @@ xfs_iunlink_map_prev( > > next_agino = unlinked_agino; > > } > > > > +out: > > /* Should never happen, but don't return garbage. */ > > if (next_ino == NULLFSINO) > > return -EFSCORRUPTED; > > @@ -2243,6 +2441,12 @@ xfs_iunlink_remove( > > if (error) > > goto out_unlock; > > > > + if (next_agino != NULLAGINO) { > > + error = xfs_iunlink_forget_backref(pag, next_agino); > > + if (error) > > + goto out_unlock; > > + } > > Removed from the head. We thus had no inode pointing to us, but the next > record had a ref that we pointed to it. Drop that since we're gone and > that inode is now the head. > > > + > > /* Point the head of the list to the next unlinked inode. */ > > error = xfs_iunlink_update_bucket(tp, agibp, bucket_index, > > next_agino); > > @@ -2265,10 +2469,22 @@ xfs_iunlink_remove( > > &next_agino); > > if (error) > > goto out_unlock; > > + if (next_agino != NULLAGINO) { > > + error = xfs_iunlink_forget_backref(pag, next_agino); > > + if (error) > > + goto out_unlock; > > + } > > Removed from the middle. next_agino had a backref to us, so we drop that > since we're going away. > > > > > /* Point the previous inode on the list to the next inode. */ > > xfs_iunlink_update_dinode(tp, last_ibp, last_dip, &imap, > > next_ino, next_agino); > > + if (next_agino == NULLAGINO) > > + error = xfs_iunlink_forget_backref(pag, agino); > > + else > > + error = xfs_iunlink_change_backref(pag, agino, > > + next_agino); > > Now we deal with the backref for us (i.e., to whatever pointed at us and > now points at something else). 
If it now points at a real inode, change > the ref that pointed to us to point to our old next. If it doesn't, > delete the ref that pointed to us. Am I following that correctly? > > Hmm, I realize this may be what's happening behind these wrappers, but > I'm wondering if the logic at this level would be easier to grok if we > just dropped the backref that points to our ip (IIUC we should always > have one here), if we point to something, drop the backref for that and > then add a new one for the prev->next link we've just created. > > But I still need to take a closer look at all of this... it might be > more clear with some comments. Yep, your understanding of what we're doing with the backrefs is correct. I'll add some comments to make what we're doing clearer. FWIW the reason for having a separate change_backref is to avoid freeing the old backref only to allocate memory for a new backref immediately. --D > Brian > > > + if (error) > > + goto out_unlock; > > } > > pag->pagi_unlinked_count--; > > > > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > > index be2014520155..9690f0d32e33 100644 > > --- a/fs/xfs/xfs_inode.h > > +++ b/fs/xfs/xfs_inode.h > > @@ -500,4 +500,14 @@ extern struct kmem_zone *xfs_inode_zone; > > > > bool xfs_inode_verify_forks(struct xfs_inode *ip); > > > > +int xfs_iunlink_init(struct xfs_perag *pag); > > +void xfs_iunlink_destroy(struct xfs_perag *pag); > > +int xfs_iunlink_lookup_backref(struct xfs_perag *pag, xfs_agino_t agino, > > + xfs_agino_t *prev_agino); > > +int xfs_iunlink_add_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, > > + xfs_agino_t this_agino); > > +int xfs_iunlink_forget_backref(struct xfs_perag *pag, xfs_agino_t agino); > > +int xfs_iunlink_change_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, > > + xfs_agino_t this_agino); > > + > > #endif /* __XFS_INODE_H__ */ > > diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c > > index c920b8aeba01..909b70550aa8 100644 > > --- 
a/fs/xfs/xfs_log_recover.c > > +++ b/fs/xfs/xfs_log_recover.c > > @@ -5078,12 +5078,22 @@ xlog_recover_process_one_iunlink( > > agino = be32_to_cpu(dip->di_next_unlinked); > > xfs_buf_relse(ibp); > > > > - /* Make sure the in-core data knows about this unlinked inode. */ > > + /* > > + * Make sure the in-core data knows about this unlinked inode. Since > > + * our iunlinks recovery basically just deletes the head of a bucket > > + * list until the bucket is empty, we need only to add the backref from > > + * the current list item to the next one, if this isn't the list tail. > > + */ > > pag = xfs_perag_get(mp, agno); > > mutex_lock(&pag->pagi_unlinked_lock); > > pag->pagi_unlinked_count++; > > + if (agino != NULLAGINO) > > + error = xfs_iunlink_add_backref(pag, XFS_INO_TO_AGINO(mp, ino), > > + agino); > > mutex_unlock(&pag->pagi_unlinked_lock); > > xfs_perag_put(pag); > > + if (error) > > + goto fail_iput; > > > > /* > > * Prevent any DMAPI event from being sent when the reference on > > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > > index 6bfc985669e0..4eb97ddc915e 100644 > > --- a/fs/xfs/xfs_mount.c > > +++ b/fs/xfs/xfs_mount.c > > @@ -151,6 +151,7 @@ xfs_free_perag( > > ASSERT(atomic_read(&pag->pag_ref) == 0); > > ASSERT(pag->pagi_unlinked_count == 0 || > > XFS_FORCED_SHUTDOWN(mp)); > > + xfs_iunlink_destroy(pag); > > mutex_destroy(&pag->pagi_unlinked_lock); > > xfs_buf_hash_destroy(pag); > > mutex_destroy(&pag->pag_ici_reclaim_lock); > > @@ -231,6 +232,9 @@ xfs_initialize_perag( > > if (first_initialised == NULLAGNUMBER) > > first_initialised = index; > > mutex_init(&pag->pagi_unlinked_lock); > > + error = xfs_iunlink_init(pag); > > + if (error) > > + goto out_iunlink_mutex; > > } > > > > index = xfs_set_inode_alloc(mp, agcount); > > @@ -241,6 +245,8 @@ xfs_initialize_perag( > > mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); > > return 0; > > > > +out_iunlink_mutex: > > + mutex_destroy(&pag->pagi_unlinked_lock); > > out_hash_destroy: > > 
xfs_buf_hash_destroy(pag); > > out_free_pag: > > @@ -253,6 +259,7 @@ xfs_initialize_perag( > > if (!pag) > > break; > > xfs_buf_hash_destroy(pag); > > + xfs_iunlink_destroy(pag); > > mutex_destroy(&pag->pagi_unlinked_lock); > > mutex_destroy(&pag->pag_ici_reclaim_lock); > > kmem_free(pag); > > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > > index 0fcc6b6a4f67..27a433e93d32 100644 > > --- a/fs/xfs/xfs_mount.h > > +++ b/fs/xfs/xfs_mount.h > > @@ -392,6 +392,7 @@ typedef struct xfs_perag { > > /* unlinked inodes */ > > struct mutex pagi_unlinked_lock; > > uint32_t pagi_unlinked_count; > > + struct rhashtable pagi_unlinked_hash; > > } xfs_perag_t; > > > > static inline struct xfs_ag_resv * > >
On Fri, Feb 01, 2019 at 12:03:43AM -0800, Christoph Hellwig wrote: > On Thu, Jan 31, 2019 at 03:18:40PM -0800, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Use a rhashtable to cache the unlinked list incore. This should speed > > up unlinked processing considerably when there are a lot of inodes on > > the unlinked list because iunlink_remove no longer has to traverse an > > entire bucket list to find which inode points to the one being removed. > > This sounds pretty reasonable and a real review will follow. But can > you quantify the considerably speedups for real life workloads? When you have more than a few hundred open-but-unlinked inodes in an AG, the unlinked list walking starts to take a significant amount of time when the final reference to an inode goes away. It's essentially an O(N) buffer walk, so takes a lot of CPU time when a list gets more than a few inode long. With darrick's background inactivation, the lists grow to tens of thousands of inodes very quickly. I was seeing >95% of CPU time being spend in unlinked list walking on large scale 'rm -rf' workloads and performance absolutely dropped off a cliff. My initial code (from years and years ago) used a double linked list, and when I forward ported that and ran it up, the CPU overhead of unlink list removal was cut to almost nothing and all the performance regressions with background inactivation went away. i.e. Unlink list removal went from an O(N) overhead to always being O(1), so the length of the list didnt' affect performance any more. Darrick's code replaces the list_head in each inode I used with a rhashtable - it removes the 16 bytes per-inode memory footprint my implementation required, but otherwise is identical. Hence I expect that it'll show exactly the same performance characteristics as the existing code. 
IOWs, it's really required for backgrounding and parallelising inode inactivation, but otherwise it won't make any noticeable/measurable difference to most workloads because the unlinked lists almost never grow more than one inode in length and so O(N) ~= O(1) almost all the time... Cheers, Dave.
On Sat, Feb 02, 2019 at 10:59:00AM +1100, Dave Chinner wrote: > On Fri, Feb 01, 2019 at 12:03:43AM -0800, Christoph Hellwig wrote: > > On Thu, Jan 31, 2019 at 03:18:40PM -0800, Darrick J. Wong wrote: > > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > > > Use a rhashtable to cache the unlinked list incore. This should speed > > > up unlinked processing considerably when there are a lot of inodes on > > > the unlinked list because iunlink_remove no longer has to traverse an > > > entire bucket list to find which inode points to the one being removed. > > > > This sounds pretty reasonable and a real review will follow. But can > > you quantify the considerably speedups for real life workloads? > > When you have more than a few hundred open-but-unlinked inodes in an > AG, the unlinked list walking starts to take a significant amount of > time when the final reference to an inode goes away. It's > essentially an O(N) buffer walk, so takes a lot of CPU time when a > list gets more than a few inode long. > > With darrick's background inactivation, the lists grow to tens of > thousands of inodes very quickly. I was seeing >95% of CPU time > being spend in unlinked list walking on large scale 'rm -rf' > workloads and performance absolutely dropped off a cliff. My initial > code (from years and years ago) used a > double linked list, and when I forward ported that and ran it up, > the CPU overhead of unlink list removal was cut to almost nothing and > all the performance regressions with background inactivation went > away. i.e. Unlink list removal went from an O(N) overhead to always > being O(1), so the length of the list didnt' affect performance any > more. > > Darrick's code replaces the list_head in each inode I used with a > rhashtable - it removes the 16 bytes per-inode memory footprint my > implementation required, but otherwise is identical. Hence I expect > that it'll show exactly the same performance characteristics as the > existing code. 
> > IOWs, it's really required for backgrounding and parallelising inode > inactivation, but otherwise it won't make any noticable/measurable > difference to most workloads because the unlinked lists almost never > grows more than one inode in length and so O(N) ~= O(1) almost all > the time... I wrote a silly program to open as many O_TMPFILE files as it can and then close them all. I wrapped that in a script to crank up ulimit -n to fs.file_max (which on this VM was about 194000 files) and came up with this on a 5.0-rc4 kernel with deferred inactivation and iunlink backref caching: + /d/t/tmpfile/tmpfile Opened 193519 files in 6.44s. Closed 193519 files in 1.37s real 0m7.818s user 0m0.083s sys 0m7.373s + cd / + umount /mnt real 0m5.681s user 0m0.008s sys 0m0.000s I then repeated the experiment with a vanilla 5.0-rc2 kernel I had lying around and saw much slower results: + /d/t/tmpfile/tmpfile Opened 193588 files in 6.35s. Closed 193588 files in 751.61s real 12m38.853s user 0m0.084s sys 12m34.470s + cd / + umount /mnt real 0m0.086s user 0m0.000s sys 0m0.060s The VM had 2G of RAM and a 3GB SCSI disk backed by a tmpfs file. --D Wrapper script: #!/bin/bash -x dir="$(dirname "$0")" umount /mnt rmmod xfs mkfs.xfs -f /dev/sda mount /dev/sda /mnt ulimit -n $(cat /proc/sys/fs/file-max) cd /mnt time $dir/tmpfile cd / time umount /mnt and tmpfile.c: /* Open files and leak them. 
*/ #define _GNU_SOURCE #include <time.h> #include <unistd.h> #include <stdlib.h> #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <errno.h> static int min_fd = -1; static int max_fd = -1; static unsigned int nr_opened = 0; static float start_time; void clock_time(float *time) { static clockid_t clkid = CLOCK_MONOTONIC; struct timespec ts; int ret; retry: ret = clock_gettime(clkid, &ts); if (ret) { if (clkid == CLOCK_MONOTONIC) { clkid = CLOCK_REALTIME; goto retry; } perror("clock_gettime"); exit(2); } *time = ts.tv_sec + ((float)ts.tv_nsec / 1000000000); } /* * Exit the program due to an error. * * If we've exhausted all the file descriptors, make sure we close all the * open fds in the order we received them in order to exploit a quirk of ext4 * and xfs where the oldest unlinked inodes are at the /end/ of the unlinked * lists, which will make removing the unlinked files maximally painful. * * If it's some other error, just die and let the kernel sort it out. */ void die(void) { float end_time; int fd; switch (errno) { case EMFILE: case ENFILE: case ENOSPC: clock_time(&end_time); printf("Opened %u files in %.2fs.\n", nr_opened, end_time - start_time); clock_time(&start_time); for (fd = min_fd; fd <= max_fd; fd++) close(fd); clock_time(&end_time); printf("Closed %u files in %.2fs\n", nr_opened, end_time - start_time); exit(0); break; default: perror("open?"); exit(2); break; } } /* Remember how many file we open and all that. */ void remember_fd(int fd) { if (min_fd == -1 || min_fd > fd) min_fd = fd; if (max_fd == -1 || max_fd < fd) max_fd = fd; nr_opened++; } /* Put an opened file on the unlinked list and leak the fd. */ void leak_tmpfile(void) { int fd = -1; int ret; /* Try to create an O_TMPFILE and leak the fd. */ #ifdef O_TMPFILE fd = open(".", O_TMPFILE | O_RDWR, 0644); if (fd >= 0) { remember_fd(fd); return; } if (fd < 0) die(); #endif /* Oh well, create a new file, unlink it, and leak the fd. 
*/ fd = open("./moo", O_CREAT | O_RDWR, 0644); if (fd < 0) die(); ret = unlink("./moo"); if (ret) die(); remember_fd(fd); } /* Try to put as many files on the unlinked list and then kill them. */ int main(int argc, char *argv[]) { clock_time(&start_time); while (1) leak_tmpfile(); return 0; }
On Fri, Feb 01, 2019 at 08:31:59PM -0800, Darrick J. Wong wrote: > > I wrote a silly program to open as many O_TMPFILE files as it can and > then close them all. I wrapped that in a script to crank up ulimit -n > to fs.file_max (which on this VM was about 194000 files) and came up > with this on a 5.0-rc4 kernel with deferred inactivation and iunlink > backref caching: Ah, cool. Maybe throw a one-line summary of the results into the commit log for the next version.
On Thu, Jan 31, 2019 at 03:18:40PM -0800, Darrick J. Wong wrote: > +/* > + * Faster In-Core Unlinked List Lookups > + * ==================================== I'd drop the "Faster " here - the whole point of a separate in-core lookup is generally that it is faster. > +struct xfs_iunlink_key { > + xfs_agino_t iu_next_unlinked; > +}; I don't think we need this structure at all. We can just directly pass a pointer to the xfs_agino_t instead. > + .obj_cmpfn = _xfs_iunlink_obj_cmp, No real need for the _ prefix. Also it would generally be nice if the method implementation name is prefix + method name. > +int > +xfs_iunlink_lookup_backref( > + struct xfs_perag *pag, > + xfs_agino_t agino, > + xfs_agino_t *prev_agino) > +{ > + struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; > + struct xfs_iunlink *iu; > + > + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, > + xfs_iunlink_hash_params); > + if (!iu) > + return -ENOENT; > + *prev_agino = iu->iu_agino; > + return 0; I think we can just retun the xfs_agino_t as the return value, with NULLAGINO as the not found return value. > + > +/* Forget that X.next_unlinked = @agino. */ > +int > +xfs_iunlink_forget_backref( > + struct xfs_perag *pag, > + xfs_agino_t agino) > +{ > + struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; > + struct xfs_iunlink *iu; > + int error; > + > + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, > + xfs_iunlink_hash_params); > + if (!iu) > + return -ENOENT; > + > + error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, > + &iu->iu_rhash_head, xfs_iunlink_hash_params); > + if (error) > + return error; > + > + kmem_free(iu); > + return 0; This mostly duplicates the change_backref help. If we just free the iu for a NULLAGINO next_unlinked there we can merge the two functions easily. 
> +int xfs_iunlink_init(struct xfs_perag *pag); > +void xfs_iunlink_destroy(struct xfs_perag *pag); > +int xfs_iunlink_lookup_backref(struct xfs_perag *pag, xfs_agino_t agino, > + xfs_agino_t *prev_agino); > +int xfs_iunlink_add_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, > + xfs_agino_t this_agino); > +int xfs_iunlink_forget_backref(struct xfs_perag *pag, xfs_agino_t agino); > +int xfs_iunlink_change_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, > + xfs_agino_t this_agino); Half of these aren't used outside xfs_inode.c. > + xfs_iunlink_destroy(pag); > mutex_destroy(&pag->pagi_unlinked_lock); > xfs_buf_hash_destroy(pag); > mutex_destroy(&pag->pag_ici_reclaim_lock); > @@ -231,6 +232,9 @@ xfs_initialize_perag( > if (first_initialised == NULLAGNUMBER) > first_initialised = index; > mutex_init(&pag->pagi_unlinked_lock); > + error = xfs_iunlink_init(pag); > + if (error) > + goto out_iunlink_mutex; Any good reason pagi_unlinked_lock isn't handled in xfs_iunlink_init / xfs_iunlink_destroy? 
Untested patch for most of my comments above: diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e797b11ad842..f8da8ce48348 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1907,7 +1907,7 @@ xfs_inactive( } /* - * Faster In-Core Unlinked List Lookups + * In-Core Unlinked List Lookups * ==================================== * * Every inode is supposed to be reachable from some other piece of metadata @@ -1937,20 +1937,16 @@ struct xfs_iunlink { xfs_agino_t iu_next_unlinked; }; -struct xfs_iunlink_key { - xfs_agino_t iu_next_unlinked; -}; - /* Unlinked list predecessor lookup hashtable construction */ static int -xfs_iunlink_obj_cmp( +xfs_iunlink_obj_cmpfn( struct rhashtable_compare_arg *arg, const void *obj) { - const struct xfs_iunlink_key *key = arg->key; + const xfs_agino_t *key = arg->key; const struct xfs_iunlink *iu = obj; - if (iu->iu_next_unlinked != key->iu_next_unlinked) + if (iu->iu_next_unlinked != *key) return 1; return 0; } @@ -1962,28 +1958,25 @@ static const struct rhashtable_params xfs_iunlink_hash_params = { .key_offset = offsetof(struct xfs_iunlink, iu_next_unlinked), .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head), .automatic_shrinking = true, - .obj_cmpfn = xfs_iunlink_obj_cmp, + .obj_cmpfn = xfs_iunlink_obj_cmpfn, }; /* - * Return X (in @prev_agino), where X.next_unlinked == @agino. Returns -ENOENT - * if no such relation is found. + * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such + * relation is found. 
*/ -static int +static xfs_agino_t xfs_iunlink_lookup_backref( struct xfs_perag *pag, - xfs_agino_t agino, - xfs_agino_t *prev_agino) + xfs_agino_t agino) { - struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; struct xfs_iunlink *iu; - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, xfs_iunlink_hash_params); if (!iu) - return -ENOENT; - *prev_agino = iu->iu_agino; - return 0; + return NULLAGINO; + return iu->iu_agino; } /* Remember that @prev_agino.next_unlinked = @this_agino. */ @@ -2003,31 +1996,6 @@ xfs_iunlink_add_backref( &iu->iu_rhash_head, xfs_iunlink_hash_params); } -/* Forget that X.next_unlinked = @agino. */ -static int -xfs_iunlink_forget_backref( - struct xfs_perag *pag, - xfs_agino_t agino) -{ - struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; - struct xfs_iunlink *iu; - int error; - - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, - xfs_iunlink_hash_params); - if (!iu) - return -ENOENT; - - error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, - &iu->iu_rhash_head, xfs_iunlink_hash_params); - if (error) - return error; - - kmem_free(iu); - return 0; -} - - /* Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked. */ static int xfs_iunlink_change_backref( @@ -2035,11 +2003,10 @@ xfs_iunlink_change_backref( xfs_agino_t agino, xfs_agino_t next_unlinked) { - struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; struct xfs_iunlink *iu; int error; - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, xfs_iunlink_hash_params); if (!iu) return -ENOENT; @@ -2049,8 +2016,18 @@ xfs_iunlink_change_backref( if (error) return error; - iu->iu_next_unlinked = next_unlinked; + /* + * If there is no new next entry just free our item and return. 
+ */ + if (next_unlinked == NULLAGINO) { + kmem_free(iu); + return 0; + } + /* + * Else update the hash table to the new entry. + */ + iu->iu_next_unlinked = next_unlinked; return rhashtable_insert_fast(&pag->pagi_unlinked_hash, &iu->iu_rhash_head, xfs_iunlink_hash_params); } @@ -2357,8 +2334,8 @@ xfs_iunlink_map_prev( ASSERT(head_agino != target_agino); /* See if our backref cache can find it faster. */ - error = xfs_iunlink_lookup_backref(pag, target_agino, &next_agino); - if (error == 0) { + next_agino = xfs_iunlink_lookup_backref(pag, target_agino); + if (next_agino != NULLAGINO) { next_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); error = xfs_iunlink_map_ino(tp, next_ino, &last_imap, &last_dip, &last_ibp); @@ -2468,7 +2445,8 @@ xfs_iunlink_remove( goto out_unlock; if (next_agino != NULLAGINO) { - error = xfs_iunlink_forget_backref(pag, next_agino); + error = xfs_iunlink_change_backref(pag, next_agino, + NULLAGINO); if (error) goto out_unlock; } @@ -2496,7 +2474,8 @@ xfs_iunlink_remove( if (error) goto out_unlock; if (next_agino != NULLAGINO) { - error = xfs_iunlink_forget_backref(pag, next_agino); + error = xfs_iunlink_change_backref(pag, next_agino, + NULLAGINO); if (error) goto out_unlock; } @@ -2504,11 +2483,7 @@ xfs_iunlink_remove( /* Point the previous inode on the list to the next inode. */ xfs_iunlink_update_dinode(tp, last_ibp, last_dip, &imap, next_ino, next_agino); - if (next_agino == NULLAGINO) - error = xfs_iunlink_forget_backref(pag, agino); - else - error = xfs_iunlink_change_backref(pag, agino, - next_agino); + error = xfs_iunlink_change_backref(pag, agino, next_agino); if (error) goto out_unlock; }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 56349497d75b..eec3c6109fc6 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1880,6 +1880,189 @@ xfs_inactive( xfs_qm_dqdetach(ip); } +/* + * Faster In-Core Unlinked List Lookups + * ==================================== + * + * Every inode is supposed to be reachable from some other piece of metadata + * with the exception of the root directory. Inodes with a connection to a + * file descriptor but not linked from anywhere in the on-disk directory tree + * are collectively known as unlinked inodes, though the filesystem itself + * maintains links to these inodes so that on-disk metadata are consistent. + * + * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI + * header contains a number of buckets that point to an inode, and each inode + * record has a pointer to the next inode in the hash chain. This + * singly-linked list causes scaling problems in the iunlink remove function + * because we must walk that list to find the inode that points to the inode + * being removed from the unlinked hash bucket list. + * + * What if we modelled the unlinked list as a collection of records capturing + * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd + * have a fast way to look up unlinked list predecessors, which avoids the + * slow list walk. That's exactly what we do here (in-core) with a per-AG + * rhashtable. + */ + +/* Capture a "X.next_unlinked = Y" relationship. 
*/ +struct xfs_iunlink { + struct rhash_head iu_rhash_head; + xfs_agino_t iu_agino; + xfs_agino_t iu_next_unlinked; +}; + +struct xfs_iunlink_key { + xfs_agino_t iu_next_unlinked; +}; + +/* Unlinked list predecessor lookup hashtable construction */ +static int +_xfs_iunlink_obj_cmp( + struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct xfs_iunlink_key *key = arg->key; + const struct xfs_iunlink *iu = obj; + + if (iu->iu_next_unlinked != key->iu_next_unlinked) + return 1; + return 0; +} + +static const struct rhashtable_params xfs_iunlink_hash_params = { + .min_size = XFS_AGI_UNLINKED_BUCKETS, + .nelem_hint = 512, + .key_len = sizeof(xfs_agino_t), + .key_offset = offsetof(struct xfs_iunlink, iu_next_unlinked), + .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head), + .automatic_shrinking = true, + .obj_cmpfn = _xfs_iunlink_obj_cmp, +}; + +/* + * Return X (in @prev_agino), where X.next_unlinked == @agino. Returns -ENOENT + * if no such relation is found. + */ +int +xfs_iunlink_lookup_backref( + struct xfs_perag *pag, + xfs_agino_t agino, + xfs_agino_t *prev_agino) +{ + struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; + struct xfs_iunlink *iu; + + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, + xfs_iunlink_hash_params); + if (!iu) + return -ENOENT; + *prev_agino = iu->iu_agino; + return 0; +} + +/* Remember that @prev_agino.next_unlinked = @this_agino. */ +int +xfs_iunlink_add_backref( + struct xfs_perag *pag, + xfs_agino_t prev_agino, + xfs_agino_t this_agino) +{ + struct xfs_iunlink *iu; + + iu = kmem_zalloc(sizeof(*iu), KM_SLEEP | KM_NOFS); + iu->iu_agino = prev_agino; + iu->iu_next_unlinked = this_agino; + + return rhashtable_insert_fast(&pag->pagi_unlinked_hash, + &iu->iu_rhash_head, xfs_iunlink_hash_params); +} + +/* Forget that X.next_unlinked = @agino. 
*/ +int +xfs_iunlink_forget_backref( + struct xfs_perag *pag, + xfs_agino_t agino) +{ + struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; + struct xfs_iunlink *iu; + int error; + + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, + xfs_iunlink_hash_params); + if (!iu) + return -ENOENT; + + error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, + &iu->iu_rhash_head, xfs_iunlink_hash_params); + if (error) + return error; + + kmem_free(iu); + return 0; +} + + +/* Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked. */ +int +xfs_iunlink_change_backref( + struct xfs_perag *pag, + xfs_agino_t agino, + xfs_agino_t next_unlinked) +{ + struct xfs_iunlink_key key = { .iu_next_unlinked = agino }; + struct xfs_iunlink *iu; + int error; + + iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &key, + xfs_iunlink_hash_params); + if (!iu) + return -ENOENT; + + error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, + &iu->iu_rhash_head, xfs_iunlink_hash_params); + if (error) + return error; + + iu->iu_next_unlinked = next_unlinked; + + return rhashtable_insert_fast(&pag->pagi_unlinked_hash, + &iu->iu_rhash_head, xfs_iunlink_hash_params); +} + +/* Set up the in-core predecessor structures. */ +int +xfs_iunlink_init( + struct xfs_perag *pag) +{ + return rhashtable_init(&pag->pagi_unlinked_hash, + &xfs_iunlink_hash_params); +} + +/* Free the in-core predecessor structures. */ +static void +xfs_iunlink_free_item( + void *ptr, + void *arg) +{ + struct xfs_iunlink *iu = ptr; + bool *freed_anything = arg; + + *freed_anything = true; + kmem_free(iu); +} + +void +xfs_iunlink_destroy( + struct xfs_perag *pag) +{ + bool freed_anything = false; + + rhashtable_free_and_destroy(&pag->pagi_unlinked_hash, + xfs_iunlink_free_item, &freed_anything); + + ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount)); +} + /* * Point the AGI unlinked bucket at an inode and log the results. The caller * is responsible for validating the old value. 
@@ -2072,6 +2255,9 @@ xfs_iunlink( if (error) goto out_unlock; ASSERT(old_agino == NULLAGINO); + error = xfs_iunlink_add_backref(pag, agino, next_agino); + if (error) + goto out_unlock; } /* Point the head of the list to point to this inode. */ @@ -2144,6 +2330,17 @@ xfs_iunlink_map_prev( ASSERT(head_agino != target_agino); + /* See if our backref cache can find it faster. */ + error = xfs_iunlink_lookup_backref(pag, target_agino, &next_agino); + if (error == 0) { + next_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); + error = xfs_iunlink_map_ino(tp, next_ino, &last_imap, + &last_dip, &last_ibp); + if (error) + return error; + goto out; + } + next_agino = head_agino; while (next_agino != target_agino) { xfs_agino_t unlinked_agino; @@ -2169,6 +2366,7 @@ xfs_iunlink_map_prev( next_agino = unlinked_agino; } +out: /* Should never happen, but don't return garbage. */ if (next_ino == NULLFSINO) return -EFSCORRUPTED; @@ -2243,6 +2441,12 @@ xfs_iunlink_remove( if (error) goto out_unlock; + if (next_agino != NULLAGINO) { + error = xfs_iunlink_forget_backref(pag, next_agino); + if (error) + goto out_unlock; + } + /* Point the head of the list to the next unlinked inode. */ error = xfs_iunlink_update_bucket(tp, agibp, bucket_index, next_agino); @@ -2265,10 +2469,22 @@ xfs_iunlink_remove( &next_agino); if (error) goto out_unlock; + if (next_agino != NULLAGINO) { + error = xfs_iunlink_forget_backref(pag, next_agino); + if (error) + goto out_unlock; + } /* Point the previous inode on the list to the next inode. 
*/ xfs_iunlink_update_dinode(tp, last_ibp, last_dip, &imap, next_ino, next_agino); + if (next_agino == NULLAGINO) + error = xfs_iunlink_forget_backref(pag, agino); + else + error = xfs_iunlink_change_backref(pag, agino, + next_agino); + if (error) + goto out_unlock; } pag->pagi_unlinked_count--; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index be2014520155..9690f0d32e33 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -500,4 +500,14 @@ extern struct kmem_zone *xfs_inode_zone; bool xfs_inode_verify_forks(struct xfs_inode *ip); +int xfs_iunlink_init(struct xfs_perag *pag); +void xfs_iunlink_destroy(struct xfs_perag *pag); +int xfs_iunlink_lookup_backref(struct xfs_perag *pag, xfs_agino_t agino, + xfs_agino_t *prev_agino); +int xfs_iunlink_add_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, + xfs_agino_t this_agino); +int xfs_iunlink_forget_backref(struct xfs_perag *pag, xfs_agino_t agino); +int xfs_iunlink_change_backref(struct xfs_perag *pag, xfs_agino_t prev_agino, + xfs_agino_t this_agino); + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index c920b8aeba01..909b70550aa8 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -5078,12 +5078,22 @@ xlog_recover_process_one_iunlink( agino = be32_to_cpu(dip->di_next_unlinked); xfs_buf_relse(ibp); - /* Make sure the in-core data knows about this unlinked inode. */ + /* + * Make sure the in-core data knows about this unlinked inode. Since + * our iunlinks recovery basically just deletes the head of a bucket + * list until the bucket is empty, we need only to add the backref from + * the current list item to the next one, if this isn't the list tail. 
+ */ pag = xfs_perag_get(mp, agno); mutex_lock(&pag->pagi_unlinked_lock); pag->pagi_unlinked_count++; + if (agino != NULLAGINO) + error = xfs_iunlink_add_backref(pag, XFS_INO_TO_AGINO(mp, ino), + agino); mutex_unlock(&pag->pagi_unlinked_lock); xfs_perag_put(pag); + if (error) + goto fail_iput; /* * Prevent any DMAPI event from being sent when the reference on diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6bfc985669e0..4eb97ddc915e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -151,6 +151,7 @@ xfs_free_perag( ASSERT(atomic_read(&pag->pag_ref) == 0); ASSERT(pag->pagi_unlinked_count == 0 || XFS_FORCED_SHUTDOWN(mp)); + xfs_iunlink_destroy(pag); mutex_destroy(&pag->pagi_unlinked_lock); xfs_buf_hash_destroy(pag); mutex_destroy(&pag->pag_ici_reclaim_lock); @@ -231,6 +232,9 @@ xfs_initialize_perag( if (first_initialised == NULLAGNUMBER) first_initialised = index; mutex_init(&pag->pagi_unlinked_lock); + error = xfs_iunlink_init(pag); + if (error) + goto out_iunlink_mutex; } index = xfs_set_inode_alloc(mp, agcount); @@ -241,6 +245,8 @@ xfs_initialize_perag( mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); return 0; +out_iunlink_mutex: + mutex_destroy(&pag->pagi_unlinked_lock); out_hash_destroy: xfs_buf_hash_destroy(pag); out_free_pag: @@ -253,6 +259,7 @@ xfs_initialize_perag( if (!pag) break; xfs_buf_hash_destroy(pag); + xfs_iunlink_destroy(pag); mutex_destroy(&pag->pagi_unlinked_lock); mutex_destroy(&pag->pag_ici_reclaim_lock); kmem_free(pag); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0fcc6b6a4f67..27a433e93d32 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -392,6 +392,7 @@ typedef struct xfs_perag { /* unlinked inodes */ struct mutex pagi_unlinked_lock; uint32_t pagi_unlinked_count; + struct rhashtable pagi_unlinked_hash; } xfs_perag_t; static inline struct xfs_ag_resv *