Message ID | 20200707135741.487-2-hsiangkao@redhat.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: more unlinked inode list optimization v1 | expand |
On Tue, Jul 07, 2020 at 09:57:40PM +0800, Gao Xiang wrote: > There is no need to keep old multiple short unlink inode buckets > since we have an in-memory double linked list for all unlinked > inodes. > > Apart from the perspective of the necessity, the main advantage > is that the log and AGI update can be reduced since each AG has > the only one head now, which is implemented in the following patch. > > Therefore, this patch applies the new way in xfs_iunlink() and > keep the old approach in xfs_iunlink_remove_inode() path as well > so inode eviction can still work properly in recovery. > > Signed-off-by: Gao Xiang <hsiangkao@redhat.com> > --- > fs/xfs/xfs_inode.c | 40 ++++++++++++++++++++-------------------- > 1 file changed, 20 insertions(+), 20 deletions(-) > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index ab288424764c..10565fa5ace4 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -33,6 +33,7 @@ > #include "xfs_symlink.h" > #include "xfs_trans_priv.h" > #include "xfs_log.h" > +#include "xfs_log_priv.h" > #include "xfs_bmap_btree.h" > #include "xfs_reflink.h" > #include "xfs_iunlink_item.h" > @@ -1955,25 +1956,32 @@ xfs_iunlink_update_bucket( > struct xfs_trans *tp, > xfs_agnumber_t agno, > struct xfs_buf *agibp, > - unsigned int bucket_index, > + xfs_agino_t old_agino, > xfs_agino_t new_agino) > { > + struct xlog *log = tp->t_mountp->m_log; > struct xfs_agi *agi = agibp->b_addr; > xfs_agino_t old_value; > - int offset; > + unsigned int bucket_index; > + int offset; > > ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino)); > > + bucket_index = 0; > + /* During recovery, the old multiple bucket index can be applied */ > + if (!log || log->l_flags & XLOG_RECOVERY_NEEDED) { > + ASSERT(old_agino != NULLAGINO); > + > + if (be32_to_cpu(agi->agi_unlinked[0]) != old_agino) > + bucket_index = old_agino % XFS_AGI_UNLINKED_BUCKETS; > + } Ok, so you are doing this because you changed the function to pass in an agino rather than a 
bucket index from the caller context. So now you have to look up a structure to determine what the caller context was to determine what the bucket index we need to use is. Seems like we probably should have kept passing in the bucket index from the caller because that's where the knowledge of what bucket we need to update comes from? And in that case, the higher level code should be checking for the log recovery case when selecting the bucket, not hiding it deep in the guts of the code here.... > + > old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); > trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index, > old_value, new_agino); > > - /* > - * We should never find the head of the list already set to the value > - * passed in because either we're adding or removing ourselves from the > - * head of the list. > - */ > - if (old_value == new_agino) { > + /* check if the old agi_unlinked head is as expected */ > + if (old_value != old_agino) { > xfs_buf_mark_corrupt(agibp); > return -EFSCORRUPTED; > } This looks like a change of behaviour - it no longer checks against the inode we are about to add/remove from the list, but instead checks that old inode is what we found on the list. We're not concerned that what we found on the list matches what the caller found on the list and passed us - we're concerned about doing a double add/remove of the current inode... > @@ -2001,14 +2009,13 @@ xfs_iunlink_insert_inode( > xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); > xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); > xfs_agino_t next_agino; > - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; > > /* > * Get the index into the agi hash table for the list this inode will > * go on. Make sure the pointer isn't garbage and that this inode > * isn't already on the list. 
> */ > - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); > + next_agino = be32_to_cpu(agi->agi_unlinked[0]); > if (next_agino == agino || > !xfs_verify_agino_or_null(mp, agno, next_agino)) { > xfs_buf_mark_corrupt(agibp); > @@ -2036,7 +2043,7 @@ xfs_iunlink_insert_inode( > } > > /* Point the head of the list to point to this inode. */ > - return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino); > + return xfs_iunlink_update_bucket(tp, agno, agibp, next_agino, agino); > } > > /* > @@ -2051,27 +2058,20 @@ xfs_iunlink_remove_inode( > struct xfs_inode *ip) > { > struct xfs_mount *mp = tp->t_mountp; > - struct xfs_agi *agi = agibp->b_addr; > xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); > xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); > xfs_agino_t next_agino = ip->i_next_unlinked; > - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; > int error; > > if (ip->i_prev_unlinked == NULLAGINO) { > /* remove from head of list */ > - if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino) { > - xfs_buf_mark_corrupt(agibp); > - return -EFSCORRUPTED; > - } > if (next_agino == agino || > !xfs_verify_agino_or_null(mp, agno, next_agino)) > return -EFSCORRUPTED; > > - error = xfs_iunlink_update_bucket(tp, agno, agibp, > - bucket_index, next_agino); > + error = xfs_iunlink_update_bucket(tp, agno, agibp, agino, next_agino); > if (error) > - return -EFSCORRUPTED; > + return error; i.e. this is the point where we know we need to remove from the head of the unlinked list and all the bucket selection and verification should probably remain here...
Hi Dave, On Thu, Jul 09, 2020 at 08:33:26AM +1000, Dave Chinner wrote: > > > > + bucket_index = 0; > > + /* During recovery, the old multiple bucket index can be applied */ > > + if (!log || log->l_flags & XLOG_RECOVERY_NEEDED) { > > + ASSERT(old_agino != NULLAGINO); > > + > > + if (be32_to_cpu(agi->agi_unlinked[0]) != old_agino) > > + bucket_index = old_agino % XFS_AGI_UNLINKED_BUCKETS; > > + } > > Ok, so you are doing this because you changed the function to pass > in an agino rather than a bucket index from the caller context. So > now you have to look up a structure to determine what the caller > context was to determine what the bucket index we need to use is. > > Seems like we probably should have kept passing in the bucket index > from the caller because that's where the knowledge of what bucket we > need to update comes from? My thought is that since bucket_index is now fixed at 0 except for the well-defined recovery path, there is no need for any existing or future callers to specify a bucket number. The old formula is only used for xfs_iunlink_remove_inode() when recovering, so old_agino won't be NULLAGINO and also indicates the old bucket_index (since it removes the head unlinked inode). That is the only such path. > > And in that case, the higher level code should be checking for the > log recovery case when selecting the bucket, not hiding it deep in > the guts of the code here.... I could do that instead, but it means callers must obey more rules to call this function, e.g. bucket_index should obey the "old_agino % XFS_AGI_UNLINKED_BUCKETS" rule for the old way rather than passing any possible value. But if we require all callers to do that, I think "if (old_value != old_agino)" isn't needed here either; instead we could add an ASSERT here, since all callers should know what they are doing now, and leave only some integrity check here. 
> > > + > > old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); > > trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index, > > old_value, new_agino); > > > > - /* > > - * We should never find the head of the list already set to the value > > - * passed in because either we're adding or removing ourselves from the > > - * head of the list. > > - */ > > - if (old_value == new_agino) { > > + /* check if the old agi_unlinked head is as expected */ > > + if (old_value != old_agino) { > > xfs_buf_mark_corrupt(agibp); > > return -EFSCORRUPTED; > > } > > This looks like a change of behaviour - it no longer checks against > the inode we are about to add/remove from the list, but instead > checks that old inode is what we found on the list. We're not > concerned that what we found on the list matches what the caller > found on the list and passed us - we're concerned about doing a > double add/remove of the current inode... Just as I said above, this checks the bucket head validity instead (since we get a bucket_index and the bucket head should be what we expect). It isn't meant to check that the old inode is what we found on the list. So we could kill the original check in xfs_iunlink_remove_inode() as well. Anyway, I prefer this way since callers then don't need to know much about the bucket index. Since the bucket index is closely tied to agino, it cannot be specified without some rule; e.g. agino is 0 (just as an example; it should not be possible), but a caller passes a bucket index of 12. That doesn't work, so if we have a bucket_index argument, we also have to add a bucket_index check in xfs_iunlink_update_bucket(), just like the old "if (old_value == new_agino) {" integrity check. From this perspective, xfs_iunlink_update_bucket() should also know the rule, as the proposed code does... I could keep the old argument instead; that isn't too much work for me. I listed all my thoughts above. Thanks, Gao Xiang
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ab288424764c..10565fa5ace4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -33,6 +33,7 @@ #include "xfs_symlink.h" #include "xfs_trans_priv.h" #include "xfs_log.h" +#include "xfs_log_priv.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" #include "xfs_iunlink_item.h" @@ -1955,25 +1956,32 @@ xfs_iunlink_update_bucket( struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf *agibp, - unsigned int bucket_index, + xfs_agino_t old_agino, xfs_agino_t new_agino) { + struct xlog *log = tp->t_mountp->m_log; struct xfs_agi *agi = agibp->b_addr; xfs_agino_t old_value; - int offset; + unsigned int bucket_index; + int offset; ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino)); + bucket_index = 0; + /* During recovery, the old multiple bucket index can be applied */ + if (!log || log->l_flags & XLOG_RECOVERY_NEEDED) { + ASSERT(old_agino != NULLAGINO); + + if (be32_to_cpu(agi->agi_unlinked[0]) != old_agino) + bucket_index = old_agino % XFS_AGI_UNLINKED_BUCKETS; + } + old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index, old_value, new_agino); - /* - * We should never find the head of the list already set to the value - * passed in because either we're adding or removing ourselves from the - * head of the list. - */ - if (old_value == new_agino) { + /* check if the old agi_unlinked head is as expected */ + if (old_value != old_agino) { xfs_buf_mark_corrupt(agibp); return -EFSCORRUPTED; } @@ -2001,14 +2009,13 @@ xfs_iunlink_insert_inode( xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t next_agino; - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; /* * Get the index into the agi hash table for the list this inode will * go on. Make sure the pointer isn't garbage and that this inode * isn't already on the list. 
*/ - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + next_agino = be32_to_cpu(agi->agi_unlinked[0]); if (next_agino == agino || !xfs_verify_agino_or_null(mp, agno, next_agino)) { xfs_buf_mark_corrupt(agibp); @@ -2036,7 +2043,7 @@ xfs_iunlink_insert_inode( } /* Point the head of the list to point to this inode. */ - return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino); + return xfs_iunlink_update_bucket(tp, agno, agibp, next_agino, agino); } /* @@ -2051,27 +2058,20 @@ xfs_iunlink_remove_inode( struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = agibp->b_addr; xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t next_agino = ip->i_next_unlinked; - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; if (ip->i_prev_unlinked == NULLAGINO) { /* remove from head of list */ - if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino) { - xfs_buf_mark_corrupt(agibp); - return -EFSCORRUPTED; - } if (next_agino == agino || !xfs_verify_agino_or_null(mp, agno, next_agino)) return -EFSCORRUPTED; - error = xfs_iunlink_update_bucket(tp, agno, agibp, - bucket_index, next_agino); + error = xfs_iunlink_update_bucket(tp, agno, agibp, agino, next_agino); if (error) - return -EFSCORRUPTED; + return error; } else { /* lookup previous inode and update to point at next */ struct xfs_inode *pip;
There is no need to keep the old multiple short unlinked inode buckets since we have an in-memory doubly linked list for all unlinked inodes. Apart from the perspective of the necessity, the main advantage is that the log and AGI updates can be reduced since each AG has only one head now, which is implemented in the following patch. Therefore, this patch applies the new way in xfs_iunlink() and keeps the old approach in the xfs_iunlink_remove_inode() path as well so inode eviction can still work properly in recovery. Signed-off-by: Gao Xiang <hsiangkao@redhat.com> --- fs/xfs/xfs_inode.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-)