Message ID | 20210601224029.GF664593@dread.disaster.area (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | None | expand |
On Wed, Jun 02, 2021 at 08:40:29AM +1000, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > xfs_dialloc_select_ag() does a lot of repetitive work. It first > calls xfs_ialloc_ag_select() to select the AG to start allocation > attempts in, which can do up to two entire loops across the perags > that inodes can be allocated in. This is simply checking if there is > spce available to allocate inodes in an AG, and it returns when it > finds the first candidate AG. > > xfs_dialloc_select_ag() then does it's own iterative walk across > all the perags locking the AGIs and trying to allocate inodes from > the locked AG. It also doesn't limit the search to mp->m_maxagi, > so it will walk all AGs whether they can allocate inodes or not. > > Hence if we are really low on inodes, we could do almost 3 entire > walks across the whole perag range before we find an allocation > group we can allocate inodes in or report ENOSPC. > > Because xfs_ialloc_ag_select() returns on the first candidate AG it > finds, we can simply do these checks directly in > xfs_dialloc_select_ag() before we lock and try to allocate inodes. > This reduces the inode allocation pass down to 2 perag sweeps at > most - one for aligned inode cluster allocation and if we can't > allocate full, aligned inode clusters anywhere we'll do another pass > trying to do sparse inode cluster allocation. > > This also removes a big chunk of duplicate code. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > V2: fix space calculation in xfs_dialloc_select_ag() Looks good to me now, Reviewed-by: Darrick J. Wong <djwong@kernel.org> --D > > fs/xfs/libxfs/xfs_ialloc.c | 225 ++++++++++++++++----------------------------- > 1 file changed, 78 insertions(+), 147 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c > index 872591e8f5cb..79119af36d12 100644 > --- a/fs/xfs/libxfs/xfs_ialloc.c > +++ b/fs/xfs/libxfs/xfs_ialloc.c > @@ -899,139 +899,6 @@ xfs_ialloc_ag_alloc( > return 0; > } > > -STATIC xfs_agnumber_t > -xfs_ialloc_next_ag( > - xfs_mount_t *mp) > -{ > - xfs_agnumber_t agno; > - > - spin_lock(&mp->m_agirotor_lock); > - agno = mp->m_agirotor; > - if (++mp->m_agirotor >= mp->m_maxagi) > - mp->m_agirotor = 0; > - spin_unlock(&mp->m_agirotor_lock); > - > - return agno; > -} > - > -/* > - * Select an allocation group to look for a free inode in, based on the parent > - * inode and the mode. Return the allocation group buffer. > - */ > -STATIC xfs_agnumber_t > -xfs_ialloc_ag_select( > - xfs_trans_t *tp, /* transaction pointer */ > - xfs_ino_t parent, /* parent directory inode number */ > - umode_t mode) /* bits set to indicate file type */ > -{ > - xfs_agnumber_t agcount; /* number of ag's in the filesystem */ > - xfs_agnumber_t agno; /* current ag number */ > - int flags; /* alloc buffer locking flags */ > - xfs_extlen_t ineed; /* blocks needed for inode allocation */ > - xfs_extlen_t longest = 0; /* longest extent available */ > - xfs_mount_t *mp; /* mount point structure */ > - int needspace; /* file mode implies space allocated */ > - xfs_perag_t *pag; /* per allocation group data */ > - xfs_agnumber_t pagno; /* parent (starting) ag number */ > - int error; > - > - /* > - * Files of these types need at least one block if length > 0 > - * (and they won't fit in the inode, but that's hard to figure out). > - */ > - needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); > - mp = tp->t_mountp; > - agcount = mp->m_maxagi; > - if (S_ISDIR(mode)) > - pagno = xfs_ialloc_next_ag(mp); > - else { > - pagno = XFS_INO_TO_AGNO(mp, parent); > - if (pagno >= agcount) > - pagno = 0; > - } > - > - ASSERT(pagno < agcount); > - > - /* > - * Loop through allocation groups, looking for one with a little > - * free space in it. Note we don't look for free inodes, exactly. > - * Instead, we include whether there is a need to allocate inodes > - * to mean that blocks must be allocated for them, > - * if none are currently free. > - */ > - agno = pagno; > - flags = XFS_ALLOC_FLAG_TRYLOCK; > - for (;;) { > - pag = xfs_perag_get(mp, agno); > - if (!pag->pagi_inodeok) { > - xfs_ialloc_next_ag(mp); > - goto nextag; > - } > - > - if (!pag->pagi_init) { > - error = xfs_ialloc_pagi_init(mp, tp, agno); > - if (error) > - goto nextag; > - } > - > - if (pag->pagi_freecount) { > - xfs_perag_put(pag); > - return agno; > - } > - > - if (!pag->pagf_init) { > - error = xfs_alloc_pagf_init(mp, tp, agno, flags); > - if (error) > - goto nextag; > - } > - > - /* > - * Check that there is enough free space for the file plus a > - * chunk of inodes if we need to allocate some. If this is the > - * first pass across the AGs, take into account the potential > - * space needed for alignment of inode chunks when checking the > - * longest contiguous free space in the AG - this prevents us > - * from getting ENOSPC because we have free space larger than > - * ialloc_blks but alignment constraints prevent us from using > - * it. > - * > - * If we can't find an AG with space for full alignment slack to > - * be taken into account, we must be near ENOSPC in all AGs. > - * Hence we don't include alignment for the second pass and so > - * if we fail allocation due to alignment issues then it is most > - * likely a real ENOSPC condition. > - */ > - ineed = M_IGEO(mp)->ialloc_min_blks; > - if (flags && ineed > 1) > - ineed += M_IGEO(mp)->cluster_align; > - longest = pag->pagf_longest; > - if (!longest) > - longest = pag->pagf_flcount > 0; > - > - if (pag->pagf_freeblks >= needspace + ineed && > - longest >= ineed) { > - xfs_perag_put(pag); > - return agno; > - } > -nextag: > - xfs_perag_put(pag); > - /* > - * No point in iterating over the rest, if we're shutting > - * down. > - */ > - if (XFS_FORCED_SHUTDOWN(mp)) > - return NULLAGNUMBER; > - agno++; > - if (agno >= agcount) > - agno = 0; > - if (agno == pagno) { > - if (flags == 0) > - return NULLAGNUMBER; > - flags = 0; > - } > - } > -} > - > /* > * Try to retrieve the next record to the left/right from the current one. > */ > @@ -1708,6 +1575,21 @@ xfs_dialloc_roll( > return 0; > } > > +STATIC xfs_agnumber_t > +xfs_ialloc_next_ag( > + xfs_mount_t *mp) > +{ > + xfs_agnumber_t agno; > + > + spin_lock(&mp->m_agirotor_lock); > + agno = mp->m_agirotor; > + if (++mp->m_agirotor >= mp->m_maxagi) > + mp->m_agirotor = 0; > + spin_unlock(&mp->m_agirotor_lock); > + > + return agno; > +} > + > /* > * Select and prepare an AG for inode allocation. > * > @@ -1734,16 +1616,24 @@ xfs_dialloc_select_ag( > struct xfs_perag *pag; > struct xfs_ino_geometry *igeo = M_IGEO(mp); > bool okalloc = true; > + int needspace; > + int flags; > > *IO_agbp = NULL; > > /* > - * We do not have an agbp, so select an initial allocation > - * group for inode allocation. > + * Directories, symlinks, and regular files frequently allocate at least > + * one block, so factor that potential expansion when we examine whether > + * an AG has enough space for file creation. > */ > - start_agno = xfs_ialloc_ag_select(*tpp, parent, mode); > - if (start_agno == NULLAGNUMBER) > - return -ENOSPC; > + needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); > + if (S_ISDIR(mode)) > + start_agno = xfs_ialloc_next_ag(mp); > + else { > + start_agno = XFS_INO_TO_AGNO(mp, parent); > + if (start_agno >= mp->m_maxagi) > + start_agno = 0; > + } > > /* > * If we have already hit the ceiling of inode blocks then clear > @@ -1765,12 +1655,14 @@ xfs_dialloc_select_ag( > * allocation groups upward, wrapping at the end. > */ > agno = start_agno; > + flags = XFS_ALLOC_FLAG_TRYLOCK; > for (;;) { > + xfs_extlen_t ineed; > + xfs_extlen_t longest = 0; > + > pag = xfs_perag_get(mp, agno); > - if (!pag->pagi_inodeok) { > - xfs_ialloc_next_ag(mp); > + if (!pag->pagi_inodeok) > goto nextag; > - } > > if (!pag->pagi_init) { > error = xfs_ialloc_pagi_init(mp, *tpp, agno); > @@ -1778,12 +1670,44 @@ xfs_dialloc_select_ag( > break; > } > > - /* > - * Do a first racy fast path check if this AG is usable. > - */ > if (!pag->pagi_freecount && !okalloc) > goto nextag; > > + if (!pag->pagf_init) { > + error = xfs_alloc_pagf_init(mp, *tpp, agno, flags); > + if (error) > + goto nextag; > + } > + > + /* > + * Check that there is enough free space for the file plus a > + * chunk of inodes if we need to allocate some. If this is the > + * first pass across the AGs, take into account the potential > + * space needed for alignment of inode chunks when checking the > + * longest contiguous free space in the AG - this prevents us > + * from getting ENOSPC because we have free space larger than > + * ialloc_blks but alignment constraints prevent us from using > + * it. > + * > + * If we can't find an AG with space for full alignment slack to > + * be taken into account, we must be near ENOSPC in all AGs. > + * Hence we don't include alignment for the second pass and so > + * if we fail allocation due to alignment issues then it is most > + * likely a real ENOSPC condition. > + */ > + if (!pag->pagi_freecount) { > + ineed = M_IGEO(mp)->ialloc_min_blks; > + if (flags && ineed > 1) > + ineed += M_IGEO(mp)->cluster_align; > + longest = pag->pagf_longest; > + if (!longest) > + longest = pag->pagf_flcount > 0; > + > + if (pag->pagf_freeblks < needspace + ineed || > + longest < ineed) > + goto nextag; > + } > + > /* > * Then read in the AGI buffer and recheck with the AGI buffer > * lock held. > @@ -1823,10 +1747,17 @@ xfs_dialloc_select_ag( > nextag_relse_buffer: > xfs_trans_brelse(*tpp, agbp); > nextag: > - if (++agno == mp->m_sb.sb_agcount) > - agno = 0; > - if (agno == start_agno) > + if (XFS_FORCED_SHUTDOWN(mp)) { > + error = -EFSCORRUPTED; > break; > + } > + if (++agno == mp->m_maxagi) > + agno = 0; > + if (agno == start_agno) { > + if (!flags) > + break; > + flags = 0; > + } > xfs_perag_put(pag); > } >
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 872591e8f5cb..79119af36d12 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -899,139 +899,6 @@ xfs_ialloc_ag_alloc( return 0; } -STATIC xfs_agnumber_t -xfs_ialloc_next_ag( - xfs_mount_t *mp) -{ - xfs_agnumber_t agno; - - spin_lock(&mp->m_agirotor_lock); - agno = mp->m_agirotor; - if (++mp->m_agirotor >= mp->m_maxagi) - mp->m_agirotor = 0; - spin_unlock(&mp->m_agirotor_lock); - - return agno; -} - -/* - * Select an allocation group to look for a free inode in, based on the parent - * inode and the mode. Return the allocation group buffer. - */ -STATIC xfs_agnumber_t -xfs_ialloc_ag_select( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t parent, /* parent directory inode number */ - umode_t mode) /* bits set to indicate file type */ -{ - xfs_agnumber_t agcount; /* number of ag's in the filesystem */ - xfs_agnumber_t agno; /* current ag number */ - int flags; /* alloc buffer locking flags */ - xfs_extlen_t ineed; /* blocks needed for inode allocation */ - xfs_extlen_t longest = 0; /* longest extent available */ - xfs_mount_t *mp; /* mount point structure */ - int needspace; /* file mode implies space allocated */ - xfs_perag_t *pag; /* per allocation group data */ - xfs_agnumber_t pagno; /* parent (starting) ag number */ - int error; - - /* - * Files of these types need at least one block if length > 0 - * (and they won't fit in the inode, but that's hard to figure out). - */ - needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); - mp = tp->t_mountp; - agcount = mp->m_maxagi; - if (S_ISDIR(mode)) - pagno = xfs_ialloc_next_ag(mp); - else { - pagno = XFS_INO_TO_AGNO(mp, parent); - if (pagno >= agcount) - pagno = 0; - } - - ASSERT(pagno < agcount); - - /* - * Loop through allocation groups, looking for one with a little - * free space in it. Note we don't look for free inodes, exactly. - * Instead, we include whether there is a need to allocate inodes - * to mean that blocks must be allocated for them, - * if none are currently free. - */ - agno = pagno; - flags = XFS_ALLOC_FLAG_TRYLOCK; - for (;;) { - pag = xfs_perag_get(mp, agno); - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto nextag; - } - - if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); - if (error) - goto nextag; - } - - if (pag->pagi_freecount) { - xfs_perag_put(pag); - return agno; - } - - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, tp, agno, flags); - if (error) - goto nextag; - } - - /* - * Check that there is enough free space for the file plus a - * chunk of inodes if we need to allocate some. If this is the - * first pass across the AGs, take into account the potential - * space needed for alignment of inode chunks when checking the - * longest contiguous free space in the AG - this prevents us - * from getting ENOSPC because we have free space larger than - * ialloc_blks but alignment constraints prevent us from using - * it. - * - * If we can't find an AG with space for full alignment slack to - * be taken into account, we must be near ENOSPC in all AGs. - * Hence we don't include alignment for the second pass and so - * if we fail allocation due to alignment issues then it is most - * likely a real ENOSPC condition. - */ - ineed = M_IGEO(mp)->ialloc_min_blks; - if (flags && ineed > 1) - ineed += M_IGEO(mp)->cluster_align; - longest = pag->pagf_longest; - if (!longest) - longest = pag->pagf_flcount > 0; - - if (pag->pagf_freeblks >= needspace + ineed && - longest >= ineed) { - xfs_perag_put(pag); - return agno; - } -nextag: - xfs_perag_put(pag); - /* - * No point in iterating over the rest, if we're shutting - * down. - */ - if (XFS_FORCED_SHUTDOWN(mp)) - return NULLAGNUMBER; - agno++; - if (agno >= agcount) - agno = 0; - if (agno == pagno) { - if (flags == 0) - return NULLAGNUMBER; - flags = 0; - } - } -} - /* * Try to retrieve the next record to the left/right from the current one. */ @@ -1708,6 +1575,21 @@ xfs_dialloc_roll( return 0; } +STATIC xfs_agnumber_t +xfs_ialloc_next_ag( + xfs_mount_t *mp) +{ + xfs_agnumber_t agno; + + spin_lock(&mp->m_agirotor_lock); + agno = mp->m_agirotor; + if (++mp->m_agirotor >= mp->m_maxagi) + mp->m_agirotor = 0; + spin_unlock(&mp->m_agirotor_lock); + + return agno; +} + /* * Select and prepare an AG for inode allocation. * @@ -1734,16 +1616,24 @@ xfs_dialloc_select_ag( struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); bool okalloc = true; + int needspace; + int flags; *IO_agbp = NULL; /* - * We do not have an agbp, so select an initial allocation - * group for inode allocation. + * Directories, symlinks, and regular files frequently allocate at least + * one block, so factor that potential expansion when we examine whether + * an AG has enough space for file creation. */ - start_agno = xfs_ialloc_ag_select(*tpp, parent, mode); - if (start_agno == NULLAGNUMBER) - return -ENOSPC; + needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); + if (S_ISDIR(mode)) + start_agno = xfs_ialloc_next_ag(mp); + else { + start_agno = XFS_INO_TO_AGNO(mp, parent); + if (start_agno >= mp->m_maxagi) + start_agno = 0; + } /* * If we have already hit the ceiling of inode blocks then clear @@ -1765,12 +1655,14 @@ xfs_dialloc_select_ag( * allocation groups upward, wrapping at the end. */ agno = start_agno; + flags = XFS_ALLOC_FLAG_TRYLOCK; for (;;) { + xfs_extlen_t ineed; + xfs_extlen_t longest = 0; + pag = xfs_perag_get(mp, agno); - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); + if (!pag->pagi_inodeok) goto nextag; - } if (!pag->pagi_init) { error = xfs_ialloc_pagi_init(mp, *tpp, agno); @@ -1778,12 +1670,44 @@ xfs_dialloc_select_ag( break; } - /* - * Do a first racy fast path check if this AG is usable. - */ if (!pag->pagi_freecount && !okalloc) goto nextag; + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, *tpp, agno, flags); + if (error) + goto nextag; + } + + /* + * Check that there is enough free space for the file plus a + * chunk of inodes if we need to allocate some. If this is the + * first pass across the AGs, take into account the potential + * space needed for alignment of inode chunks when checking the + * longest contiguous free space in the AG - this prevents us + * from getting ENOSPC because we have free space larger than + * ialloc_blks but alignment constraints prevent us from using + * it. + * + * If we can't find an AG with space for full alignment slack to + * be taken into account, we must be near ENOSPC in all AGs. + * Hence we don't include alignment for the second pass and so + * if we fail allocation due to alignment issues then it is most + * likely a real ENOSPC condition. + */ + if (!pag->pagi_freecount) { + ineed = M_IGEO(mp)->ialloc_min_blks; + if (flags && ineed > 1) + ineed += M_IGEO(mp)->cluster_align; + longest = pag->pagf_longest; + if (!longest) + longest = pag->pagf_flcount > 0; + + if (pag->pagf_freeblks < needspace + ineed || + longest < ineed) + goto nextag; + } + /* * Then read in the AGI buffer and recheck with the AGI buffer * lock held. @@ -1823,10 +1747,17 @@ xfs_dialloc_select_ag( nextag_relse_buffer: xfs_trans_brelse(*tpp, agbp); nextag: - if (++agno == mp->m_sb.sb_agcount) - agno = 0; - if (agno == start_agno) + if (XFS_FORCED_SHUTDOWN(mp)) { + error = -EFSCORRUPTED; break; + } + if (++agno == mp->m_maxagi) + agno = 0; + if (agno == start_agno) { + if (!flags) + break; + flags = 0; + } xfs_perag_put(pag); }