Message ID | 155546520611.176278.16800498529488238281.stgit@magnolia (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | xfs: scrub filesystem summary counters | expand |
On Tue, Apr 16, 2019 at 06:40:06PM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Add a percpu counter to track the number of blocks directly reserved for > delayed allocations on the data device. This counter (in contrast to > i_delayed_blks) does not track allocated CoW staging extents or anything > going on with the realtime device. It will be used in the upcoming > summary counter scrub function to check the free block counts without > having to freeze the filesystem or walk all the inodes to find the > delayed allocations. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Hmmm. Given that a lot of modifcations to this counter are going to be large quantities (much greater than the percpu counter batch size of 32) this effectively just degrades to spin lock protected global counter, yes? This is the reason we have XFS_FDBLOCKS_BATCH (1024) for the free block percpu counter so that changes from frequent updates via lots of smallish write()s don't need to take the global aggregation lock very often... So I'm guessing that this counter needs similar treatment for scalability on large machines. If it doesn't then an atomic64_t is all that is necessary here... Cheers, Dave.
On Thu, Apr 18, 2019 at 07:40:31AM +1000, Dave Chinner wrote: > On Tue, Apr 16, 2019 at 06:40:06PM -0700, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Add a percpu counter to track the number of blocks directly reserved for > > delayed allocations on the data device. This counter (in contrast to > > i_delayed_blks) does not track allocated CoW staging extents or anything > > going on with the realtime device. It will be used in the upcoming > > summary counter scrub function to check the free block counts without > > having to freeze the filesystem or walk all the inodes to find the > > delayed allocations. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > Hmmm. Given that a lot of modifcations to this counter are going to > be large quantities (much greater than the percpu counter batch size > of 32) this effectively just degrades to spin lock protected global > counter, yes? This is the reason we have XFS_FDBLOCKS_BATCH (1024) > for the free block percpu counter so that changes from frequent > updates via lots of smallish write()s don't need to take the global > aggregation lock very often... > > So I'm guessing that this counter needs similar treatment for > scalability on large machines. If it doesn't then an atomic64_t > is all that is necessary here... I wrote a silly little test to grow the delalloc count by one from a number of threads. It does cause an observable increase in time waiting for spinlocks. Since the only users of this counter are unmount check and the fscounter scrubber I think it's safe to use XFS_FDBLOCKS_BATCH for this because we aren't making any decisions that need high precision to maintain correct functioning of the system. --D > > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 4637ae1ae91c..356ebd1cbe82 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2009,6 +2009,9 @@ xfs_bmap_add_extent_delay_real( goto done; } + if (da_new != da_old) + xfs_mod_delalloc(mp, (int64_t)da_new - da_old); + if (bma->cur) { da_new += bma->cur->bc_private.b.allocated; bma->cur->bc_private.b.allocated = 0; @@ -2640,6 +2643,7 @@ xfs_bmap_add_extent_hole_delay( /* * Nothing to do for disk quota accounting here. */ + xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen); } } @@ -3352,8 +3356,10 @@ xfs_bmap_btalloc_accounting( * already have quota reservation and there's nothing to do * yet. */ - if (ap->wasdel) + if (ap->wasdel) { + xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len); return; + } /* * Otherwise, we've allocated blocks in a hole. The transaction @@ -3372,8 +3378,10 @@ xfs_bmap_btalloc_accounting( /* data/attr fork only */ ap->ip->i_d.di_nblocks += args->len; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) + if (ap->wasdel) { ap->ip->i_delayed_blks -= args->len; + xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len); + } xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, args->len); @@ -3969,6 +3977,7 @@ xfs_bmapi_reserve_delalloc( ip->i_delayed_blks += alen; + xfs_mod_delalloc(ip->i_mount, alen + indlen); got->br_startoff = aoff; got->br_startblock = nullstartblock(indlen); @@ -4840,8 +4849,10 @@ xfs_bmap_del_extent_delay( da_diff = da_old - da_new; if (!isrt) da_diff += del->br_blockcount; - if (da_diff) + if (da_diff) { xfs_mod_fdblocks(mp, da_diff, false); + xfs_mod_delalloc(mp, -da_diff); + } return error; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 14fba76ab811..f5cf4eb22d8e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -81,6 +81,12 @@ typedef struct xfs_mount { struct percpu_counter m_icount; /* allocated inodes counter */ struct percpu_counter m_ifree; /* free inodes counter */ struct percpu_counter m_fdblocks; /* free block counter */ + /* + * Count of data device blocks reserved for delayed allocations, + * including indlen blocks. Does not include allocated CoW staging + * extents or anything related to the rt device. + */ + struct percpu_counter m_delalloc_blks; struct xfs_buf *m_sb_bp; /* buffer for superblock */ char *m_fsname; /* filesystem name */ @@ -476,4 +482,13 @@ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, int error_class, int error); void xfs_force_summary_recalc(struct xfs_mount *mp); +/* Update the in-core delayed block counter. */ +static inline void +xfs_mod_delalloc( + struct xfs_mount *mp, + int64_t delta) +{ + percpu_counter_add(&mp->m_delalloc_blks, delta); +} + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index df917f41ca46..3a870a194cd9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1538,8 +1538,14 @@ xfs_init_percpu_counters( if (error) goto free_ifree; + error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); + if (error) + goto free_fdblocks; + return 0; +free_fdblocks: + percpu_counter_destroy(&mp->m_fdblocks); free_ifree: percpu_counter_destroy(&mp->m_ifree); free_icount: @@ -1563,6 +1569,9 @@ xfs_destroy_percpu_counters( percpu_counter_destroy(&mp->m_icount); percpu_counter_destroy(&mp->m_ifree); percpu_counter_destroy(&mp->m_fdblocks); + ASSERT(XFS_FORCED_SHUTDOWN(mp) || + percpu_counter_sum(&mp->m_delalloc_blks) == 0); + percpu_counter_destroy(&mp->m_delalloc_blks); } static struct xfs_mount *