[1/2] xfs_repair: check quota values if quota was loaded
diff mbox series

Message ID 158930842141.1920396.3267253462483632882.stgit@magnolia
State Superseded
Headers show
Series
  • xfs_repair: check quota counters
Related show

Commit Message

Darrick J. Wong May 12, 2020, 6:33 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

If the filesystem looks like it had up to date quota information, check
it against what's in the filesystem and report if we find discrepancies.
This closes one of the major gaps in corruptions that are detected by
xfs_check vs. xfs_repair.

While we're at it, fix the alphabetization of the makefile targets.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/Makefile     |   69 ++++++-
 repair/phase7.c     |   21 ++
 repair/quotacheck.c |  522 +++++++++++++++++++++++++++++++++++++++++++++++++++
 repair/quotacheck.h |   14 +
 4 files changed, 612 insertions(+), 14 deletions(-)
 create mode 100644 repair/quotacheck.c
 create mode 100644 repair/quotacheck.h

Comments

Eric Sandeen May 12, 2020, 9:57 p.m. UTC | #1
On 5/12/20 1:33 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> If the filesystem looks like it had up to date quota information, check
> it against what's in the filesystem and report if we find discrepancies.
> This closes one of the major gaps in corruptions that are detected by
> xfs_check vs. xfs_repair.

I'm musing about whether this should be (temporarily?) something we can
disable, either due to bugs or extraordinary memory use?

Speaking of memory use, does the memory calculation in main() need to
be updated if quota checking is going to allocate a bazillion dquot nodes?

/*
 * Adjust libxfs cache sizes based on system memory
 * filesystem size and inode count.
 * ...

ok and you just pointed out in realtime that the stuff allocated for rmap
isn't in here either.  ;)  Sooooo maybe this memory calculation needs
attention in any case?  

And we realized we can't size quota memory needs because we can't read the
on-disk dquots yet... this sounds like a different problem to fix (or not) :(

> While we're at it, fix the alphabetization of the makefile targets.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  repair/Makefile     |   69 ++++++-
>  repair/phase7.c     |   21 ++
>  repair/quotacheck.c |  522 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  repair/quotacheck.h |   14 +
>  4 files changed, 612 insertions(+), 14 deletions(-)
>  create mode 100644 repair/quotacheck.c
>  create mode 100644 repair/quotacheck.h
> 
> 
> diff --git a/repair/Makefile b/repair/Makefile
> index 8cc1ee68..e3a74adc 100644
> --- a/repair/Makefile
> +++ b/repair/Makefile
> @@ -9,16 +9,65 @@ LSRCFILES = README
>  
>  LTCOMMAND = xfs_repair
>  
> -HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
> -	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
> -	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
> -
> -CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
> -	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
> -	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
> -	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
> -	progress.c prefetch.c rmap.c rt.c sb.c scan.c slab.c threads.c \
> -	versions.c xfs_repair.c
> +HFILES = \
> +	agheader.h \
> +	attr_repair.h \
> +	avl.h \

...

all this prooooobabbly should be a different patch.


>  LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) \
>  	$(LIBPTHREAD) $(LIBBLKID)
> diff --git a/repair/phase7.c b/repair/phase7.c
> index c2996470..47e76b56 100644
> --- a/repair/phase7.c
> +++ b/repair/phase7.c
> @@ -15,6 +15,7 @@

...



> +/* Find a qc_rec in the incore cache, or allocate one if need be. */
> +static struct qc_rec *
> +qc_rec_get(
> +	struct qc_dquots	*dquots,
> +	uint32_t		id,
> +	bool			can_alloc)
> +{
> +	struct qc_rec		*qrec;
> +	struct avl64node	*node;
> +
> +	pthread_mutex_lock(&dquots->lock);
> +	node = avl64_find(&dquots->tree, id);
> +	if (!node && can_alloc) {
> +		qrec = calloc(sizeof(struct qc_rec), 1);
> +		if (qrec) {
> +			qrec->id = id;
> +			node = avl64_insert(&dquots->tree, &qrec->node);
> +			if (!node)
> +				free(qrec);
> +			pthread_mutex_init(&qrec->lock, NULL);
> +		}
> +	}
> +	pthread_mutex_unlock(&dquots->lock);
> +
> +	return container_of(node, struct qc_rec, node);

this can blow up w/ node == NULL here if !can_alloc and not found, etc.

> +}
> +
> +/* Bump up an incore dquot's counters. */
> +static void
> +qc_adjust(
> +	struct qc_dquots	*dquots,
> +	uint32_t		id,
> +	uint64_t		bcount,
> +	uint64_t		rtbcount)
> +{
> +	struct qc_rec		*qrec = qc_rec_get(dquots, id, true);
> +
> +	pthread_mutex_lock(&qrec->lock);
> +	qrec->bcount += bcount;
> +	qrec->rtbcount += rtbcount;
> +	qrec->icount++;
> +	pthread_mutex_unlock(&qrec->lock);
> +}
> +
> +/* Count the realtime blocks allocated to a file. */
> +static xfs_filblks_t
> +qc_count_rtblocks(
> +	struct xfs_inode	*ip)
> +{
> +	struct xfs_iext_cursor	icur;
> +	struct xfs_bmbt_irec	got;
> +	xfs_filblks_t		count = 0;
> +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> +	int			error;
> +
> +	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> +		error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> +		if (error) {
> +			do_warn(
> +_("could not read ino %"PRIu64" extents, err=%d\n"),
> +				ip->i_ino, error);
> +			chkd_flags = 0;
> +			return 0;
> +		}
> +	}
> +
> +	for_each_xfs_iext(ifp, &icur, &got)
> +		if (!isnullstartblock(got.br_startblock))
> +			count += got.br_blockcount;
> +	return count;
> +}
> +
> +/* Add this inode's information to the quota counts. */
> +void
> +quotacheck_adjust(
> +	struct xfs_mount	*mp,
> +	xfs_ino_t		ino)
> +{
> +	struct xfs_inode	*ip;
> +	uint64_t		blocks;
> +	uint64_t		rtblks = 0;
> +	int			error;
> +
> +	/*
> +	 * If the fs doesn't have any quota files to check against, skip this
> +	 * step.
> +	 */
> +	if (!user_dquots && !group_dquots && !proj_dquots)
> +		return;
> +
> +	/* Skip if a previous quotacheck adjustment failed. */
> +	if (chkd_flags == 0)
> +		return;
> +
> +	/* Quota files are not included in quota counts. */
> +	if (ino == mp->m_sb.sb_uquotino ||
> +	    ino == mp->m_sb.sb_gquotino ||
> +	    ino == mp->m_sb.sb_pquotino)
> +		return;
> +
> +	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
> +	if (error) {
> +		do_warn(_("could not iget %"PRIu64" for quotacheck, err=%d\n"),

"iget" vs "read" vs "open" again... just looking for some consistent do_warn
phrasing for quota inodes.

> +				ino, error);
> +		chkd_flags = 0;
> +		return;
> +	}
> +
> +	/* Count the file's blocks. */
> +	if (XFS_IS_REALTIME_INODE(ip))
> +		rtblks = qc_count_rtblocks(ip);
> +	blocks = ip->i_d.di_nblocks - rtblks;
> +
> +	if (user_dquots)
> +		qc_adjust(user_dquots, i_uid_read(VFS_I(ip)), blocks, rtblks);
> +	if (group_dquots)
> +		qc_adjust(group_dquots, i_gid_read(VFS_I(ip)), blocks, rtblks);
> +	if (proj_dquots)
> +		qc_adjust(proj_dquots, ip->i_d.di_projid, blocks, rtblks);
> +
> +	libxfs_irele(ip);
> +}
> +
> +/* Compare this on-disk dquot against whatever we observed. */
> +static void
> +qc_check_dquot(
> +	struct xfs_disk_dquot	*ddq,
> +	struct qc_dquots	*dquots)
> +{
> +	struct qc_rec		*qrec;
> +	struct qc_rec		empty = {
> +		.bcount		= 0,
> +		.rtbcount	= 0,
> +		.icount		= 0,
> +	};
> +	uint32_t		id = be32_to_cpu(ddq->d_id);
> +
> +	qrec = qc_rec_get(dquots, id, false);
> +	if (!qrec)
> +		qrec = &empty;
> +
> +	if (be64_to_cpu(ddq->d_bcount) != qrec->bcount) {
> +		do_warn(_("%s id %u has bcount %llu, expected %"PRIu64"\n"),
> +				qflags_typestr(dquots->type), id,
> +				be64_to_cpu(ddq->d_bcount), qrec->bcount);
> +		chkd_flags = 0;
> +	}
> +
> +	if (be64_to_cpu(ddq->d_rtbcount) != qrec->rtbcount) {
> +		do_warn(_("%s id %u has rtbcount %llu, expected %"PRIu64"\n"),
> +				qflags_typestr(dquots->type), id,
> +				be64_to_cpu(ddq->d_rtbcount), qrec->rtbcount);
> +		chkd_flags = 0;
> +	}
> +
> +	if (be64_to_cpu(ddq->d_icount) != qrec->icount) {
> +		do_warn(_("%s id %u has icount %llu, expected %"PRIu64"\n"),
> +				qflags_typestr(dquots->type), id,
> +				be64_to_cpu(ddq->d_icount), qrec->icount);
> +		chkd_flags = 0;
> +	}
> +
> +	/*
> +	 * Mark that we found the record on disk.  Skip locking here because
> +	 * we're checking the dquots serially.
> +	 */
> +	qrec->flags |= QC_REC_ONDISK;
> +}
> +
> +/* Walk every dquot in every block in this quota file extent and compare. */

can we be consistent in comments & warnings that when we're operaring on the
quota inode itself, we call it the quota inode and not a file?  "file" sounds
liek the bad old V1 vfs quota stuff where you could actually see the quota file.

> +static int
> +qc_walk_extent(
> +	struct xfs_inode	*ip,
> +	struct xfs_bmbt_irec	*map,
> +	struct qc_dquots	*dquots)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_buf		*bp;
> +	struct xfs_dqblk	*dqb;
> +	xfs_filblks_t		dqchunklen;
> +	xfs_filblks_t		bno;
> +	unsigned int		dqperchunk;
> +	int			error = 0;
> +
> +	dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
> +	dqperchunk = libxfs_calc_dquots_per_chunk(dqchunklen);
> +
> +	for (bno = 0; bno < map->br_blockcount; bno++) {
> +		unsigned int	dqnr;
> +		uint64_t	dqid;
> +
> +		error = -libxfs_buf_read(mp->m_dev,
> +				XFS_FSB_TO_DADDR(mp, map->br_startblock + bno),
> +				dqchunklen, 0, &bp, &xfs_dquot_buf_ops);
> +		if (error) {
> +			do_warn(
> +_("cannot read %s file %"PRIu64", block %"PRIu64", disk block %"PRIu64", err=%d\n"),
> +				qflags_typestr(dquots->type), ip->i_ino,
> +				map->br_startoff + bno,
> +				map->br_startblock + bno, error);
> +			chkd_flags = 0;
> +			return error;
> +		}
> +
> +		dqb = bp->b_addr;
> +		dqid = map->br_startoff * dqperchunk;
> +		for (dqnr = 0;
> +		     dqnr < dqperchunk && dqid <= UINT_MAX;
> +		     dqnr++, dqb++, dqid++)
> +			qc_check_dquot(&dqb->dd_diskdq, dquots);
> +		libxfs_buf_relse(bp);
> +	}
> +
> +	return error;
> +}
> +
> +/* Check the incore quota counts with what's on disk. */
> +void
> +quotacheck_verify(
> +	struct xfs_mount	*mp,
> +	unsigned int		type)
> +{
> +	struct xfs_bmbt_irec	map;
> +	struct xfs_iext_cursor	icur;
> +	struct xfs_inode	*ip;
> +	struct xfs_ifork	*ifp;
> +	struct qc_dquots	*dquots = NULL;
> +	struct avl64node	*node, *n;
> +	xfs_ino_t		ino = NULLFSINO;
> +	uint16_t		qflag = 0;
> +	int			error;
> +
> +	switch (type) {
> +	case XFS_DQ_USER:
> +		ino = mp->m_sb.sb_uquotino;
> +		dquots = user_dquots;
> +		qflag = XFS_UQUOTA_CHKD;
> +		break;
> +	case XFS_DQ_GROUP:
> +		ino = mp->m_sb.sb_gquotino;
> +		dquots = group_dquots;
> +		qflag = XFS_GQUOTA_CHKD;
> +		break;
> +	case XFS_DQ_PROJ:
> +		ino = mp->m_sb.sb_pquotino;
> +		dquots = proj_dquots;
> +		qflag = XFS_PQUOTA_CHKD;
> +		break;
> +	}
> +
> +	/*
> +	 * If there's no incore records or there were errors in collecting
> +	 * them, bail out early.  No sense in complaining about more garbage.
> +	 */
> +	if (!dquots || !(chkd_flags & qflag))
> +		return;

It confuses me a little bit to check dquots as well as (chkd_flags & qflag)
which seems to imply that one could be true and not the other; I'm not sure
that's the case.

> +
> +	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
> +	if (error) {
> +		do_warn(
> +	_("could not open %s file %"PRIu64" for quotacheck, err=%d\n"),

same here, let's say "read inode" not "open file" - etc in other places.

> +			qflags_typestr(type), ino, error);
> +		chkd_flags = 0;
> +		return;
> +	}
> +
> +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> +	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> +		error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> +		if (error) {
> +			do_warn(
> +	_("could not read %s file %"PRIu64" extents, err=%d\n"),
> +				qflags_typestr(type), ip->i_ino, error);
> +			chkd_flags = 0;
> +			goto err;
> +		}
> +	}
> +
> +	/* Walk each extent of the quota file and compare counters. */
> +	for_each_xfs_iext(ifp, &icur, &map) {
> +		if (map.br_startblock != HOLESTARTBLOCK) {
> +			error = qc_walk_extent(ip, &map, dquots);
> +			if (error)
> +				goto err;
> +		}
> +	}
> +
> +	/* Complain about counters that weren't seen on disk. */

Can this be made a little more clear about what was seen where and what
didn't match?  So this is "we found an inode for an ID that doesn't have
a corresponding dquot tracking it"

> +	qc_dquots_foreach(dquots, node, n) {
> +		struct qc_rec	*qrec;
> +
> +		qrec = container_of(node, struct qc_rec, node);
> +		if (!(qrec->flags & QC_REC_ONDISK)) {
> +			do_warn(
> +_("%s id %u not seen on disk (bcount %"PRIu64" rtbcount %"PRIu64" icount %"PRIu64")\n"),

this could be clearer too? maybe "no quota record found for ...?"

> +				qflags_typestr(type), qrec->id,
> +				qrec->bcount, qrec->rtbcount, qrec->icount);
> +			chkd_flags = 0;
> +		}
> +	}
> +err:
> +	libxfs_irele(ip);
> +}
> +
> +/*
> + * Decide if we want to run quotacheck on a particular quota type.  Returns
> + * true only if the inode isn't lost, the fs says quotacheck ran, and the inode
> + * pointer isn't "unset".

which pointer?  and what does "unset" mean?  Do you mean NULLFSINO?

> + */
> +static inline bool
> +qc_has_quotafile(
> +	struct xfs_mount	*mp,
> +	unsigned int		type)
> +{
> +	bool			lost;
> +	xfs_ino_t		ino;
> +	unsigned int		qflag;
> +
> +	switch (type) {
> +	case XFS_DQ_USER:
> +		lost = lost_uquotino;
> +		ino = mp->m_sb.sb_uquotino;
> +		qflag = XFS_UQUOTA_CHKD;
> +		break;
> +	case XFS_DQ_GROUP:
> +		lost = lost_gquotino;
> +		ino = mp->m_sb.sb_gquotino;
> +		qflag = XFS_GQUOTA_CHKD;
> +		break;
> +	case XFS_DQ_PROJ:
> +		lost = lost_pquotino;
> +		ino = mp->m_sb.sb_pquotino;
> +		qflag = XFS_PQUOTA_CHKD;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	if (lost)
> +		return false;
> +	if (!(mp->m_sb.sb_qflags & qflag))
> +		return false;
> +	if (ino == NULLFSINO || ino == 0)
> +		return false;
> +	return true;
> +}

-Eric
Darrick J. Wong May 12, 2020, 10:48 p.m. UTC | #2
On Tue, May 12, 2020 at 04:57:51PM -0500, Eric Sandeen wrote:
> On 5/12/20 1:33 PM, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > If the filesystem looks like it had up to date quota information, check
> > it against what's in the filesystem and report if we find discrepancies.
> > This closes one of the major gaps in corruptions that are detected by
> > xfs_check vs. xfs_repair.
> 
> I'm musing about whether this should be (temporarily?) something we can
> disable, either due to bugs or extraordinary memory use?

Ok, I'll add -o noquota to disable the quotacheck.

> Speaking of memory use, does the memory calculation in main() need to
> be updated if quota checking is going to allocate a bazillion dquot nodes?

Hm, probably, but I can't think of a good way to guess how many dquots
we're going to need without opening the quota inodes.

> /*
>  * Adjust libxfs cache sizes based on system memory
>  * filesystem size and inode count.
>  * ...
> 
> ok and you just pointed out in realtime that the stuff allocated for rmap
> isn't in here either.  ;)  Sooooo maybe this memory calculation needs
> attention in any case?  

It does, but that's going to be tricky.  For non-reflink filesystems,
the worst case is 24 bytes for each non-free block to store the rmap
records.

For rmap+reflink filesystems the upper theoretical limit is
(blocks_used * 2^32) * 24 bytes to store the rmap records because
infinite sharing; and half again to store the 12-byte refcount records.

However, we can't figure out what the /real/ sharing factor is without
scanning the filesystem, which is probably why I never did much with
this. :(

> And we realized we can't size quota memory needs because we can't read the
> on-disk dquots yet... this sounds like a different problem to fix (or not) :(

<nod>

> > While we're at it, fix the alphabetization of the makefile targets.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  repair/Makefile     |   69 ++++++-
> >  repair/phase7.c     |   21 ++
> >  repair/quotacheck.c |  522 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  repair/quotacheck.h |   14 +
> >  4 files changed, 612 insertions(+), 14 deletions(-)
> >  create mode 100644 repair/quotacheck.c
> >  create mode 100644 repair/quotacheck.h
> > 
> > 
> > diff --git a/repair/Makefile b/repair/Makefile
> > index 8cc1ee68..e3a74adc 100644
> > --- a/repair/Makefile
> > +++ b/repair/Makefile
> > @@ -9,16 +9,65 @@ LSRCFILES = README
> >  
> >  LTCOMMAND = xfs_repair
> >  
> > -HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
> > -	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
> > -	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
> > -
> > -CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
> > -	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
> > -	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
> > -	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
> > -	progress.c prefetch.c rmap.c rt.c sb.c scan.c slab.c threads.c \
> > -	versions.c xfs_repair.c
> > +HFILES = \
> > +	agheader.h \
> > +	attr_repair.h \
> > +	avl.h \
> 
> ...
> 
> all this prooooobabbly should be a different patch.

DOH.  Will fix.

> >  LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) \
> >  	$(LIBPTHREAD) $(LIBBLKID)
> > diff --git a/repair/phase7.c b/repair/phase7.c
> > index c2996470..47e76b56 100644
> > --- a/repair/phase7.c
> > +++ b/repair/phase7.c
> > @@ -15,6 +15,7 @@
> 
> ...
> 
> 
> 
> > +/* Find a qc_rec in the incore cache, or allocate one if need be. */
> > +static struct qc_rec *
> > +qc_rec_get(
> > +	struct qc_dquots	*dquots,
> > +	uint32_t		id,
> > +	bool			can_alloc)
> > +{
> > +	struct qc_rec		*qrec;
> > +	struct avl64node	*node;
> > +
> > +	pthread_mutex_lock(&dquots->lock);
> > +	node = avl64_find(&dquots->tree, id);
> > +	if (!node && can_alloc) {
> > +		qrec = calloc(sizeof(struct qc_rec), 1);
> > +		if (qrec) {
> > +			qrec->id = id;
> > +			node = avl64_insert(&dquots->tree, &qrec->node);
> > +			if (!node)
> > +				free(qrec);
> > +			pthread_mutex_init(&qrec->lock, NULL);
> > +		}
> > +	}
> > +	pthread_mutex_unlock(&dquots->lock);
> > +
> > +	return container_of(node, struct qc_rec, node);
> 
> this can blow up w/ node == NULL here if !can_alloc and not found, etc.

Fixed.

> > +}
> > +
> > +/* Bump up an incore dquot's counters. */
> > +static void
> > +qc_adjust(
> > +	struct qc_dquots	*dquots,
> > +	uint32_t		id,
> > +	uint64_t		bcount,
> > +	uint64_t		rtbcount)
> > +{
> > +	struct qc_rec		*qrec = qc_rec_get(dquots, id, true);
> > +
> > +	pthread_mutex_lock(&qrec->lock);
> > +	qrec->bcount += bcount;
> > +	qrec->rtbcount += rtbcount;
> > +	qrec->icount++;
> > +	pthread_mutex_unlock(&qrec->lock);
> > +}
> > +
> > +/* Count the realtime blocks allocated to a file. */
> > +static xfs_filblks_t
> > +qc_count_rtblocks(
> > +	struct xfs_inode	*ip)
> > +{
> > +	struct xfs_iext_cursor	icur;
> > +	struct xfs_bmbt_irec	got;
> > +	xfs_filblks_t		count = 0;
> > +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > +	int			error;
> > +
> > +	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > +		error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> > +		if (error) {
> > +			do_warn(
> > +_("could not read ino %"PRIu64" extents, err=%d\n"),
> > +				ip->i_ino, error);
> > +			chkd_flags = 0;
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	for_each_xfs_iext(ifp, &icur, &got)
> > +		if (!isnullstartblock(got.br_startblock))
> > +			count += got.br_blockcount;
> > +	return count;
> > +}
> > +
> > +/* Add this inode's information to the quota counts. */
> > +void
> > +quotacheck_adjust(
> > +	struct xfs_mount	*mp,
> > +	xfs_ino_t		ino)
> > +{
> > +	struct xfs_inode	*ip;
> > +	uint64_t		blocks;
> > +	uint64_t		rtblks = 0;
> > +	int			error;
> > +
> > +	/*
> > +	 * If the fs doesn't have any quota files to check against, skip this
> > +	 * step.
> > +	 */
> > +	if (!user_dquots && !group_dquots && !proj_dquots)
> > +		return;
> > +
> > +	/* Skip if a previous quotacheck adjustment failed. */
> > +	if (chkd_flags == 0)
> > +		return;
> > +
> > +	/* Quota files are not included in quota counts. */
> > +	if (ino == mp->m_sb.sb_uquotino ||
> > +	    ino == mp->m_sb.sb_gquotino ||
> > +	    ino == mp->m_sb.sb_pquotino)
> > +		return;
> > +
> > +	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
> > +	if (error) {
> > +		do_warn(_("could not iget %"PRIu64" for quotacheck, err=%d\n"),
> 
> "iget" vs "read" vs "open" again... just looking for some consistent do_warn
> phrasing for quota inodes.

I'll tighten the messaging to '[user|group|project] quota inode X" vs.
"file X".

> > +				ino, error);
> > +		chkd_flags = 0;
> > +		return;
> > +	}
> > +
> > +	/* Count the file's blocks. */
> > +	if (XFS_IS_REALTIME_INODE(ip))
> > +		rtblks = qc_count_rtblocks(ip);
> > +	blocks = ip->i_d.di_nblocks - rtblks;
> > +
> > +	if (user_dquots)
> > +		qc_adjust(user_dquots, i_uid_read(VFS_I(ip)), blocks, rtblks);
> > +	if (group_dquots)
> > +		qc_adjust(group_dquots, i_gid_read(VFS_I(ip)), blocks, rtblks);
> > +	if (proj_dquots)
> > +		qc_adjust(proj_dquots, ip->i_d.di_projid, blocks, rtblks);
> > +
> > +	libxfs_irele(ip);
> > +}
> > +
> > +/* Compare this on-disk dquot against whatever we observed. */
> > +static void
> > +qc_check_dquot(
> > +	struct xfs_disk_dquot	*ddq,
> > +	struct qc_dquots	*dquots)
> > +{
> > +	struct qc_rec		*qrec;
> > +	struct qc_rec		empty = {
> > +		.bcount		= 0,
> > +		.rtbcount	= 0,
> > +		.icount		= 0,
> > +	};
> > +	uint32_t		id = be32_to_cpu(ddq->d_id);
> > +
> > +	qrec = qc_rec_get(dquots, id, false);
> > +	if (!qrec)
> > +		qrec = &empty;
> > +
> > +	if (be64_to_cpu(ddq->d_bcount) != qrec->bcount) {
> > +		do_warn(_("%s id %u has bcount %llu, expected %"PRIu64"\n"),
> > +				qflags_typestr(dquots->type), id,
> > +				be64_to_cpu(ddq->d_bcount), qrec->bcount);
> > +		chkd_flags = 0;
> > +	}
> > +
> > +	if (be64_to_cpu(ddq->d_rtbcount) != qrec->rtbcount) {
> > +		do_warn(_("%s id %u has rtbcount %llu, expected %"PRIu64"\n"),
> > +				qflags_typestr(dquots->type), id,
> > +				be64_to_cpu(ddq->d_rtbcount), qrec->rtbcount);
> > +		chkd_flags = 0;
> > +	}
> > +
> > +	if (be64_to_cpu(ddq->d_icount) != qrec->icount) {
> > +		do_warn(_("%s id %u has icount %llu, expected %"PRIu64"\n"),
> > +				qflags_typestr(dquots->type), id,
> > +				be64_to_cpu(ddq->d_icount), qrec->icount);
> > +		chkd_flags = 0;
> > +	}
> > +
> > +	/*
> > +	 * Mark that we found the record on disk.  Skip locking here because
> > +	 * we're checking the dquots serially.
> > +	 */
> > +	qrec->flags |= QC_REC_ONDISK;
> > +}
> > +
> > +/* Walk every dquot in every block in this quota file extent and compare. */
> 
> can we be consistent in comments & warnings that when we're operaring on the
> quota inode itself, we call it the quota inode and not a file?  "file" sounds
> liek the bad old V1 vfs quota stuff where you could actually see the quota file.

<nod>

> > +static int
> > +qc_walk_extent(
> > +	struct xfs_inode	*ip,
> > +	struct xfs_bmbt_irec	*map,
> > +	struct qc_dquots	*dquots)
> > +{
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	struct xfs_buf		*bp;
> > +	struct xfs_dqblk	*dqb;
> > +	xfs_filblks_t		dqchunklen;
> > +	xfs_filblks_t		bno;
> > +	unsigned int		dqperchunk;
> > +	int			error = 0;
> > +
> > +	dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
> > +	dqperchunk = libxfs_calc_dquots_per_chunk(dqchunklen);
> > +
> > +	for (bno = 0; bno < map->br_blockcount; bno++) {
> > +		unsigned int	dqnr;
> > +		uint64_t	dqid;
> > +
> > +		error = -libxfs_buf_read(mp->m_dev,
> > +				XFS_FSB_TO_DADDR(mp, map->br_startblock + bno),
> > +				dqchunklen, 0, &bp, &xfs_dquot_buf_ops);
> > +		if (error) {
> > +			do_warn(
> > +_("cannot read %s file %"PRIu64", block %"PRIu64", disk block %"PRIu64", err=%d\n"),
> > +				qflags_typestr(dquots->type), ip->i_ino,
> > +				map->br_startoff + bno,
> > +				map->br_startblock + bno, error);
> > +			chkd_flags = 0;
> > +			return error;
> > +		}
> > +
> > +		dqb = bp->b_addr;
> > +		dqid = map->br_startoff * dqperchunk;
> > +		for (dqnr = 0;
> > +		     dqnr < dqperchunk && dqid <= UINT_MAX;
> > +		     dqnr++, dqb++, dqid++)
> > +			qc_check_dquot(&dqb->dd_diskdq, dquots);
> > +		libxfs_buf_relse(bp);
> > +	}
> > +
> > +	return error;
> > +}
> > +
> > +/* Check the incore quota counts with what's on disk. */
> > +void
> > +quotacheck_verify(
> > +	struct xfs_mount	*mp,
> > +	unsigned int		type)
> > +{
> > +	struct xfs_bmbt_irec	map;
> > +	struct xfs_iext_cursor	icur;
> > +	struct xfs_inode	*ip;
> > +	struct xfs_ifork	*ifp;
> > +	struct qc_dquots	*dquots = NULL;
> > +	struct avl64node	*node, *n;
> > +	xfs_ino_t		ino = NULLFSINO;
> > +	uint16_t		qflag = 0;
> > +	int			error;
> > +
> > +	switch (type) {
> > +	case XFS_DQ_USER:
> > +		ino = mp->m_sb.sb_uquotino;
> > +		dquots = user_dquots;
> > +		qflag = XFS_UQUOTA_CHKD;
> > +		break;
> > +	case XFS_DQ_GROUP:
> > +		ino = mp->m_sb.sb_gquotino;
> > +		dquots = group_dquots;
> > +		qflag = XFS_GQUOTA_CHKD;
> > +		break;
> > +	case XFS_DQ_PROJ:
> > +		ino = mp->m_sb.sb_pquotino;
> > +		dquots = proj_dquots;
> > +		qflag = XFS_PQUOTA_CHKD;
> > +		break;
> > +	}
> > +
> > +	/*
> > +	 * If there's no incore records or there were errors in collecting
> > +	 * them, bail out early.  No sense in complaining about more garbage.
> > +	 */
> > +	if (!dquots || !(chkd_flags & qflag))
> > +		return;
> 
> It confuses me a little bit to check dquots as well as (chkd_flags & qflag)
> which seems to imply that one could be true and not the other; I'm not sure
> that's the case.

Hm, yeah.  We zero chkd_flags on any error so this could be turned into:

if (!quots || !chkd_flags)
	return;

Admittedly this quotacheck code is sort of phoning it in, because it
doesn't correct any of the quota counters; repair still just clears the
CHKD flags and we let the kernel fix it next time.  Maybe that's fine,
since otherwise repair would have to learn how to allocate and
initialize blocks in the dquot inodes.

> > +
> > +	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
> > +	if (error) {
> > +		do_warn(
> > +	_("could not open %s file %"PRIu64" for quotacheck, err=%d\n"),
> 
> same here, let's say "read inode" not "open file" - etc in other places.


Fixed.

> > +			qflags_typestr(type), ino, error);
> > +		chkd_flags = 0;
> > +		return;
> > +	}
> > +
> > +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > +	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > +		error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> > +		if (error) {
> > +			do_warn(
> > +	_("could not read %s file %"PRIu64" extents, err=%d\n"),
> > +				qflags_typestr(type), ip->i_ino, error);
> > +			chkd_flags = 0;
> > +			goto err;
> > +		}
> > +	}
> > +
> > +	/* Walk each extent of the quota file and compare counters. */
> > +	for_each_xfs_iext(ifp, &icur, &map) {
> > +		if (map.br_startblock != HOLESTARTBLOCK) {
> > +			error = qc_walk_extent(ip, &map, dquots);
> > +			if (error)
> > +				goto err;
> > +		}
> > +	}
> > +
> > +	/* Complain about counters that weren't seen on disk. */
> 
> Can this be made a little more clear about what was seen where and what
> didn't match?  So this is "we found an inode for an ID that doesn't have
> a corresponding dquot tracking it"

	/*
	 * We constructed incore dquots to account for every file we saw on
	 * disk, and then walked all on-disk dquots to compare.  Complain about
	 * incore dquots that weren't touched during the comparison, because
	 * that means something is missing from the dquot file.
	 */


> > +	qc_dquots_foreach(dquots, node, n) {
> > +		struct qc_rec	*qrec;
> > +
> > +		qrec = container_of(node, struct qc_rec, node);
> > +		if (!(qrec->flags & QC_REC_ONDISK)) {
> > +			do_warn(
> > +_("%s id %u not seen on disk (bcount %"PRIu64" rtbcount %"PRIu64" icount %"PRIu64")\n"),
> 
> this could be clearer too? maybe "no quota record found for ...?"

I'll change it to:
"project quota record for id 31337 not found on disk..."

> 
> > +				qflags_typestr(type), qrec->id,
> > +				qrec->bcount, qrec->rtbcount, qrec->icount);
> > +			chkd_flags = 0;
> > +		}
> > +	}
> > +err:
> > +	libxfs_irele(ip);
> > +}
> > +
> > +/*
> > + * Decide if we want to run quotacheck on a particular quota type.  Returns
> > + * true only if the inode isn't lost, the fs says quotacheck ran, and the inode
> > + * pointer isn't "unset".
> 
> which pointer?  and what does "unset" mean?  Do you mean NULLFSINO?

Sorry, I meant the ondisk pointer to the quota inode; and either
NULLFSINO or zero.  I'll make the comment say both of those things.

> > + */
> > +static inline bool
> > +qc_has_quotafile(
> > +	struct xfs_mount	*mp,
> > +	unsigned int		type)
> > +{
> > +	bool			lost;
> > +	xfs_ino_t		ino;
> > +	unsigned int		qflag;
> > +
> > +	switch (type) {
> > +	case XFS_DQ_USER:
> > +		lost = lost_uquotino;
> > +		ino = mp->m_sb.sb_uquotino;
> > +		qflag = XFS_UQUOTA_CHKD;
> > +		break;
> > +	case XFS_DQ_GROUP:
> > +		lost = lost_gquotino;
> > +		ino = mp->m_sb.sb_gquotino;
> > +		qflag = XFS_GQUOTA_CHKD;
> > +		break;
> > +	case XFS_DQ_PROJ:
> > +		lost = lost_pquotino;
> > +		ino = mp->m_sb.sb_pquotino;
> > +		qflag = XFS_PQUOTA_CHKD;
> > +		break;
> > +	default:
> > +		return false;
> > +	}
> > +
> > +	if (lost)
> > +		return false;
> > +	if (!(mp->m_sb.sb_qflags & qflag))
> > +		return false;
> > +	if (ino == NULLFSINO || ino == 0)
> > +		return false;
> > +	return true;
> > +}

Thx for the livereview. :)

--D

> -Eric

Patch
diff mbox series

diff --git a/repair/Makefile b/repair/Makefile
index 8cc1ee68..e3a74adc 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,16 +9,65 @@  LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
-	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
-	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
-
-CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
-	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
-	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
-	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
-	progress.c prefetch.c rmap.c rt.c sb.c scan.c slab.c threads.c \
-	versions.c xfs_repair.c
+HFILES = \
+	agheader.h \
+	attr_repair.h \
+	avl.h \
+	bload.h \
+	bmap.h \
+	btree.h \
+	da_util.h \
+	dinode.h \
+	dir2.h \
+	err_protos.h \
+	globals.h \
+	incore.h \
+	prefetch.h \
+	progress.h \
+	protos.h \
+	quotacheck.h \
+	rmap.h \
+	rt.h \
+	scan.h \
+	slab.h \
+	threads.h \
+	versions.h
+
+CFILES = \
+	agheader.c \
+	attr_repair.c \
+	avl.c \
+	bload.c \
+	bmap.c \
+	btree.c \
+	da_util.c \
+	dino_chunks.c \
+	dinode.c \
+	dir2.c \
+	globals.c \
+	incore_bmc.c \
+	incore.c \
+	incore_ext.c \
+	incore_ino.c \
+	init.c \
+	phase1.c \
+	phase2.c \
+	phase3.c \
+	phase4.c \
+	phase5.c \
+	phase6.c \
+	phase7.c \
+	prefetch.c \
+	progress.c \
+	quotacheck.c \
+	rmap.c \
+	rt.c \
+	sb.c \
+	scan.c \
+	slab.c \
+	threads.c \
+	versions.c \
+	xfs_repair.c
 
 LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) \
 	$(LIBPTHREAD) $(LIBBLKID)
diff --git a/repair/phase7.c b/repair/phase7.c
index c2996470..47e76b56 100644
--- a/repair/phase7.c
+++ b/repair/phase7.c
@@ -15,6 +15,7 @@ 
 #include "versions.h"
 #include "progress.h"
 #include "threads.h"
+#include "quotacheck.h"
 
 static void
 update_inode_nlinks(
@@ -97,6 +98,10 @@  do_link_updates(
 
 	for (irec = findfirst_inode_rec(agno); irec;
 	     irec = next_ino_rec(irec)) {
+		xfs_ino_t	ino;
+
+		ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum);
+
 		for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
 			ASSERT(is_inode_confirmed(irec, j));
 
@@ -109,10 +114,8 @@  do_link_updates(
 			ASSERT(no_modify || nrefs > 0);
 
 			if (get_inode_disk_nlinks(irec, j) != nrefs)
-				update_inode_nlinks(wq->wq_ctx,
-					XFS_AGINO_TO_INO(mp, agno,
-						irec->ino_startnum + j),
-					nrefs);
+				update_inode_nlinks(wq->wq_ctx, ino + j, nrefs);
+			quotacheck_adjust(mp, ino + j);
 		}
 	}
 
@@ -126,6 +129,7 @@  phase7(
 {
 	struct workqueue	wq;
 	int			agno;
+	int			ret;
 
 	if (!no_modify)
 		do_log(_("Phase 7 - verify and correct link counts...\n"));
@@ -134,6 +138,9 @@  phase7(
 
 	set_progress_msg(PROGRESS_FMT_CORR_LINK, (uint64_t) glob_agcount);
 
+	ret = quotacheck_setup(mp);
+	if (ret)
+		do_error(_("unable to set up quotacheck, err=%d\n"), ret);
 	create_work_queue(&wq, mp, scan_threads);
 
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
@@ -141,5 +148,11 @@  phase7(
 
 	destroy_work_queue(&wq);
 
+	quotacheck_verify(mp, XFS_DQ_USER);
+	quotacheck_verify(mp, XFS_DQ_GROUP);
+	quotacheck_verify(mp, XFS_DQ_PROJ);
+
+	quotacheck_teardown();
+
 	print_final_rpt();
 }
diff --git a/repair/quotacheck.c b/repair/quotacheck.c
new file mode 100644
index 00000000..c176492f
--- /dev/null
+++ b/repair/quotacheck.c
@@ -0,0 +1,522 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "globals.h"
+#include "versions.h"
+#include "err_protos.h"
+#include "libfrog/avl64.h"
+#include "quotacheck.h"
+
+/*
+ * XFS_*QUOTA_CHKD flags for all the quota types that we verified.  This field
+ * will be cleared if we encounter any problems (runtime errors, mismatches).
+ */
+static uint16_t chkd_flags;
+
+/* Global incore dquot tree */
+struct qc_dquots {
+	pthread_mutex_t		lock;
+	struct avl64tree_desc	tree;
+
+	/* One of XFS_DQ_{USER,GROUP,PROJ} */
+	uint16_t		type;
+};
+
+#define qc_dquots_foreach(dquots, pos, n) \
+	for (pos = (dquots)->tree.avl_firstino, n = pos ? pos->avl_nextino : NULL; \
+			pos != NULL; \
+			pos = n, n = pos ? pos->avl_nextino : NULL)
+
+static struct qc_dquots *user_dquots;
+static struct qc_dquots *group_dquots;
+static struct qc_dquots *proj_dquots;
+
+/* This record was found in the on-disk dquot information. */
+#define QC_REC_ONDISK		(1U << 31)
+
+struct qc_rec {
+	struct avl64node	node;
+	pthread_mutex_t		lock;
+
+	uint32_t		id;
+	uint32_t		flags;
+	uint64_t		bcount;
+	uint64_t		rtbcount;
+	uint64_t		icount;
+};
+
+static const char *
+qflags_typestr(
+	unsigned int		type)
+{
+	if (type & XFS_DQ_USER)
+		return _("user quota");
+	else if (type & XFS_DQ_GROUP)
+		return _("group quota");
+	else if (type & XFS_DQ_PROJ)
+		return _("project quota");
+	return NULL;
+}
+
+/* Operations for the avl64 tree. */
+
+static uint64_t
+qc_avl_start(
+	struct avl64node	*node)
+{
+	struct qc_rec		*qrec;
+
+	qrec = container_of(node, struct qc_rec, node);
+	return qrec->id;
+}
+
+static uint64_t
+qc_avl_end(
+	struct avl64node	*node)
+{
+	return qc_avl_start(node) + 1;
+}
+
+static struct avl64ops qc_cache_ops = {
+	.avl_start		= qc_avl_start,
+	.avl_end		= qc_avl_end,
+};
+
+/* Find a qc_rec in the incore cache, or allocate one if need be. */
+static struct qc_rec *
+qc_rec_get(
+	struct qc_dquots	*dquots,
+	uint32_t		id,
+	bool			can_alloc)
+{
+	struct qc_rec		*qrec;
+	struct avl64node	*node;
+
+	pthread_mutex_lock(&dquots->lock);
+	node = avl64_find(&dquots->tree, id);
+	if (!node && can_alloc) {
+		qrec = calloc(sizeof(struct qc_rec), 1);
+		if (qrec) {
+			qrec->id = id;
+			node = avl64_insert(&dquots->tree, &qrec->node);
+			if (!node)
+				free(qrec);
+			pthread_mutex_init(&qrec->lock, NULL);
+		}
+	}
+	pthread_mutex_unlock(&dquots->lock);
+
+	return container_of(node, struct qc_rec, node);
+}
+
+/* Bump up an incore dquot's counters. */
+static void
+qc_adjust(
+	struct qc_dquots	*dquots,
+	uint32_t		id,
+	uint64_t		bcount,
+	uint64_t		rtbcount)
+{
+	struct qc_rec		*qrec = qc_rec_get(dquots, id, true);
+
+	pthread_mutex_lock(&qrec->lock);
+	qrec->bcount += bcount;
+	qrec->rtbcount += rtbcount;
+	qrec->icount++;
+	pthread_mutex_unlock(&qrec->lock);
+}
+
+/* Count the realtime blocks allocated to a file. */
+static xfs_filblks_t
+qc_count_rtblocks(
+	struct xfs_inode	*ip)
+{
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	got;
+	xfs_filblks_t		count = 0;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	int			error;
+
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		if (error) {
+			do_warn(
+_("could not read ino %"PRIu64" extents, err=%d\n"),
+				ip->i_ino, error);
+			chkd_flags = 0;
+			return 0;
+		}
+	}
+
+	for_each_xfs_iext(ifp, &icur, &got)
+		if (!isnullstartblock(got.br_startblock))
+			count += got.br_blockcount;
+	return count;
+}
+
+/* Add this inode's information to the quota counts. */
+void
+quotacheck_adjust(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	struct xfs_inode	*ip;
+	uint64_t		blocks;
+	uint64_t		rtblks = 0;
+	int			error;
+
+	/*
+	 * If the fs doesn't have any quota files to check against, skip this
+	 * step.
+	 */
+	if (!user_dquots && !group_dquots && !proj_dquots)
+		return;
+
+	/* Skip if a previous quotacheck adjustment failed. */
+	if (chkd_flags == 0)
+		return;
+
+	/* Quota files are not included in quota counts. */
+	if (ino == mp->m_sb.sb_uquotino ||
+	    ino == mp->m_sb.sb_gquotino ||
+	    ino == mp->m_sb.sb_pquotino)
+		return;
+
+	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
+	if (error) {
+		do_warn(_("could not iget %"PRIu64" for quotacheck, err=%d\n"),
+				ino, error);
+		chkd_flags = 0;
+		return;
+	}
+
+	/* Count the file's blocks. */
+	if (XFS_IS_REALTIME_INODE(ip))
+		rtblks = qc_count_rtblocks(ip);
+	blocks = ip->i_d.di_nblocks - rtblks;
+
+	if (user_dquots)
+		qc_adjust(user_dquots, i_uid_read(VFS_I(ip)), blocks, rtblks);
+	if (group_dquots)
+		qc_adjust(group_dquots, i_gid_read(VFS_I(ip)), blocks, rtblks);
+	if (proj_dquots)
+		qc_adjust(proj_dquots, ip->i_d.di_projid, blocks, rtblks);
+
+	libxfs_irele(ip);
+}
+
+/* Compare this on-disk dquot against whatever we observed. */
+static void
+qc_check_dquot(
+	struct xfs_disk_dquot	*ddq,
+	struct qc_dquots	*dquots)
+{
+	struct qc_rec		*qrec;
+	struct qc_rec		empty = {
+		.bcount		= 0,
+		.rtbcount	= 0,
+		.icount		= 0,
+	};
+	uint32_t		id = be32_to_cpu(ddq->d_id);
+
+	qrec = qc_rec_get(dquots, id, false);
+	if (!qrec)
+		qrec = &empty;
+
+	if (be64_to_cpu(ddq->d_bcount) != qrec->bcount) {
+		do_warn(_("%s id %u has bcount %llu, expected %"PRIu64"\n"),
+				qflags_typestr(dquots->type), id,
+				be64_to_cpu(ddq->d_bcount), qrec->bcount);
+		chkd_flags = 0;
+	}
+
+	if (be64_to_cpu(ddq->d_rtbcount) != qrec->rtbcount) {
+		do_warn(_("%s id %u has rtbcount %llu, expected %"PRIu64"\n"),
+				qflags_typestr(dquots->type), id,
+				be64_to_cpu(ddq->d_rtbcount), qrec->rtbcount);
+		chkd_flags = 0;
+	}
+
+	if (be64_to_cpu(ddq->d_icount) != qrec->icount) {
+		do_warn(_("%s id %u has icount %llu, expected %"PRIu64"\n"),
+				qflags_typestr(dquots->type), id,
+				be64_to_cpu(ddq->d_icount), qrec->icount);
+		chkd_flags = 0;
+	}
+
+	/*
+	 * Mark that we found the record on disk.  Skip locking here because
+	 * we're checking the dquots serially.
+	 */
+	qrec->flags |= QC_REC_ONDISK;
+}
+
+/* Walk every dquot in every block in this quota file extent and compare. */
+static int
+qc_walk_extent(
+	struct xfs_inode	*ip,
+	struct xfs_bmbt_irec	*map,
+	struct qc_dquots	*dquots)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_buf		*bp;
+	struct xfs_dqblk	*dqb;
+	xfs_filblks_t		dqchunklen;
+	xfs_filblks_t		bno;
+	unsigned int		dqperchunk;
+	int			error = 0;
+
+	dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+	dqperchunk = libxfs_calc_dquots_per_chunk(dqchunklen);
+
+	for (bno = 0; bno < map->br_blockcount; bno++) {
+		unsigned int	dqnr;
+		uint64_t	dqid;
+
+		error = -libxfs_buf_read(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, map->br_startblock + bno),
+				dqchunklen, 0, &bp, &xfs_dquot_buf_ops);
+		if (error) {
+			do_warn(
+_("cannot read %s file %"PRIu64", block %"PRIu64", disk block %"PRIu64", err=%d\n"),
+				qflags_typestr(dquots->type), ip->i_ino,
+				map->br_startoff + bno,
+				map->br_startblock + bno, error);
+			chkd_flags = 0;
+			return error;
+		}
+
+		dqb = bp->b_addr;
+		dqid = map->br_startoff * dqperchunk;
+		for (dqnr = 0;
+		     dqnr < dqperchunk && dqid <= UINT_MAX;
+		     dqnr++, dqb++, dqid++)
+			qc_check_dquot(&dqb->dd_diskdq, dquots);
+		libxfs_buf_relse(bp);
+	}
+
+	return error;
+}
+
+/* Check the incore quota counts with what's on disk. */
+void
+quotacheck_verify(
+	struct xfs_mount	*mp,
+	unsigned int		type)
+{
+	struct xfs_bmbt_irec	map;
+	struct xfs_iext_cursor	icur;
+	struct xfs_inode	*ip;
+	struct xfs_ifork	*ifp;
+	struct qc_dquots	*dquots = NULL;
+	struct avl64node	*node, *n;
+	xfs_ino_t		ino = NULLFSINO;
+	uint16_t		qflag = 0;
+	int			error;
+
+	switch (type) {
+	case XFS_DQ_USER:
+		ino = mp->m_sb.sb_uquotino;
+		dquots = user_dquots;
+		qflag = XFS_UQUOTA_CHKD;
+		break;
+	case XFS_DQ_GROUP:
+		ino = mp->m_sb.sb_gquotino;
+		dquots = group_dquots;
+		qflag = XFS_GQUOTA_CHKD;
+		break;
+	case XFS_DQ_PROJ:
+		ino = mp->m_sb.sb_pquotino;
+		dquots = proj_dquots;
+		qflag = XFS_PQUOTA_CHKD;
+		break;
+	}
+
+	/*
+	 * If there's no incore records or there were errors in collecting
+	 * them, bail out early.  No sense in complaining about more garbage.
+	 */
+	if (!dquots || !(chkd_flags & qflag))
+		return;
+
+	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
+	if (error) {
+		do_warn(
+	_("could not open %s file %"PRIu64" for quotacheck, err=%d\n"),
+			qflags_typestr(type), ino, error);
+		chkd_flags = 0;
+		return;
+	}
+
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		if (error) {
+			do_warn(
+	_("could not read %s file %"PRIu64" extents, err=%d\n"),
+				qflags_typestr(type), ip->i_ino, error);
+			chkd_flags = 0;
+			goto err;
+		}
+	}
+
+	/* Walk each extent of the quota file and compare counters. */
+	for_each_xfs_iext(ifp, &icur, &map) {
+		if (map.br_startblock != HOLESTARTBLOCK) {
+			error = qc_walk_extent(ip, &map, dquots);
+			if (error)
+				goto err;
+		}
+	}
+
+	/* Complain about counters that weren't seen on disk. */
+	qc_dquots_foreach(dquots, node, n) {
+		struct qc_rec	*qrec;
+
+		qrec = container_of(node, struct qc_rec, node);
+		if (!(qrec->flags & QC_REC_ONDISK)) {
+			do_warn(
+_("%s id %u not seen on disk (bcount %"PRIu64" rtbcount %"PRIu64" icount %"PRIu64")\n"),
+				qflags_typestr(type), qrec->id,
+				qrec->bcount, qrec->rtbcount, qrec->icount);
+			chkd_flags = 0;
+		}
+	}
+err:
+	libxfs_irele(ip);
+}
+
+/*
+ * Decide if we want to run quotacheck on a particular quota type.  Returns
+ * true only if the inode isn't lost, the fs says quotacheck ran, and the inode
+ * pointer isn't "unset".
+ */
+static inline bool
+qc_has_quotafile(
+	struct xfs_mount	*mp,
+	unsigned int		type)
+{
+	bool			lost;
+	xfs_ino_t		ino;
+	unsigned int		qflag;
+
+	switch (type) {
+	case XFS_DQ_USER:
+		lost = lost_uquotino;
+		ino = mp->m_sb.sb_uquotino;
+		qflag = XFS_UQUOTA_CHKD;
+		break;
+	case XFS_DQ_GROUP:
+		lost = lost_gquotino;
+		ino = mp->m_sb.sb_gquotino;
+		qflag = XFS_GQUOTA_CHKD;
+		break;
+	case XFS_DQ_PROJ:
+		lost = lost_pquotino;
+		ino = mp->m_sb.sb_pquotino;
+		qflag = XFS_PQUOTA_CHKD;
+		break;
+	default:
+		return false;
+	}
+
+	if (lost)
+		return false;
+	if (!(mp->m_sb.sb_qflags & qflag))
+		return false;
+	if (ino == NULLFSINO || ino == 0)
+		return false;
+	return true;
+}
+
+/* Initialize an incore dquot tree. */
+static struct qc_dquots *
+qc_dquots_init(
+	uint16_t		type)
+{
+	struct qc_dquots	*dquots;
+
+	dquots = calloc(1, sizeof(struct qc_dquots));
+	if (!dquots)
+		return NULL;
+
+	dquots->type = type;
+	pthread_mutex_init(&dquots->lock, NULL);
+	avl64_init_tree(&dquots->tree, &qc_cache_ops);
+	return dquots;
+}
+
+/* Set up incore context for quota checks. */
+int
+quotacheck_setup(
+	struct xfs_mount	*mp)
+{
+	chkd_flags = 0;
+
+	/*
+	 * If the superblock said quotas are disabled or was missing pointers
+	 * to any quota inodes, don't bother checking.
+	 */
+	if (!fs_quotas || lost_quotas)
+		return 0;
+
+	if (qc_has_quotafile(mp, XFS_DQ_USER)) {
+		user_dquots = qc_dquots_init(XFS_DQ_USER);
+		if (!user_dquots)
+			goto err;
+		chkd_flags |= XFS_UQUOTA_CHKD;
+	}
+
+	if (qc_has_quotafile(mp, XFS_DQ_GROUP)) {
+		group_dquots = qc_dquots_init(XFS_DQ_GROUP);
+		if (!group_dquots)
+			goto err;
+		chkd_flags |= XFS_GQUOTA_CHKD;
+	}
+
+	if (qc_has_quotafile(mp, XFS_DQ_PROJ)) {
+		proj_dquots = qc_dquots_init(XFS_DQ_PROJ);
+		if (!proj_dquots)
+			goto err;
+		chkd_flags |= XFS_PQUOTA_CHKD;
+	}
+
+	return 0;
+err:
+	chkd_flags = 0;
+	quotacheck_teardown();
+	return ENOMEM;
+}
+
+/* Purge all quotacheck records in a given cache. */
+static void
+qc_purge(
+	struct qc_dquots	**dquotsp)
+{
+	struct qc_dquots	*dquots = *dquotsp;
+	struct qc_rec		*qrec;
+	struct avl64node	*node;
+	struct avl64node	*n;
+
+	if (!dquots)
+		return;
+
+	qc_dquots_foreach(dquots, node, n) {
+		qrec = container_of(node, struct qc_rec, node);
+		free(qrec);
+	}
+	free(dquots);
+	*dquotsp = NULL;
+}
+
+/* Tear down all the incore context from quotacheck. */
+void
+quotacheck_teardown(void)
+{
+	qc_purge(&user_dquots);
+	qc_purge(&group_dquots);
+	qc_purge(&proj_dquots);
+}
diff --git a/repair/quotacheck.h b/repair/quotacheck.h
new file mode 100644
index 00000000..27865e32
--- /dev/null
+++ b/repair/quotacheck.h
@@ -0,0 +1,14 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_QUOTACHECK_H__
+#define __XFS_REPAIR_QUOTACHECK_H__
+
+void quotacheck_adjust(struct xfs_mount *mp, xfs_ino_t ino);
+void quotacheck_verify(struct xfs_mount *mp, unsigned int type);
+int quotacheck_setup(struct xfs_mount *mp);
+void quotacheck_teardown(void);
+
+#endif /* __XFS_REPAIR_QUOTACHECK_H__ */