diff mbox series

[5/9] xfs_repair: rebuild inode btrees with bulk loader

Message ID 158993947501.983175.11198846141379731761.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfs_repair: use btree bulk loading | expand

Commit Message

Darrick J. Wong May 20, 2020, 1:51 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the inode btrees
and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  642 +++++++++++++++++-----------------------------
 2 files changed, 240 insertions(+), 403 deletions(-)

Comments

Brian Foster May 28, 2020, 3:11 p.m. UTC | #1
On Tue, May 19, 2020 at 06:51:15PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Use the btree bulk loading functions to rebuild the inode btrees
> and drop the open-coded implementation.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  libxfs/libxfs_api_defs.h |    1 
>  repair/phase5.c          |  642 +++++++++++++++++-----------------------------
>  2 files changed, 240 insertions(+), 403 deletions(-)
> 
> 
...
> diff --git a/repair/phase5.c b/repair/phase5.c
> index e69b042c..38f30753 100644
> --- a/repair/phase5.c
> +++ b/repair/phase5.c
...
> @@ -372,6 +376,11 @@ estimate_ag_bload_slack(
>  		bload->node_slack = 0;
>  }
>  
> +static inline void skip_rebuild(struct bt_rebuild *btr)
> +{
> +	memset(btr, 0, sizeof(struct bt_rebuild));
> +}
> +

Is there any functional purpose to this? It looks like the memset could
be open-coded, but also seems like it could be elided if we just check
hasfinobt() before using the pointer..?

>  /* Initialize a btree rebuild context. */
>  static void
>  init_rebuild(
> @@ -765,48 +774,38 @@ _("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
...
> +
> +/* Copy one incore inode record into the inobt cursor. */
> +static void
> +get_inode_data(
> +	struct xfs_btree_cur		*cur,
> +	struct ino_tree_node		*ino_rec,
> +	struct agi_stat			*agi_stat)
> +{
> +	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
> +	int				inocnt = 0;
> +	int				finocnt = 0;
> +	int				k;
> +
> +	irec->ir_startino = ino_rec->ino_startnum;
> +	irec->ir_free = ino_rec->ir_free;
> +
> +	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++)  {
> +		ASSERT(is_inode_confirmed(ino_rec, k));
> +
> +		if (is_inode_sparse(ino_rec, k))
>  			continue;
> -
> -		nfinos += rec_nfinos;
> -		ninos += rec_ninos;
> -		num_recs++;
> +		if (is_inode_free(ino_rec, k))
> +			finocnt++;
> +		inocnt++;
>  	}
>  
> -	if (num_recs == 0) {
> -		/*
> -		 * easy corner-case -- no inode records
> -		 */
> -		lptr->num_blocks = 1;
> -		lptr->modulo = 0;
> -		lptr->num_recs_pb = 0;
> -		lptr->num_recs_tot = 0;
> -
> -		btree_curs->num_levels = 1;
> -		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
> -
> -		setup_cursor(mp, agno, btree_curs);
> +	irec->ir_count = inocnt;
> +	irec->ir_freecount = finocnt;
>  
> -		return;
> -	}
> +	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
> +		uint64_t		sparse;
> +		int			spmask;
> +		uint16_t		holemask;
>  
> -	blocks_allocated = lptr->num_blocks = howmany(num_recs,
> -					XR_INOBT_BLOCK_MAXRECS(mp, 0));
> -
> -	lptr->modulo = num_recs % lptr->num_blocks;
> -	lptr->num_recs_pb = num_recs / lptr->num_blocks;
> -	lptr->num_recs_tot = num_recs;
> -	level = 1;
> -
> -	if (lptr->num_blocks > 1)  {
> -		for (; btree_curs->level[level-1].num_blocks > 1
> -				&& level < XFS_BTREE_MAXLEVELS;
> -				level++)  {
> -			lptr = &btree_curs->level[level];
> -			p_lptr = &btree_curs->level[level - 1];
> -			lptr->num_blocks = howmany(p_lptr->num_blocks,
> -				XR_INOBT_BLOCK_MAXRECS(mp, level));
> -			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
> -			lptr->num_recs_pb = p_lptr->num_blocks
> -					/ lptr->num_blocks;
> -			lptr->num_recs_tot = p_lptr->num_blocks;
> -
> -			blocks_allocated += lptr->num_blocks;
> +		/*
> +		 * Convert the 64-bit in-core sparse inode state to the
> +		 * 16-bit on-disk holemask.
> +		 */
> +		holemask = 0;
> +		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> +		sparse = ino_rec->ir_sparse;
> +		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> +			if (sparse & spmask) {
> +				ASSERT((sparse & spmask) == spmask);
> +				holemask |= (1 << k);
> +			} else
> +				ASSERT((sparse & spmask) == 0);
> +			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
>  		}
> +
> +		irec->ir_holemask = holemask;
> +	} else {
> +		irec->ir_holemask = 0;
>  	}
> -	ASSERT(lptr->num_blocks == 1);
> -	btree_curs->num_levels = level;
>  
> -	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
> -			= blocks_allocated;
> +	if (!agi_stat)
> +		return;
>  
> -	setup_cursor(mp, agno, btree_curs);
> +	if (agi_stat->first_agino != NULLAGINO)
> +		agi_stat->first_agino = ino_rec->ino_startnum;

This is initialized to NULLAGINO. When do we ever update it?

> +	agi_stat->freecount += finocnt;
> +	agi_stat->count += inocnt;
> +}
>  
> -	*num_inos = ninos;
> -	*num_free_inos = nfinos;
> +/* Grab one inobt record. */
> +static int
> +get_inobt_record(
> +	struct xfs_btree_cur		*cur,
> +	void				*priv)
> +{
> +	struct bt_rebuild		*rebuild = priv;
>  
> -	return;
> +	get_inode_data(cur, rebuild->ino_rec, rebuild->agi_stat);
> +	rebuild->ino_rec = next_ino_rec(rebuild->ino_rec);
> +	return 0;
>  }
>  
> +/* Rebuild a inobt btree. */
>  static void
> -prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> -	xfs_btnum_t btnum, xfs_agino_t startino, int level)
> +build_inobt(
> +	struct repair_ctx	*sc,
> +	xfs_agnumber_t		agno,
> +	struct bt_rebuild	*btr_ino,
> +	struct agi_stat		*agi_stat)
>  {
> -	struct xfs_btree_block	*bt_hdr;
> -	xfs_inobt_key_t		*bt_key;
> -	xfs_inobt_ptr_t		*bt_ptr;
> -	xfs_agblock_t		agbno;
> -	bt_stat_level_t		*lptr;
> -	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
>  	int			error;
>  
> -	level++;
> -
> -	if (level >= btree_curs->num_levels)
> -		return;
> -
> -	lptr = &btree_curs->level[level];
> -	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> -
> -	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
> -		/*
> -		 * this only happens once to initialize the
> -		 * first path up the left side of the tree
> -		 * where the agbno's are already set up
> -		 */
> -		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
> -	}
> -
> -	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
> -				lptr->num_recs_pb + (lptr->modulo > 0))  {
> -		/*
> -		 * write out current prev block, grab us a new block,
> -		 * and set the rightsib pointer of current block
> -		 */
> -#ifdef XR_BLD_INO_TRACE
> -		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
> -#endif
> -		if (lptr->prev_agbno != NULLAGBLOCK)  {
> -			ASSERT(lptr->prev_buf_p != NULL);
> -			libxfs_buf_mark_dirty(lptr->prev_buf_p);
> -			libxfs_buf_relse(lptr->prev_buf_p);
> -		}
> -		lptr->prev_agbno = lptr->agbno;;
> -		lptr->prev_buf_p = lptr->buf_p;
> -		agbno = get_next_blockaddr(agno, level, btree_curs);
> -
> -		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
> -
> -		error = -libxfs_buf_get(mp->m_dev,
> -				XFS_AGB_TO_DADDR(mp, agno, agbno),
> -				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
> -		if (error)
> -			do_error(_("Cannot grab inode btree buffer, err=%d"),
> -					error);
> -		lptr->agbno = agbno;
> +	btr_ino->bload.get_record = get_inobt_record;
> +	btr_ino->bload.claim_block = rebuild_claim_block;
> +	agi_stat->count = agi_stat->freecount = 0;

These are already initialized to zero by the caller. I suppose we might
as well also move the ->first_agino init to where this is allocated.

Otherwise I mostly just have the same general feedback as for the
previous patch (wrt the get_record() logic and build_[f]inobt()
duplication).

Brian

> +	agi_stat->first_agino = NULLAGINO;
> +	btr_ino->agi_stat = agi_stat;
> +	btr_ino->ino_rec = findfirst_inode_rec(agno);
> +
> +	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
> +	if (error)
> +		do_error(
> +_("Insufficient memory to construct inobt rebuild transaction.\n"));
> +
> +	/* Add all observed inobt records. */
> +	error = -libxfs_btree_bload(btr_ino->cur, &btr_ino->bload, btr_ino);
> +	if (error)
> +		do_error(
> +_("Error %d while creating inobt btree for AG %u.\n"), error, agno);
> +
> +	/* Since we're not writing the AGI yet, no need to commit the cursor */
> +	libxfs_btree_del_cursor(btr_ino->cur, 0);
> +	error = -libxfs_trans_commit(sc->tp);
> +	if (error)
> +		do_error(
> +_("Error %d while writing inobt btree for AG %u.\n"), error, agno);
> +	sc->tp = NULL;
> +}
>  
> -		if (lptr->modulo)
> -			lptr->modulo--;
> +/* Grab one finobt record. */
> +static int
> +get_finobt_record(
> +	struct xfs_btree_cur		*cur,
> +	void				*priv)
> +{
> +	struct bt_rebuild		*rebuild = priv;
>  
> -		/*
> -		 * initialize block header
> -		 */
> -		lptr->buf_p->b_ops = ops;
> -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> -		libxfs_btree_init_block(mp, lptr->buf_p, btnum,
> -					level, 0, agno);
> +	get_inode_data(cur, rebuild->ino_rec, NULL);
> +	rebuild->ino_rec = next_free_ino_rec(rebuild->ino_rec);
> +	return 0;
> +}
>  
> -		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
> +/* Rebuild a finobt btree. */
> +static void
> +build_finobt(
> +	struct repair_ctx	*sc,
> +	xfs_agnumber_t		agno,
> +	struct bt_rebuild	*btr_fino)
> +{
> +	int			error;
>  
> -		/*
> -		 * propagate extent record for first extent in new block up
> -		 */
> -		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
> -	}
> -	/*
> -	 * add inode info to current block
> -	 */
> -	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
> -
> -	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
> -				    be16_to_cpu(bt_hdr->bb_numrecs));
> -	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
> -				    be16_to_cpu(bt_hdr->bb_numrecs),
> -				    M_IGEO(mp)->inobt_mxr[1]);
> -
> -	bt_key->ir_startino = cpu_to_be32(startino);
> -	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
> +	btr_fino->bload.get_record = get_finobt_record;
> +	btr_fino->bload.claim_block = rebuild_claim_block;
> +	btr_fino->ino_rec = findfirst_free_inode_rec(agno);
> +
> +	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
> +	if (error)
> +		do_error(
> +_("Insufficient memory to construct finobt rebuild transaction.\n"));
> +
> +	/* Add all observed finobt records. */
> +	error = -libxfs_btree_bload(btr_fino->cur, &btr_fino->bload, btr_fino);
> +	if (error)
> +		do_error(
> +_("Error %d while creating finobt btree for AG %u.\n"), error, agno);
> +
> +	/* Since we're not writing the AGI yet, no need to commit the cursor */
> +	libxfs_btree_del_cursor(btr_fino->cur, 0);
> +	error = -libxfs_trans_commit(sc->tp);
> +	if (error)
> +		do_error(
> +_("Error %d while writing finobt btree for AG %u.\n"), error, agno);
> +	sc->tp = NULL;
>  }
>  
>  /*
>   * XXX: yet more code that can be shared with mkfs, growfs.
>   */
>  static void
> -build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> -		bt_status_t *finobt_curs, struct agi_stat *agi_stat)
> +build_agi(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	struct bt_rebuild	*btr_ino,
> +	struct bt_rebuild	*btr_fino,
> +	struct agi_stat		*agi_stat)
>  {
> -	xfs_buf_t	*agi_buf;
> -	xfs_agi_t	*agi;
> -	int		i;
> -	int		error;
> +	struct xfs_buf		*agi_buf;
> +	struct xfs_agi		*agi;
> +	int			i;
> +	int			error;
>  
>  	error = -libxfs_buf_get(mp->m_dev,
>  			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
> @@ -1008,8 +1053,8 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
>  		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
>  			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
>  	agi->agi_count = cpu_to_be32(agi_stat->count);
> -	agi->agi_root = cpu_to_be32(btree_curs->root);
> -	agi->agi_level = cpu_to_be32(btree_curs->num_levels);
> +	agi->agi_root = cpu_to_be32(btr_ino->newbt.afake.af_root);
> +	agi->agi_level = cpu_to_be32(btr_ino->newbt.afake.af_levels);
>  	agi->agi_freecount = cpu_to_be32(agi_stat->freecount);
>  	agi->agi_newino = cpu_to_be32(agi_stat->first_agino);
>  	agi->agi_dirino = cpu_to_be32(NULLAGINO);
> @@ -1021,203 +1066,16 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
>  		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
>  
>  	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> -		agi->agi_free_root = cpu_to_be32(finobt_curs->root);
> -		agi->agi_free_level = cpu_to_be32(finobt_curs->num_levels);
> +		agi->agi_free_root =
> +				cpu_to_be32(btr_fino->newbt.afake.af_root);
> +		agi->agi_free_level =
> +				cpu_to_be32(btr_fino->newbt.afake.af_levels);
>  	}
>  
>  	libxfs_buf_mark_dirty(agi_buf);
>  	libxfs_buf_relse(agi_buf);
>  }
>  
> -/*
> - * rebuilds an inode tree given a cursor.  We're lazy here and call
> - * the routine that builds the agi
> - */
> -static void
> -build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
> -		bt_status_t *btree_curs, xfs_btnum_t btnum,
> -		struct agi_stat *agi_stat)
> -{
> -	xfs_agnumber_t		i;
> -	xfs_agblock_t		j;
> -	xfs_agblock_t		agbno;
> -	xfs_agino_t		first_agino;
> -	struct xfs_btree_block	*bt_hdr;
> -	xfs_inobt_rec_t		*bt_rec;
> -	ino_tree_node_t		*ino_rec;
> -	bt_stat_level_t		*lptr;
> -	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
> -	xfs_agino_t		count = 0;
> -	xfs_agino_t		freecount = 0;
> -	int			inocnt;
> -	uint8_t			finocnt;
> -	int			k;
> -	int			level = btree_curs->num_levels;
> -	int			spmask;
> -	uint64_t		sparse;
> -	uint16_t		holemask;
> -	int			error;
> -
> -	ASSERT(btnum == XFS_BTNUM_INO || btnum == XFS_BTNUM_FINO);
> -
> -	for (i = 0; i < level; i++)  {
> -		lptr = &btree_curs->level[i];
> -
> -		agbno = get_next_blockaddr(agno, i, btree_curs);
> -		error = -libxfs_buf_get(mp->m_dev,
> -				XFS_AGB_TO_DADDR(mp, agno, agbno),
> -				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
> -		if (error)
> -			do_error(_("Cannot grab inode btree buffer, err=%d"),
> -					error);
> -
> -		if (i == btree_curs->num_levels - 1)
> -			btree_curs->root = agbno;
> -
> -		lptr->agbno = agbno;
> -		lptr->prev_agbno = NULLAGBLOCK;
> -		lptr->prev_buf_p = NULL;
> -		/*
> -		 * initialize block header
> -		 */
> -
> -		lptr->buf_p->b_ops = ops;
> -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> -		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
> -	}
> -
> -	/*
> -	 * run along leaf, setting up records.  as we have to switch
> -	 * blocks, call the prop_ino_cursor routine to set up the new
> -	 * pointers for the parent.  that can recurse up to the root
> -	 * if required.  set the sibling pointers for leaf level here.
> -	 */
> -	if (btnum == XFS_BTNUM_FINO)
> -		ino_rec = findfirst_free_inode_rec(agno);
> -	else
> -		ino_rec = findfirst_inode_rec(agno);
> -
> -	if (ino_rec != NULL)
> -		first_agino = ino_rec->ino_startnum;
> -	else
> -		first_agino = NULLAGINO;
> -
> -	lptr = &btree_curs->level[0];
> -
> -	for (i = 0; i < lptr->num_blocks; i++)  {
> -		/*
> -		 * block initialization, lay in block header
> -		 */
> -		lptr->buf_p->b_ops = ops;
> -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> -		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
> -
> -		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
> -		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
> -							(lptr->modulo > 0));
> -
> -		if (lptr->modulo > 0)
> -			lptr->modulo--;
> -
> -		if (lptr->num_recs_pb > 0)
> -			prop_ino_cursor(mp, agno, btree_curs, btnum,
> -					ino_rec->ino_startnum, 0);
> -
> -		bt_rec = (xfs_inobt_rec_t *)
> -			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
> -		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
> -			ASSERT(ino_rec != NULL);
> -			bt_rec[j].ir_startino =
> -					cpu_to_be32(ino_rec->ino_startnum);
> -			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);
> -
> -			inocnt = finocnt = 0;
> -			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
> -				ASSERT(is_inode_confirmed(ino_rec, k));
> -
> -				if (is_inode_sparse(ino_rec, k))
> -					continue;
> -				if (is_inode_free(ino_rec, k))
> -					finocnt++;
> -				inocnt++;
> -			}
> -
> -			/*
> -			 * Set the freecount and check whether we need to update
> -			 * the sparse format fields. Otherwise, skip to the next
> -			 * record.
> -			 */
> -			inorec_set_freecount(mp, &bt_rec[j], finocnt);
> -			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
> -				goto nextrec;
> -
> -			/*
> -			 * Convert the 64-bit in-core sparse inode state to the
> -			 * 16-bit on-disk holemask.
> -			 */
> -			holemask = 0;
> -			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> -			sparse = ino_rec->ir_sparse;
> -			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> -				if (sparse & spmask) {
> -					ASSERT((sparse & spmask) == spmask);
> -					holemask |= (1 << k);
> -				} else
> -					ASSERT((sparse & spmask) == 0);
> -				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> -			}
> -
> -			bt_rec[j].ir_u.sp.ir_count = inocnt;
> -			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);
> -
> -nextrec:
> -			freecount += finocnt;
> -			count += inocnt;
> -
> -			if (btnum == XFS_BTNUM_FINO)
> -				ino_rec = next_free_ino_rec(ino_rec);
> -			else
> -				ino_rec = next_ino_rec(ino_rec);
> -		}
> -
> -		if (ino_rec != NULL)  {
> -			/*
> -			 * get next leaf level block
> -			 */
> -			if (lptr->prev_buf_p != NULL)  {
> -#ifdef XR_BLD_INO_TRACE
> -				fprintf(stderr, "writing inobt agbno %u\n",
> -					lptr->prev_agbno);
> -#endif
> -				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
> -				libxfs_buf_mark_dirty(lptr->prev_buf_p);
> -				libxfs_buf_relse(lptr->prev_buf_p);
> -			}
> -			lptr->prev_buf_p = lptr->buf_p;
> -			lptr->prev_agbno = lptr->agbno;
> -			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
> -			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
> -
> -			error = -libxfs_buf_get(mp->m_dev,
> -					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
> -					XFS_FSB_TO_BB(mp, 1),
> -					&lptr->buf_p);
> -			if (error)
> -				do_error(
> -	_("Cannot grab inode btree buffer, err=%d"),
> -						error);
> -		}
> -	}
> -
> -	if (agi_stat) {
> -		agi_stat->first_agino = first_agino;
> -		agi_stat->count = count;
> -		agi_stat->freecount = freecount;
> -	}
> -}
> -
>  /* rebuild the rmap tree */
>  
>  /*
> @@ -2142,14 +2000,10 @@ phase5_func(
>  {
>  	struct repair_ctx	sc = { .mp = mp, };
>  	struct agi_stat		agi_stat = {0,};
> -	uint64_t		num_inos;
> -	uint64_t		num_free_inos;
> -	uint64_t		finobt_num_inos;
> -	uint64_t		finobt_num_free_inos;
>  	struct bt_rebuild	btr_bno;
>  	struct bt_rebuild	btr_cnt;
> -	bt_status_t		ino_btree_curs;
> -	bt_status_t		fino_btree_curs;
> +	struct bt_rebuild	btr_ino;
> +	struct bt_rebuild	btr_fino;
>  	bt_status_t		rmap_btree_curs;
>  	bt_status_t		refcnt_btree_curs;
>  	int			extra_blocks = 0;
> @@ -2184,19 +2038,8 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
>  			agno);
>  	}
>  
> -	/*
> -	 * ok, now set up the btree cursors for the on-disk btrees (includes
> -	 * pre-allocating all required blocks for the trees themselves)
> -	 */
> -	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
> -			&num_free_inos, 0);
> -
> -	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> -		init_ino_cursor(mp, agno, &fino_btree_curs, &finobt_num_inos,
> -				&finobt_num_free_inos, 1);
> -
> -	sb_icount_ag[agno] += num_inos;
> -	sb_ifree_ag[agno] += num_free_inos;
> +	init_ino_cursors(&sc, agno, num_freeblocks, &sb_icount_ag[agno],
> +			&sb_ifree_ag[agno], &btr_ino, &btr_fino);
>  
>  	/*
>  	 * Set up the btree cursors for the on-disk rmap btrees, which includes
> @@ -2287,34 +2130,27 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
>  			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
>  
>  	/*
> -	 * build inode allocation tree.
> +	 * build inode allocation trees.
>  	 */
> -	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO, &agi_stat);
> -	write_cursor(&ino_btree_curs);
> -
> -	/*
> -	 * build free inode tree
> -	 */
> -	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> -		build_ino_tree(mp, agno, &fino_btree_curs,
> -				XFS_BTNUM_FINO, NULL);
> -		write_cursor(&fino_btree_curs);
> -	}
> +	build_inobt(&sc, agno, &btr_ino, &agi_stat);
> +	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> +		build_finobt(&sc, agno, &btr_fino);
>  
>  	/* build the agi */
> -	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs, &agi_stat);
> +	build_agi(mp, agno, &btr_ino, &btr_fino, &agi_stat);
>  
>  	/*
>  	 * tear down cursors
>  	 */
>  	finish_rebuild(mp, &btr_bno, lost_fsb);
>  	finish_rebuild(mp, &btr_cnt, lost_fsb);
> +	finish_rebuild(mp, &btr_ino, lost_fsb);
> +	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> +		finish_rebuild(mp, &btr_fino, lost_fsb);
>  	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
>  		finish_cursor(&rmap_btree_curs);
>  	if (xfs_sb_version_hasreflink(&mp->m_sb))
>  		finish_cursor(&refcnt_btree_curs);
> -	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> -		finish_cursor(&fino_btree_curs);
>  
>  	/*
>  	 * release the incore per-AG bno/bcnt trees so the extent nodes
>
Darrick J. Wong May 29, 2020, 10:18 p.m. UTC | #2
On Thu, May 28, 2020 at 11:11:21AM -0400, Brian Foster wrote:
> On Tue, May 19, 2020 at 06:51:15PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Use the btree bulk loading functions to rebuild the inode btrees
> > and drop the open-coded implementation.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  libxfs/libxfs_api_defs.h |    1 
> >  repair/phase5.c          |  642 +++++++++++++++++-----------------------------
> >  2 files changed, 240 insertions(+), 403 deletions(-)
> > 
> > 
> ...
> > diff --git a/repair/phase5.c b/repair/phase5.c
> > index e69b042c..38f30753 100644
> > --- a/repair/phase5.c
> > +++ b/repair/phase5.c
> ...
> > @@ -372,6 +376,11 @@ estimate_ag_bload_slack(
> >  		bload->node_slack = 0;
> >  }
> >  
> > +static inline void skip_rebuild(struct bt_rebuild *btr)
> > +{
> > +	memset(btr, 0, sizeof(struct bt_rebuild));
> > +}
> > +
> 
> Is there any functional purpose to this? It looks like the memset could
> be open-coded, but also seems like it could be elided if we just check
> hasfinobt() before using the pointer..?

Hm, yeah, ok, I'll go audit this.

> >  /* Initialize a btree rebuild context. */
> >  static void
> >  init_rebuild(
> > @@ -765,48 +774,38 @@ _("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
> ...
> > +
> > +/* Copy one incore inode record into the inobt cursor. */
> > +static void
> > +get_inode_data(
> > +	struct xfs_btree_cur		*cur,
> > +	struct ino_tree_node		*ino_rec,
> > +	struct agi_stat			*agi_stat)
> > +{
> > +	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
> > +	int				inocnt = 0;
> > +	int				finocnt = 0;
> > +	int				k;
> > +
> > +	irec->ir_startino = ino_rec->ino_startnum;
> > +	irec->ir_free = ino_rec->ir_free;
> > +
> > +	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++)  {
> > +		ASSERT(is_inode_confirmed(ino_rec, k));
> > +
> > +		if (is_inode_sparse(ino_rec, k))
> >  			continue;
> > -
> > -		nfinos += rec_nfinos;
> > -		ninos += rec_ninos;
> > -		num_recs++;
> > +		if (is_inode_free(ino_rec, k))
> > +			finocnt++;
> > +		inocnt++;
> >  	}
> >  
> > -	if (num_recs == 0) {
> > -		/*
> > -		 * easy corner-case -- no inode records
> > -		 */
> > -		lptr->num_blocks = 1;
> > -		lptr->modulo = 0;
> > -		lptr->num_recs_pb = 0;
> > -		lptr->num_recs_tot = 0;
> > -
> > -		btree_curs->num_levels = 1;
> > -		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
> > -
> > -		setup_cursor(mp, agno, btree_curs);
> > +	irec->ir_count = inocnt;
> > +	irec->ir_freecount = finocnt;
> >  
> > -		return;
> > -	}
> > +	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
> > +		uint64_t		sparse;
> > +		int			spmask;
> > +		uint16_t		holemask;
> >  
> > -	blocks_allocated = lptr->num_blocks = howmany(num_recs,
> > -					XR_INOBT_BLOCK_MAXRECS(mp, 0));
> > -
> > -	lptr->modulo = num_recs % lptr->num_blocks;
> > -	lptr->num_recs_pb = num_recs / lptr->num_blocks;
> > -	lptr->num_recs_tot = num_recs;
> > -	level = 1;
> > -
> > -	if (lptr->num_blocks > 1)  {
> > -		for (; btree_curs->level[level-1].num_blocks > 1
> > -				&& level < XFS_BTREE_MAXLEVELS;
> > -				level++)  {
> > -			lptr = &btree_curs->level[level];
> > -			p_lptr = &btree_curs->level[level - 1];
> > -			lptr->num_blocks = howmany(p_lptr->num_blocks,
> > -				XR_INOBT_BLOCK_MAXRECS(mp, level));
> > -			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
> > -			lptr->num_recs_pb = p_lptr->num_blocks
> > -					/ lptr->num_blocks;
> > -			lptr->num_recs_tot = p_lptr->num_blocks;
> > -
> > -			blocks_allocated += lptr->num_blocks;
> > +		/*
> > +		 * Convert the 64-bit in-core sparse inode state to the
> > +		 * 16-bit on-disk holemask.
> > +		 */
> > +		holemask = 0;
> > +		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> > +		sparse = ino_rec->ir_sparse;
> > +		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> > +			if (sparse & spmask) {
> > +				ASSERT((sparse & spmask) == spmask);
> > +				holemask |= (1 << k);
> > +			} else
> > +				ASSERT((sparse & spmask) == 0);
> > +			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> >  		}
> > +
> > +		irec->ir_holemask = holemask;
> > +	} else {
> > +		irec->ir_holemask = 0;
> >  	}
> > -	ASSERT(lptr->num_blocks == 1);
> > -	btree_curs->num_levels = level;
> >  
> > -	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
> > -			= blocks_allocated;
> > +	if (!agi_stat)
> > +		return;
> >  
> > -	setup_cursor(mp, agno, btree_curs);
> > +	if (agi_stat->first_agino != NULLAGINO)
> > +		agi_stat->first_agino = ino_rec->ino_startnum;
> 
> This is initialized to NULLAGINO. When do we ever update it?

I don't understand your question.  The purpose of the first_agino code
is to set agi_newino to the first inode cluster in the AG (or NULLAGINO
if there are no inodes) so we initialize first_agino to NULLAGINO and if
we process any inode records, we'll update it to ir_startino of the
first inode record that we put in the btree.

> > +	agi_stat->freecount += finocnt;
> > +	agi_stat->count += inocnt;
> > +}
> >  
> > -	*num_inos = ninos;
> > -	*num_free_inos = nfinos;
> > +/* Grab one inobt record. */
> > +static int
> > +get_inobt_record(
> > +	struct xfs_btree_cur		*cur,
> > +	void				*priv)
> > +{
> > +	struct bt_rebuild		*rebuild = priv;
> >  
> > -	return;
> > +	get_inode_data(cur, rebuild->ino_rec, rebuild->agi_stat);
> > +	rebuild->ino_rec = next_ino_rec(rebuild->ino_rec);
> > +	return 0;
> >  }
> >  
> > +/* Rebuild a inobt btree. */
> >  static void
> > -prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> > -	xfs_btnum_t btnum, xfs_agino_t startino, int level)
> > +build_inobt(
> > +	struct repair_ctx	*sc,
> > +	xfs_agnumber_t		agno,
> > +	struct bt_rebuild	*btr_ino,
> > +	struct agi_stat		*agi_stat)
> >  {
> > -	struct xfs_btree_block	*bt_hdr;
> > -	xfs_inobt_key_t		*bt_key;
> > -	xfs_inobt_ptr_t		*bt_ptr;
> > -	xfs_agblock_t		agbno;
> > -	bt_stat_level_t		*lptr;
> > -	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
> >  	int			error;
> >  
> > -	level++;
> > -
> > -	if (level >= btree_curs->num_levels)
> > -		return;
> > -
> > -	lptr = &btree_curs->level[level];
> > -	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > -
> > -	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
> > -		/*
> > -		 * this only happens once to initialize the
> > -		 * first path up the left side of the tree
> > -		 * where the agbno's are already set up
> > -		 */
> > -		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
> > -	}
> > -
> > -	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
> > -				lptr->num_recs_pb + (lptr->modulo > 0))  {
> > -		/*
> > -		 * write out current prev block, grab us a new block,
> > -		 * and set the rightsib pointer of current block
> > -		 */
> > -#ifdef XR_BLD_INO_TRACE
> > -		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
> > -#endif
> > -		if (lptr->prev_agbno != NULLAGBLOCK)  {
> > -			ASSERT(lptr->prev_buf_p != NULL);
> > -			libxfs_buf_mark_dirty(lptr->prev_buf_p);
> > -			libxfs_buf_relse(lptr->prev_buf_p);
> > -		}
> > -		lptr->prev_agbno = lptr->agbno;;
> > -		lptr->prev_buf_p = lptr->buf_p;
> > -		agbno = get_next_blockaddr(agno, level, btree_curs);
> > -
> > -		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
> > -
> > -		error = -libxfs_buf_get(mp->m_dev,
> > -				XFS_AGB_TO_DADDR(mp, agno, agbno),
> > -				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
> > -		if (error)
> > -			do_error(_("Cannot grab inode btree buffer, err=%d"),
> > -					error);
> > -		lptr->agbno = agbno;
> > +	btr_ino->bload.get_record = get_inobt_record;
> > +	btr_ino->bload.claim_block = rebuild_claim_block;
> > +	agi_stat->count = agi_stat->freecount = 0;
> 
> These are already initialized to zero by the caller. I suppose we might
> as well also move the ->first_agino init to where this is allocated.

TBH we don't need to (re)init agi_stat at all.  I'll drop these two
lines.

> Otherwise I mostly just have the same general feedback as for the
> previous patch (wrt the get_record() logic and build_[f]inobt()
> duplication).

<nod> Will fix that too.

> Brian
> 
> > +	agi_stat->first_agino = NULLAGINO;
> > +	btr_ino->agi_stat = agi_stat;
> > +	btr_ino->ino_rec = findfirst_inode_rec(agno);
> > +
> > +	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
> > +	if (error)
> > +		do_error(
> > +_("Insufficient memory to construct inobt rebuild transaction.\n"));
> > +
> > +	/* Add all observed inobt records. */
> > +	error = -libxfs_btree_bload(btr_ino->cur, &btr_ino->bload, btr_ino);
> > +	if (error)
> > +		do_error(
> > +_("Error %d while creating inobt btree for AG %u.\n"), error, agno);
> > +
> > +	/* Since we're not writing the AGI yet, no need to commit the cursor */
> > +	libxfs_btree_del_cursor(btr_ino->cur, 0);
> > +	error = -libxfs_trans_commit(sc->tp);
> > +	if (error)
> > +		do_error(
> > +_("Error %d while writing inobt btree for AG %u.\n"), error, agno);
> > +	sc->tp = NULL;
> > +}
> >  
> > -		if (lptr->modulo)
> > -			lptr->modulo--;
> > +/* Grab one finobt record. */
> > +static int
> > +get_finobt_record(
> > +	struct xfs_btree_cur		*cur,
> > +	void				*priv)
> > +{
> > +	struct bt_rebuild		*rebuild = priv;
> >  
> > -		/*
> > -		 * initialize block header
> > -		 */
> > -		lptr->buf_p->b_ops = ops;
> > -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> > -		libxfs_btree_init_block(mp, lptr->buf_p, btnum,
> > -					level, 0, agno);
> > +	get_inode_data(cur, rebuild->ino_rec, NULL);
> > +	rebuild->ino_rec = next_free_ino_rec(rebuild->ino_rec);
> > +	return 0;
> > +}
> >  
> > -		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
> > +/* Rebuild a finobt btree. */
> > +static void
> > +build_finobt(
> > +	struct repair_ctx	*sc,
> > +	xfs_agnumber_t		agno,
> > +	struct bt_rebuild	*btr_fino)
> > +{
> > +	int			error;
> >  
> > -		/*
> > -		 * propagate extent record for first extent in new block up
> > -		 */
> > -		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
> > -	}
> > -	/*
> > -	 * add inode info to current block
> > -	 */
> > -	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
> > -
> > -	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
> > -				    be16_to_cpu(bt_hdr->bb_numrecs));
> > -	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
> > -				    be16_to_cpu(bt_hdr->bb_numrecs),
> > -				    M_IGEO(mp)->inobt_mxr[1]);
> > -
> > -	bt_key->ir_startino = cpu_to_be32(startino);
> > -	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
> > +	btr_fino->bload.get_record = get_finobt_record;
> > +	btr_fino->bload.claim_block = rebuild_claim_block;
> > +	btr_fino->ino_rec = findfirst_free_inode_rec(agno);
> > +
> > +	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
> > +	if (error)
> > +		do_error(
> > +_("Insufficient memory to construct finobt rebuild transaction.\n"));
> > +
> > +	/* Add all observed finobt records. */
> > +	error = -libxfs_btree_bload(btr_fino->cur, &btr_fino->bload, btr_fino);
> > +	if (error)
> > +		do_error(
> > +_("Error %d while creating finobt btree for AG %u.\n"), error, agno);
> > +
> > +	/* Since we're not writing the AGI yet, no need to commit the cursor */
> > +	libxfs_btree_del_cursor(btr_fino->cur, 0);
> > +	error = -libxfs_trans_commit(sc->tp);
> > +	if (error)
> > +		do_error(
> > +_("Error %d while writing finobt btree for AG %u.\n"), error, agno);
> > +	sc->tp = NULL;
> >  }
> >  
> >  /*
> >   * XXX: yet more code that can be shared with mkfs, growfs.
> >   */
> >  static void
> > -build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> > -		bt_status_t *finobt_curs, struct agi_stat *agi_stat)
> > +build_agi(
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno,
> > +	struct bt_rebuild	*btr_ino,
> > +	struct bt_rebuild	*btr_fino,
> > +	struct agi_stat		*agi_stat)
> >  {
> > -	xfs_buf_t	*agi_buf;
> > -	xfs_agi_t	*agi;
> > -	int		i;
> > -	int		error;
> > +	struct xfs_buf		*agi_buf;
> > +	struct xfs_agi		*agi;
> > +	int			i;
> > +	int			error;
> >  
> >  	error = -libxfs_buf_get(mp->m_dev,
> >  			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
> > @@ -1008,8 +1053,8 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> >  		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
> >  			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
> >  	agi->agi_count = cpu_to_be32(agi_stat->count);
> > -	agi->agi_root = cpu_to_be32(btree_curs->root);
> > -	agi->agi_level = cpu_to_be32(btree_curs->num_levels);
> > +	agi->agi_root = cpu_to_be32(btr_ino->newbt.afake.af_root);
> > +	agi->agi_level = cpu_to_be32(btr_ino->newbt.afake.af_levels);
> >  	agi->agi_freecount = cpu_to_be32(agi_stat->freecount);
> >  	agi->agi_newino = cpu_to_be32(agi_stat->first_agino);
> >  	agi->agi_dirino = cpu_to_be32(NULLAGINO);
> > @@ -1021,203 +1066,16 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> >  		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
> >  
> >  	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> > -		agi->agi_free_root = cpu_to_be32(finobt_curs->root);
> > -		agi->agi_free_level = cpu_to_be32(finobt_curs->num_levels);
> > +		agi->agi_free_root =
> > +				cpu_to_be32(btr_fino->newbt.afake.af_root);
> > +		agi->agi_free_level =
> > +				cpu_to_be32(btr_fino->newbt.afake.af_levels);
> >  	}
> >  
> >  	libxfs_buf_mark_dirty(agi_buf);
> >  	libxfs_buf_relse(agi_buf);
> >  }
> >  
> > -/*
> > - * rebuilds an inode tree given a cursor.  We're lazy here and call
> > - * the routine that builds the agi
> > - */
> > -static void
> > -build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
> > -		bt_status_t *btree_curs, xfs_btnum_t btnum,
> > -		struct agi_stat *agi_stat)
> > -{
> > -	xfs_agnumber_t		i;
> > -	xfs_agblock_t		j;
> > -	xfs_agblock_t		agbno;
> > -	xfs_agino_t		first_agino;
> > -	struct xfs_btree_block	*bt_hdr;
> > -	xfs_inobt_rec_t		*bt_rec;
> > -	ino_tree_node_t		*ino_rec;
> > -	bt_stat_level_t		*lptr;
> > -	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
> > -	xfs_agino_t		count = 0;
> > -	xfs_agino_t		freecount = 0;
> > -	int			inocnt;
> > -	uint8_t			finocnt;
> > -	int			k;
> > -	int			level = btree_curs->num_levels;
> > -	int			spmask;
> > -	uint64_t		sparse;
> > -	uint16_t		holemask;
> > -	int			error;
> > -
> > -	ASSERT(btnum == XFS_BTNUM_INO || btnum == XFS_BTNUM_FINO);
> > -
> > -	for (i = 0; i < level; i++)  {
> > -		lptr = &btree_curs->level[i];
> > -
> > -		agbno = get_next_blockaddr(agno, i, btree_curs);
> > -		error = -libxfs_buf_get(mp->m_dev,
> > -				XFS_AGB_TO_DADDR(mp, agno, agbno),
> > -				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
> > -		if (error)
> > -			do_error(_("Cannot grab inode btree buffer, err=%d"),
> > -					error);
> > -
> > -		if (i == btree_curs->num_levels - 1)
> > -			btree_curs->root = agbno;
> > -
> > -		lptr->agbno = agbno;
> > -		lptr->prev_agbno = NULLAGBLOCK;
> > -		lptr->prev_buf_p = NULL;
> > -		/*
> > -		 * initialize block header
> > -		 */
> > -
> > -		lptr->buf_p->b_ops = ops;
> > -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> > -		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
> > -	}
> > -
> > -	/*
> > -	 * run along leaf, setting up records.  as we have to switch
> > -	 * blocks, call the prop_ino_cursor routine to set up the new
> > -	 * pointers for the parent.  that can recurse up to the root
> > -	 * if required.  set the sibling pointers for leaf level here.
> > -	 */
> > -	if (btnum == XFS_BTNUM_FINO)
> > -		ino_rec = findfirst_free_inode_rec(agno);
> > -	else
> > -		ino_rec = findfirst_inode_rec(agno);
> > -
> > -	if (ino_rec != NULL)
> > -		first_agino = ino_rec->ino_startnum;
> > -	else
> > -		first_agino = NULLAGINO;
> > -
> > -	lptr = &btree_curs->level[0];
> > -
> > -	for (i = 0; i < lptr->num_blocks; i++)  {
> > -		/*
> > -		 * block initialization, lay in block header
> > -		 */
> > -		lptr->buf_p->b_ops = ops;
> > -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> > -		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
> > -
> > -		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
> > -		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
> > -							(lptr->modulo > 0));
> > -
> > -		if (lptr->modulo > 0)
> > -			lptr->modulo--;
> > -
> > -		if (lptr->num_recs_pb > 0)
> > -			prop_ino_cursor(mp, agno, btree_curs, btnum,
> > -					ino_rec->ino_startnum, 0);
> > -
> > -		bt_rec = (xfs_inobt_rec_t *)
> > -			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
> > -		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
> > -			ASSERT(ino_rec != NULL);
> > -			bt_rec[j].ir_startino =
> > -					cpu_to_be32(ino_rec->ino_startnum);
> > -			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);
> > -
> > -			inocnt = finocnt = 0;
> > -			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
> > -				ASSERT(is_inode_confirmed(ino_rec, k));
> > -
> > -				if (is_inode_sparse(ino_rec, k))
> > -					continue;
> > -				if (is_inode_free(ino_rec, k))
> > -					finocnt++;
> > -				inocnt++;
> > -			}
> > -
> > -			/*
> > -			 * Set the freecount and check whether we need to update
> > -			 * the sparse format fields. Otherwise, skip to the next
> > -			 * record.
> > -			 */
> > -			inorec_set_freecount(mp, &bt_rec[j], finocnt);
> > -			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
> > -				goto nextrec;
> > -
> > -			/*
> > -			 * Convert the 64-bit in-core sparse inode state to the
> > -			 * 16-bit on-disk holemask.
> > -			 */
> > -			holemask = 0;
> > -			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> > -			sparse = ino_rec->ir_sparse;
> > -			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> > -				if (sparse & spmask) {
> > -					ASSERT((sparse & spmask) == spmask);
> > -					holemask |= (1 << k);
> > -				} else
> > -					ASSERT((sparse & spmask) == 0);
> > -				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> > -			}
> > -
> > -			bt_rec[j].ir_u.sp.ir_count = inocnt;
> > -			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);
> > -
> > -nextrec:
> > -			freecount += finocnt;
> > -			count += inocnt;
> > -
> > -			if (btnum == XFS_BTNUM_FINO)
> > -				ino_rec = next_free_ino_rec(ino_rec);
> > -			else
> > -				ino_rec = next_ino_rec(ino_rec);
> > -		}
> > -
> > -		if (ino_rec != NULL)  {
> > -			/*
> > -			 * get next leaf level block
> > -			 */
> > -			if (lptr->prev_buf_p != NULL)  {
> > -#ifdef XR_BLD_INO_TRACE
> > -				fprintf(stderr, "writing inobt agbno %u\n",
> > -					lptr->prev_agbno);
> > -#endif
> > -				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
> > -				libxfs_buf_mark_dirty(lptr->prev_buf_p);
> > -				libxfs_buf_relse(lptr->prev_buf_p);
> > -			}
> > -			lptr->prev_buf_p = lptr->buf_p;
> > -			lptr->prev_agbno = lptr->agbno;
> > -			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
> > -			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
> > -
> > -			error = -libxfs_buf_get(mp->m_dev,
> > -					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
> > -					XFS_FSB_TO_BB(mp, 1),
> > -					&lptr->buf_p);
> > -			if (error)
> > -				do_error(
> > -	_("Cannot grab inode btree buffer, err=%d"),
> > -						error);
> > -		}
> > -	}
> > -
> > -	if (agi_stat) {
> > -		agi_stat->first_agino = first_agino;
> > -		agi_stat->count = count;
> > -		agi_stat->freecount = freecount;
> > -	}
> > -}
> > -
> >  /* rebuild the rmap tree */
> >  
> >  /*
> > @@ -2142,14 +2000,10 @@ phase5_func(
> >  {
> >  	struct repair_ctx	sc = { .mp = mp, };
> >  	struct agi_stat		agi_stat = {0,};
> > -	uint64_t		num_inos;
> > -	uint64_t		num_free_inos;
> > -	uint64_t		finobt_num_inos;
> > -	uint64_t		finobt_num_free_inos;
> >  	struct bt_rebuild	btr_bno;
> >  	struct bt_rebuild	btr_cnt;
> > -	bt_status_t		ino_btree_curs;
> > -	bt_status_t		fino_btree_curs;
> > +	struct bt_rebuild	btr_ino;
> > +	struct bt_rebuild	btr_fino;
> >  	bt_status_t		rmap_btree_curs;
> >  	bt_status_t		refcnt_btree_curs;
> >  	int			extra_blocks = 0;
> > @@ -2184,19 +2038,8 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
> >  			agno);
> >  	}
> >  
> > -	/*
> > -	 * ok, now set up the btree cursors for the on-disk btrees (includes
> > -	 * pre-allocating all required blocks for the trees themselves)
> > -	 */
> > -	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
> > -			&num_free_inos, 0);
> > -
> > -	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > -		init_ino_cursor(mp, agno, &fino_btree_curs, &finobt_num_inos,
> > -				&finobt_num_free_inos, 1);
> > -
> > -	sb_icount_ag[agno] += num_inos;
> > -	sb_ifree_ag[agno] += num_free_inos;
> > +	init_ino_cursors(&sc, agno, num_freeblocks, &sb_icount_ag[agno],
> > +			&sb_ifree_ag[agno], &btr_ino, &btr_fino);
> >  
> >  	/*
> >  	 * Set up the btree cursors for the on-disk rmap btrees, which includes
> > @@ -2287,34 +2130,27 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
> >  			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
> >  
> >  	/*
> > -	 * build inode allocation tree.
> > +	 * build inode allocation trees.
> >  	 */
> > -	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO, &agi_stat);
> > -	write_cursor(&ino_btree_curs);
> > -
> > -	/*
> > -	 * build free inode tree
> > -	 */
> > -	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> > -		build_ino_tree(mp, agno, &fino_btree_curs,
> > -				XFS_BTNUM_FINO, NULL);
> > -		write_cursor(&fino_btree_curs);
> > -	}
> > +	build_inobt(&sc, agno, &btr_ino, &agi_stat);
> > +	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > +		build_finobt(&sc, agno, &btr_fino);
> >  
> >  	/* build the agi */
> > -	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs, &agi_stat);
> > +	build_agi(mp, agno, &btr_ino, &btr_fino, &agi_stat);
> >  
> >  	/*
> >  	 * tear down cursors
> >  	 */
> >  	finish_rebuild(mp, &btr_bno, lost_fsb);
> >  	finish_rebuild(mp, &btr_cnt, lost_fsb);
> > +	finish_rebuild(mp, &btr_ino, lost_fsb);
> > +	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > +		finish_rebuild(mp, &btr_fino, lost_fsb);
> >  	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
> >  		finish_cursor(&rmap_btree_curs);
> >  	if (xfs_sb_version_hasreflink(&mp->m_sb))
> >  		finish_cursor(&refcnt_btree_curs);
> > -	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > -		finish_cursor(&fino_btree_curs);
> >  
> >  	/*
> >  	 * release the incore per-AG bno/bcnt trees so the extent nodes
> > 
>
Darrick J. Wong May 29, 2020, 10:32 p.m. UTC | #3
On Fri, May 29, 2020 at 03:18:40PM -0700, Darrick J. Wong wrote:
> On Thu, May 28, 2020 at 11:11:21AM -0400, Brian Foster wrote:
> > On Tue, May 19, 2020 at 06:51:15PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Use the btree bulk loading functions to rebuild the inode btrees
> > > and drop the open-coded implementation.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  libxfs/libxfs_api_defs.h |    1 
> > >  repair/phase5.c          |  642 +++++++++++++++++-----------------------------
> > >  2 files changed, 240 insertions(+), 403 deletions(-)
> > > 
> > > 
> > ...
> > > diff --git a/repair/phase5.c b/repair/phase5.c
> > > index e69b042c..38f30753 100644
> > > --- a/repair/phase5.c
> > > +++ b/repair/phase5.c
> > ...
> > > @@ -372,6 +376,11 @@ estimate_ag_bload_slack(
> > >  		bload->node_slack = 0;
> > >  }
> > >  
> > > +static inline void skip_rebuild(struct bt_rebuild *btr)
> > > +{
> > > +	memset(btr, 0, sizeof(struct bt_rebuild));
> > > +}
> > > +
> > 
> > Is there any functional purpose to this? It looks like the memset could
> > be open-coded, but also seems like it could be elided if we just check
> > hasfinobt() before using the pointer..?
> 
> Hm, yeah, ok, I'll go audit this.
> 
> > >  /* Initialize a btree rebuild context. */
> > >  static void
> > >  init_rebuild(
> > > @@ -765,48 +774,38 @@ _("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
> > ...
> > > +
> > > +/* Copy one incore inode record into the inobt cursor. */
> > > +static void
> > > +get_inode_data(
> > > +	struct xfs_btree_cur		*cur,
> > > +	struct ino_tree_node		*ino_rec,
> > > +	struct agi_stat			*agi_stat)
> > > +{
> > > +	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
> > > +	int				inocnt = 0;
> > > +	int				finocnt = 0;
> > > +	int				k;
> > > +
> > > +	irec->ir_startino = ino_rec->ino_startnum;
> > > +	irec->ir_free = ino_rec->ir_free;
> > > +
> > > +	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++)  {
> > > +		ASSERT(is_inode_confirmed(ino_rec, k));
> > > +
> > > +		if (is_inode_sparse(ino_rec, k))
> > >  			continue;
> > > -
> > > -		nfinos += rec_nfinos;
> > > -		ninos += rec_ninos;
> > > -		num_recs++;
> > > +		if (is_inode_free(ino_rec, k))
> > > +			finocnt++;
> > > +		inocnt++;
> > >  	}
> > >  
> > > -	if (num_recs == 0) {
> > > -		/*
> > > -		 * easy corner-case -- no inode records
> > > -		 */
> > > -		lptr->num_blocks = 1;
> > > -		lptr->modulo = 0;
> > > -		lptr->num_recs_pb = 0;
> > > -		lptr->num_recs_tot = 0;
> > > -
> > > -		btree_curs->num_levels = 1;
> > > -		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
> > > -
> > > -		setup_cursor(mp, agno, btree_curs);
> > > +	irec->ir_count = inocnt;
> > > +	irec->ir_freecount = finocnt;
> > >  
> > > -		return;
> > > -	}
> > > +	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
> > > +		uint64_t		sparse;
> > > +		int			spmask;
> > > +		uint16_t		holemask;
> > >  
> > > -	blocks_allocated = lptr->num_blocks = howmany(num_recs,
> > > -					XR_INOBT_BLOCK_MAXRECS(mp, 0));
> > > -
> > > -	lptr->modulo = num_recs % lptr->num_blocks;
> > > -	lptr->num_recs_pb = num_recs / lptr->num_blocks;
> > > -	lptr->num_recs_tot = num_recs;
> > > -	level = 1;
> > > -
> > > -	if (lptr->num_blocks > 1)  {
> > > -		for (; btree_curs->level[level-1].num_blocks > 1
> > > -				&& level < XFS_BTREE_MAXLEVELS;
> > > -				level++)  {
> > > -			lptr = &btree_curs->level[level];
> > > -			p_lptr = &btree_curs->level[level - 1];
> > > -			lptr->num_blocks = howmany(p_lptr->num_blocks,
> > > -				XR_INOBT_BLOCK_MAXRECS(mp, level));
> > > -			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
> > > -			lptr->num_recs_pb = p_lptr->num_blocks
> > > -					/ lptr->num_blocks;
> > > -			lptr->num_recs_tot = p_lptr->num_blocks;
> > > -
> > > -			blocks_allocated += lptr->num_blocks;
> > > +		/*
> > > +		 * Convert the 64-bit in-core sparse inode state to the
> > > +		 * 16-bit on-disk holemask.
> > > +		 */
> > > +		holemask = 0;
> > > +		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> > > +		sparse = ino_rec->ir_sparse;
> > > +		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> > > +			if (sparse & spmask) {
> > > +				ASSERT((sparse & spmask) == spmask);
> > > +				holemask |= (1 << k);
> > > +			} else
> > > +				ASSERT((sparse & spmask) == 0);
> > > +			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> > >  		}
> > > +
> > > +		irec->ir_holemask = holemask;
> > > +	} else {
> > > +		irec->ir_holemask = 0;
> > >  	}
> > > -	ASSERT(lptr->num_blocks == 1);
> > > -	btree_curs->num_levels = level;
> > >  
> > > -	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
> > > -			= blocks_allocated;
> > > +	if (!agi_stat)
> > > +		return;
> > >  
> > > -	setup_cursor(mp, agno, btree_curs);
> > > +	if (agi_stat->first_agino != NULLAGINO)
> > > +		agi_stat->first_agino = ino_rec->ino_startnum;
> > 
> > This is initialized to NULLAGINO. When do we ever update it?
> 
> I don't understand your question.  The purpose of the first_agino code

Aaaaha, the test here is wrong.  This should be:

if (agi_stat->first_agino == NULLAGINO)
	agi_stat->first_agino = ino_rec->ino_startnum;

And while I'm at it I'll get rid of agi_stat.

--D

> is to set agi_newino to the first inode cluster in the AG (or NULLAGINO
> if there are no inodes) so we initialize first_ino to NULLAGINO and if
> we process any inode records, we'll update it to ir_startino of the
> first inode record that we put in the btree.
> 
> > > +	agi_stat->freecount += finocnt;
> > > +	agi_stat->count += inocnt;
> > > +}
> > >  
> > > -	*num_inos = ninos;
> > > -	*num_free_inos = nfinos;
> > > +/* Grab one inobt record. */
> > > +static int
> > > +get_inobt_record(
> > > +	struct xfs_btree_cur		*cur,
> > > +	void				*priv)
> > > +{
> > > +	struct bt_rebuild		*rebuild = priv;
> > >  
> > > -	return;
> > > +	get_inode_data(cur, rebuild->ino_rec, rebuild->agi_stat);
> > > +	rebuild->ino_rec = next_ino_rec(rebuild->ino_rec);
> > > +	return 0;
> > >  }
> > >  
> > > +/* Rebuild a inobt btree. */
> > >  static void
> > > -prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> > > -	xfs_btnum_t btnum, xfs_agino_t startino, int level)
> > > +build_inobt(
> > > +	struct repair_ctx	*sc,
> > > +	xfs_agnumber_t		agno,
> > > +	struct bt_rebuild	*btr_ino,
> > > +	struct agi_stat		*agi_stat)
> > >  {
> > > -	struct xfs_btree_block	*bt_hdr;
> > > -	xfs_inobt_key_t		*bt_key;
> > > -	xfs_inobt_ptr_t		*bt_ptr;
> > > -	xfs_agblock_t		agbno;
> > > -	bt_stat_level_t		*lptr;
> > > -	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
> > >  	int			error;
> > >  
> > > -	level++;
> > > -
> > > -	if (level >= btree_curs->num_levels)
> > > -		return;
> > > -
> > > -	lptr = &btree_curs->level[level];
> > > -	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > > -
> > > -	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
> > > -		/*
> > > -		 * this only happens once to initialize the
> > > -		 * first path up the left side of the tree
> > > -		 * where the agbno's are already set up
> > > -		 */
> > > -		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
> > > -	}
> > > -
> > > -	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
> > > -				lptr->num_recs_pb + (lptr->modulo > 0))  {
> > > -		/*
> > > -		 * write out current prev block, grab us a new block,
> > > -		 * and set the rightsib pointer of current block
> > > -		 */
> > > -#ifdef XR_BLD_INO_TRACE
> > > -		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
> > > -#endif
> > > -		if (lptr->prev_agbno != NULLAGBLOCK)  {
> > > -			ASSERT(lptr->prev_buf_p != NULL);
> > > -			libxfs_buf_mark_dirty(lptr->prev_buf_p);
> > > -			libxfs_buf_relse(lptr->prev_buf_p);
> > > -		}
> > > -		lptr->prev_agbno = lptr->agbno;;
> > > -		lptr->prev_buf_p = lptr->buf_p;
> > > -		agbno = get_next_blockaddr(agno, level, btree_curs);
> > > -
> > > -		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
> > > -
> > > -		error = -libxfs_buf_get(mp->m_dev,
> > > -				XFS_AGB_TO_DADDR(mp, agno, agbno),
> > > -				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
> > > -		if (error)
> > > -			do_error(_("Cannot grab inode btree buffer, err=%d"),
> > > -					error);
> > > -		lptr->agbno = agbno;
> > > +	btr_ino->bload.get_record = get_inobt_record;
> > > +	btr_ino->bload.claim_block = rebuild_claim_block;
> > > +	agi_stat->count = agi_stat->freecount = 0;
> > 
> > These are already initialized to zero by the caller. I suppose we might
> > as well also move the ->first_agino init to where this is allocated.
> 
> TBH we don't need to (re)init agi_stat at all.  I'll drop these two
> lines.
> 
> > > Otherwise I mostly just have the same general feedback as for the
> > > previous patch (wrt the get_record() logic and build_[f]inobt()
> > > duplication).
> 
> <nod> Will fix that too.
> 
> > Brian
> > 
> > > +	agi_stat->first_agino = NULLAGINO;
> > > +	btr_ino->agi_stat = agi_stat;
> > > +	btr_ino->ino_rec = findfirst_inode_rec(agno);
> > > +
> > > +	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
> > > +	if (error)
> > > +		do_error(
> > > +_("Insufficient memory to construct inobt rebuild transaction.\n"));
> > > +
> > > +	/* Add all observed inobt records. */
> > > +	error = -libxfs_btree_bload(btr_ino->cur, &btr_ino->bload, btr_ino);
> > > +	if (error)
> > > +		do_error(
> > > +_("Error %d while creating inobt btree for AG %u.\n"), error, agno);
> > > +
> > > +	/* Since we're not writing the AGI yet, no need to commit the cursor */
> > > +	libxfs_btree_del_cursor(btr_ino->cur, 0);
> > > +	error = -libxfs_trans_commit(sc->tp);
> > > +	if (error)
> > > +		do_error(
> > > +_("Error %d while writing inobt btree for AG %u.\n"), error, agno);
> > > +	sc->tp = NULL;
> > > +}
> > >  
> > > -		if (lptr->modulo)
> > > -			lptr->modulo--;
> > > +/* Grab one finobt record. */
> > > +static int
> > > +get_finobt_record(
> > > +	struct xfs_btree_cur		*cur,
> > > +	void				*priv)
> > > +{
> > > +	struct bt_rebuild		*rebuild = priv;
> > >  
> > > -		/*
> > > -		 * initialize block header
> > > -		 */
> > > -		lptr->buf_p->b_ops = ops;
> > > -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > > -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> > > -		libxfs_btree_init_block(mp, lptr->buf_p, btnum,
> > > -					level, 0, agno);
> > > +	get_inode_data(cur, rebuild->ino_rec, NULL);
> > > +	rebuild->ino_rec = next_free_ino_rec(rebuild->ino_rec);
> > > +	return 0;
> > > +}
> > >  
> > > -		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
> > > +/* Rebuild a finobt btree. */
> > > +static void
> > > +build_finobt(
> > > +	struct repair_ctx	*sc,
> > > +	xfs_agnumber_t		agno,
> > > +	struct bt_rebuild	*btr_fino)
> > > +{
> > > +	int			error;
> > >  
> > > -		/*
> > > -		 * propagate extent record for first extent in new block up
> > > -		 */
> > > -		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
> > > -	}
> > > -	/*
> > > -	 * add inode info to current block
> > > -	 */
> > > -	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
> > > -
> > > -	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
> > > -				    be16_to_cpu(bt_hdr->bb_numrecs));
> > > -	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
> > > -				    be16_to_cpu(bt_hdr->bb_numrecs),
> > > -				    M_IGEO(mp)->inobt_mxr[1]);
> > > -
> > > -	bt_key->ir_startino = cpu_to_be32(startino);
> > > -	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
> > > +	btr_fino->bload.get_record = get_finobt_record;
> > > +	btr_fino->bload.claim_block = rebuild_claim_block;
> > > +	btr_fino->ino_rec = findfirst_free_inode_rec(agno);
> > > +
> > > +	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
> > > +	if (error)
> > > +		do_error(
> > > +_("Insufficient memory to construct finobt rebuild transaction.\n"));
> > > +
> > > +	/* Add all observed finobt records. */
> > > +	error = -libxfs_btree_bload(btr_fino->cur, &btr_fino->bload, btr_fino);
> > > +	if (error)
> > > +		do_error(
> > > +_("Error %d while creating finobt btree for AG %u.\n"), error, agno);
> > > +
> > > +	/* Since we're not writing the AGI yet, no need to commit the cursor */
> > > +	libxfs_btree_del_cursor(btr_fino->cur, 0);
> > > +	error = -libxfs_trans_commit(sc->tp);
> > > +	if (error)
> > > +		do_error(
> > > +_("Error %d while writing finobt btree for AG %u.\n"), error, agno);
> > > +	sc->tp = NULL;
> > >  }
> > >  
> > >  /*
> > >   * XXX: yet more code that can be shared with mkfs, growfs.
> > >   */
> > >  static void
> > > -build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> > > -		bt_status_t *finobt_curs, struct agi_stat *agi_stat)
> > > +build_agi(
> > > +	struct xfs_mount	*mp,
> > > +	xfs_agnumber_t		agno,
> > > +	struct bt_rebuild	*btr_ino,
> > > +	struct bt_rebuild	*btr_fino,
> > > +	struct agi_stat		*agi_stat)
> > >  {
> > > -	xfs_buf_t	*agi_buf;
> > > -	xfs_agi_t	*agi;
> > > -	int		i;
> > > -	int		error;
> > > +	struct xfs_buf		*agi_buf;
> > > +	struct xfs_agi		*agi;
> > > +	int			i;
> > > +	int			error;
> > >  
> > >  	error = -libxfs_buf_get(mp->m_dev,
> > >  			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
> > > @@ -1008,8 +1053,8 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> > >  		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
> > >  			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
> > >  	agi->agi_count = cpu_to_be32(agi_stat->count);
> > > -	agi->agi_root = cpu_to_be32(btree_curs->root);
> > > -	agi->agi_level = cpu_to_be32(btree_curs->num_levels);
> > > +	agi->agi_root = cpu_to_be32(btr_ino->newbt.afake.af_root);
> > > +	agi->agi_level = cpu_to_be32(btr_ino->newbt.afake.af_levels);
> > >  	agi->agi_freecount = cpu_to_be32(agi_stat->freecount);
> > >  	agi->agi_newino = cpu_to_be32(agi_stat->first_agino);
> > >  	agi->agi_dirino = cpu_to_be32(NULLAGINO);
> > > @@ -1021,203 +1066,16 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
> > >  		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
> > >  
> > >  	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> > > -		agi->agi_free_root = cpu_to_be32(finobt_curs->root);
> > > -		agi->agi_free_level = cpu_to_be32(finobt_curs->num_levels);
> > > +		agi->agi_free_root =
> > > +				cpu_to_be32(btr_fino->newbt.afake.af_root);
> > > +		agi->agi_free_level =
> > > +				cpu_to_be32(btr_fino->newbt.afake.af_levels);
> > >  	}
> > >  
> > >  	libxfs_buf_mark_dirty(agi_buf);
> > >  	libxfs_buf_relse(agi_buf);
> > >  }
> > >  
> > > -/*
> > > - * rebuilds an inode tree given a cursor.  We're lazy here and call
> > > - * the routine that builds the agi
> > > - */
> > > -static void
> > > -build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
> > > -		bt_status_t *btree_curs, xfs_btnum_t btnum,
> > > -		struct agi_stat *agi_stat)
> > > -{
> > > -	xfs_agnumber_t		i;
> > > -	xfs_agblock_t		j;
> > > -	xfs_agblock_t		agbno;
> > > -	xfs_agino_t		first_agino;
> > > -	struct xfs_btree_block	*bt_hdr;
> > > -	xfs_inobt_rec_t		*bt_rec;
> > > -	ino_tree_node_t		*ino_rec;
> > > -	bt_stat_level_t		*lptr;
> > > -	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
> > > -	xfs_agino_t		count = 0;
> > > -	xfs_agino_t		freecount = 0;
> > > -	int			inocnt;
> > > -	uint8_t			finocnt;
> > > -	int			k;
> > > -	int			level = btree_curs->num_levels;
> > > -	int			spmask;
> > > -	uint64_t		sparse;
> > > -	uint16_t		holemask;
> > > -	int			error;
> > > -
> > > -	ASSERT(btnum == XFS_BTNUM_INO || btnum == XFS_BTNUM_FINO);
> > > -
> > > -	for (i = 0; i < level; i++)  {
> > > -		lptr = &btree_curs->level[i];
> > > -
> > > -		agbno = get_next_blockaddr(agno, i, btree_curs);
> > > -		error = -libxfs_buf_get(mp->m_dev,
> > > -				XFS_AGB_TO_DADDR(mp, agno, agbno),
> > > -				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
> > > -		if (error)
> > > -			do_error(_("Cannot grab inode btree buffer, err=%d"),
> > > -					error);
> > > -
> > > -		if (i == btree_curs->num_levels - 1)
> > > -			btree_curs->root = agbno;
> > > -
> > > -		lptr->agbno = agbno;
> > > -		lptr->prev_agbno = NULLAGBLOCK;
> > > -		lptr->prev_buf_p = NULL;
> > > -		/*
> > > -		 * initialize block header
> > > -		 */
> > > -
> > > -		lptr->buf_p->b_ops = ops;
> > > -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > > -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> > > -		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
> > > -	}
> > > -
> > > -	/*
> > > -	 * run along leaf, setting up records.  as we have to switch
> > > -	 * blocks, call the prop_ino_cursor routine to set up the new
> > > -	 * pointers for the parent.  that can recurse up to the root
> > > -	 * if required.  set the sibling pointers for leaf level here.
> > > -	 */
> > > -	if (btnum == XFS_BTNUM_FINO)
> > > -		ino_rec = findfirst_free_inode_rec(agno);
> > > -	else
> > > -		ino_rec = findfirst_inode_rec(agno);
> > > -
> > > -	if (ino_rec != NULL)
> > > -		first_agino = ino_rec->ino_startnum;
> > > -	else
> > > -		first_agino = NULLAGINO;
> > > -
> > > -	lptr = &btree_curs->level[0];
> > > -
> > > -	for (i = 0; i < lptr->num_blocks; i++)  {
> > > -		/*
> > > -		 * block initialization, lay in block header
> > > -		 */
> > > -		lptr->buf_p->b_ops = ops;
> > > -		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
> > > -		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
> > > -		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
> > > -
> > > -		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
> > > -		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
> > > -							(lptr->modulo > 0));
> > > -
> > > -		if (lptr->modulo > 0)
> > > -			lptr->modulo--;
> > > -
> > > -		if (lptr->num_recs_pb > 0)
> > > -			prop_ino_cursor(mp, agno, btree_curs, btnum,
> > > -					ino_rec->ino_startnum, 0);
> > > -
> > > -		bt_rec = (xfs_inobt_rec_t *)
> > > -			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
> > > -		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
> > > -			ASSERT(ino_rec != NULL);
> > > -			bt_rec[j].ir_startino =
> > > -					cpu_to_be32(ino_rec->ino_startnum);
> > > -			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);
> > > -
> > > -			inocnt = finocnt = 0;
> > > -			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
> > > -				ASSERT(is_inode_confirmed(ino_rec, k));
> > > -
> > > -				if (is_inode_sparse(ino_rec, k))
> > > -					continue;
> > > -				if (is_inode_free(ino_rec, k))
> > > -					finocnt++;
> > > -				inocnt++;
> > > -			}
> > > -
> > > -			/*
> > > -			 * Set the freecount and check whether we need to update
> > > -			 * the sparse format fields. Otherwise, skip to the next
> > > -			 * record.
> > > -			 */
> > > -			inorec_set_freecount(mp, &bt_rec[j], finocnt);
> > > -			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
> > > -				goto nextrec;
> > > -
> > > -			/*
> > > -			 * Convert the 64-bit in-core sparse inode state to the
> > > -			 * 16-bit on-disk holemask.
> > > -			 */
> > > -			holemask = 0;
> > > -			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> > > -			sparse = ino_rec->ir_sparse;
> > > -			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> > > -				if (sparse & spmask) {
> > > -					ASSERT((sparse & spmask) == spmask);
> > > -					holemask |= (1 << k);
> > > -				} else
> > > -					ASSERT((sparse & spmask) == 0);
> > > -				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> > > -			}
> > > -
> > > -			bt_rec[j].ir_u.sp.ir_count = inocnt;
> > > -			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);
> > > -
> > > -nextrec:
> > > -			freecount += finocnt;
> > > -			count += inocnt;
> > > -
> > > -			if (btnum == XFS_BTNUM_FINO)
> > > -				ino_rec = next_free_ino_rec(ino_rec);
> > > -			else
> > > -				ino_rec = next_ino_rec(ino_rec);
> > > -		}
> > > -
> > > -		if (ino_rec != NULL)  {
> > > -			/*
> > > -			 * get next leaf level block
> > > -			 */
> > > -			if (lptr->prev_buf_p != NULL)  {
> > > -#ifdef XR_BLD_INO_TRACE
> > > -				fprintf(stderr, "writing inobt agbno %u\n",
> > > -					lptr->prev_agbno);
> > > -#endif
> > > -				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
> > > -				libxfs_buf_mark_dirty(lptr->prev_buf_p);
> > > -				libxfs_buf_relse(lptr->prev_buf_p);
> > > -			}
> > > -			lptr->prev_buf_p = lptr->buf_p;
> > > -			lptr->prev_agbno = lptr->agbno;
> > > -			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
> > > -			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
> > > -
> > > -			error = -libxfs_buf_get(mp->m_dev,
> > > -					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
> > > -					XFS_FSB_TO_BB(mp, 1),
> > > -					&lptr->buf_p);
> > > -			if (error)
> > > -				do_error(
> > > -	_("Cannot grab inode btree buffer, err=%d"),
> > > -						error);
> > > -		}
> > > -	}
> > > -
> > > -	if (agi_stat) {
> > > -		agi_stat->first_agino = first_agino;
> > > -		agi_stat->count = count;
> > > -		agi_stat->freecount = freecount;
> > > -	}
> > > -}
> > > -
> > >  /* rebuild the rmap tree */
> > >  
> > >  /*
> > > @@ -2142,14 +2000,10 @@ phase5_func(
> > >  {
> > >  	struct repair_ctx	sc = { .mp = mp, };
> > >  	struct agi_stat		agi_stat = {0,};
> > > -	uint64_t		num_inos;
> > > -	uint64_t		num_free_inos;
> > > -	uint64_t		finobt_num_inos;
> > > -	uint64_t		finobt_num_free_inos;
> > >  	struct bt_rebuild	btr_bno;
> > >  	struct bt_rebuild	btr_cnt;
> > > -	bt_status_t		ino_btree_curs;
> > > -	bt_status_t		fino_btree_curs;
> > > +	struct bt_rebuild	btr_ino;
> > > +	struct bt_rebuild	btr_fino;
> > >  	bt_status_t		rmap_btree_curs;
> > >  	bt_status_t		refcnt_btree_curs;
> > >  	int			extra_blocks = 0;
> > > @@ -2184,19 +2038,8 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
> > >  			agno);
> > >  	}
> > >  
> > > -	/*
> > > -	 * ok, now set up the btree cursors for the on-disk btrees (includes
> > > -	 * pre-allocating all required blocks for the trees themselves)
> > > -	 */
> > > -	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
> > > -			&num_free_inos, 0);
> > > -
> > > -	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > > -		init_ino_cursor(mp, agno, &fino_btree_curs, &finobt_num_inos,
> > > -				&finobt_num_free_inos, 1);
> > > -
> > > -	sb_icount_ag[agno] += num_inos;
> > > -	sb_ifree_ag[agno] += num_free_inos;
> > > +	init_ino_cursors(&sc, agno, num_freeblocks, &sb_icount_ag[agno],
> > > +			&sb_ifree_ag[agno], &btr_ino, &btr_fino);
> > >  
> > >  	/*
> > >  	 * Set up the btree cursors for the on-disk rmap btrees, which includes
> > > @@ -2287,34 +2130,27 @@ _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
> > >  			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
> > >  
> > >  	/*
> > > -	 * build inode allocation tree.
> > > +	 * build inode allocation trees.
> > >  	 */
> > > -	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO, &agi_stat);
> > > -	write_cursor(&ino_btree_curs);
> > > -
> > > -	/*
> > > -	 * build free inode tree
> > > -	 */
> > > -	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> > > -		build_ino_tree(mp, agno, &fino_btree_curs,
> > > -				XFS_BTNUM_FINO, NULL);
> > > -		write_cursor(&fino_btree_curs);
> > > -	}
> > > +	build_inobt(&sc, agno, &btr_ino, &agi_stat);
> > > +	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > > +		build_finobt(&sc, agno, &btr_fino);
> > >  
> > >  	/* build the agi */
> > > -	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs, &agi_stat);
> > > +	build_agi(mp, agno, &btr_ino, &btr_fino, &agi_stat);
> > >  
> > >  	/*
> > >  	 * tear down cursors
> > >  	 */
> > >  	finish_rebuild(mp, &btr_bno, lost_fsb);
> > >  	finish_rebuild(mp, &btr_cnt, lost_fsb);
> > > +	finish_rebuild(mp, &btr_ino, lost_fsb);
> > > +	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > > +		finish_rebuild(mp, &btr_fino, lost_fsb);
> > >  	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
> > >  		finish_cursor(&rmap_btree_curs);
> > >  	if (xfs_sb_version_hasreflink(&mp->m_sb))
> > >  		finish_cursor(&refcnt_btree_curs);
> > > -	if (xfs_sb_version_hasfinobt(&mp->m_sb))
> > > -		finish_cursor(&fino_btree_curs);
> > >  
> > >  	/*
> > >  	 * release the incore per-AG bno/bcnt trees so the extent nodes
> > > 
> >
diff mbox series

Patch

diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index bace739c..5d0868c2 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -115,6 +115,7 @@ 
 #define xfs_init_local_fork		libxfs_init_local_fork
 
 #define xfs_inobt_maxrecs		libxfs_inobt_maxrecs
+#define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
 #define xfs_inode_from_disk		libxfs_inode_from_disk
 #define xfs_inode_to_disk		libxfs_inode_to_disk
 #define xfs_inode_validate_cowextsize	libxfs_inode_validate_cowextsize
diff --git a/repair/phase5.c b/repair/phase5.c
index e69b042c..38f30753 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -84,6 +84,10 @@  struct bt_rebuild {
 			struct extent_tree_node	*bno_rec;
 			xfs_agblock_t		*freeblks;
 		};
+		struct {
+			struct ino_tree_node	*ino_rec;
+			struct agi_stat		*agi_stat;
+		};
 	};
 };
 
@@ -372,6 +376,11 @@  estimate_ag_bload_slack(
 		bload->node_slack = 0;
 }
 
+static inline void skip_rebuild(struct bt_rebuild *btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+}
+
 /* Initialize a btree rebuild context. */
 static void
 init_rebuild(
@@ -765,48 +774,38 @@  _("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
 	sc->tp = NULL;
 }
 
-/*
- * XXX(hch): any reason we don't just look at mp->m_inobt_mxr?
- */
-#define XR_INOBT_BLOCK_MAXRECS(mp, level) \
-			libxfs_inobt_maxrecs((mp), (mp)->m_sb.sb_blocksize, \
-						(level) == 0)
+/* Inode Btrees */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because inode tree building happens before
- * freespace tree building.
- */
+/* Initialize both inode btree cursors as needed. */
 static void
-init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-		uint64_t *num_inos, uint64_t *num_free_inos, int finobt)
+init_ino_cursors(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	unsigned int		free_space,
+	uint64_t		*num_inos,
+	uint64_t		*num_free_inos,
+	struct bt_rebuild	*btr_ino,
+	struct bt_rebuild	*btr_fino)
 {
-	uint64_t		ninos;
-	uint64_t		nfinos;
-	int			rec_nfinos;
-	int			rec_ninos;
-	ino_tree_node_t		*ino_rec;
-	int			num_recs;
-	int			level;
-	bt_stat_level_t		*lptr;
-	bt_stat_level_t		*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-	int			i;
+	struct ino_tree_node	*ino_rec;
+	unsigned int		ino_recs = 0;
+	unsigned int		fino_recs = 0;
+	bool			finobt;
+	int			error;
 
-	*num_inos = *num_free_inos = 0;
-	ninos = nfinos = 0;
+	finobt = xfs_sb_version_hasfinobt(&sc->mp->m_sb);
+	init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, free_space, btr_ino);
 
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_INOBT;
+	/* Compute inode statistics. */
+	*num_free_inos = 0;
+	*num_inos = 0;
+	for (ino_rec = findfirst_inode_rec(agno);
+	     ino_rec != NULL;
+	     ino_rec = next_ino_rec(ino_rec))  {
+		unsigned int	rec_ninos = 0;
+		unsigned int	rec_nfinos = 0;
+		int		i;
 
-	/*
-	 * build up statistics
-	 */
-	ino_rec = findfirst_inode_rec(agno);
-	for (num_recs = 0; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec))  {
-		rec_ninos = 0;
-		rec_nfinos = 0;
 		for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
 			ASSERT(is_inode_confirmed(ino_rec, i));
 			/*
@@ -820,174 +819,220 @@  init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 			rec_ninos++;
 		}
 
-		/*
-		 * finobt only considers records with free inodes
-		 */
-		if (finobt && !rec_nfinos)
+		*num_free_inos += rec_nfinos;
+		*num_inos += rec_ninos;
+		ino_recs++;
+
+		/* finobt only considers records with free inodes */
+		if (rec_nfinos)
+			fino_recs++;
+	}
+
+	btr_ino->cur = libxfs_inobt_stage_cursor(sc->mp, &btr_ino->newbt.afake,
+			agno, XFS_BTNUM_INO);
+
+	/* Compute how many inobt blocks we'll need. */
+	error = -libxfs_btree_bload_compute_geometry(btr_ino->cur,
+			&btr_ino->bload, ino_recs);
+	if (error)
+		do_error(
+_("Unable to compute inode btree geometry, error %d.\n"), error);
+
+	setup_rebuild(sc->mp, agno, btr_ino, btr_ino->bload.nr_blocks);
+
+	if (!finobt) {
+		skip_rebuild(btr_fino);
+		return;
+	}
+
+	init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, free_space, btr_fino);
+	btr_fino->cur = libxfs_inobt_stage_cursor(sc->mp,
+			&btr_fino->newbt.afake, agno, XFS_BTNUM_FINO);
+
+	/* Compute how many finobt blocks we'll need. */
+	error = -libxfs_btree_bload_compute_geometry(btr_fino->cur,
+			&btr_fino->bload, fino_recs);
+	if (error)
+		do_error(
+_("Unable to compute free inode btree geometry, error %d.\n"), error);
+
+	setup_rebuild(sc->mp, agno, btr_fino, btr_fino->bload.nr_blocks);
+}
+
+/* Copy one incore inode record into the inobt cursor and tally AGI stats. */
+static void
+get_inode_data(
+	struct xfs_btree_cur		*cur,
+	struct ino_tree_node		*ino_rec,
+	struct agi_stat			*agi_stat)
+{
+	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
+	int				inocnt = 0;
+	int				finocnt = 0;
+	int				k;
+
+	irec->ir_startino = ino_rec->ino_startnum;
+	irec->ir_free = ino_rec->ir_free;
+
+	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++)  {
+		ASSERT(is_inode_confirmed(ino_rec, k));
+
+		if (is_inode_sparse(ino_rec, k))
 			continue;
-
-		nfinos += rec_nfinos;
-		ninos += rec_ninos;
-		num_recs++;
+		if (is_inode_free(ino_rec, k))
+			finocnt++;
+		inocnt++;
 	}
 
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no inode records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
-
-		setup_cursor(mp, agno, btree_curs);
+	irec->ir_count = inocnt;
+	irec->ir_freecount = finocnt;
 
-		return;
-	}
+	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+		uint64_t		sparse;
+		int			spmask;
+		uint16_t		holemask;
 
-	blocks_allocated = lptr->num_blocks = howmany(num_recs,
-					XR_INOBT_BLOCK_MAXRECS(mp, 0));
-
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
-
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-				XR_INOBT_BLOCK_MAXRECS(mp, level));
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-
-			blocks_allocated += lptr->num_blocks;
+		/*
+		 * Convert the 64-bit in-core sparse inode state to the
+		 * 16-bit on-disk holemask.
+		 */
+		holemask = 0;
+		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
+		sparse = ino_rec->ir_sparse;
+		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
+			if (sparse & spmask) {
+				ASSERT((sparse & spmask) == spmask);
+				holemask |= (1 << k);
+			} else
+				ASSERT((sparse & spmask) == 0);
+			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
 		}
+
+		irec->ir_holemask = holemask;
+	} else {
+		irec->ir_holemask = 0;
 	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
 
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
+	if (!agi_stat)
+		return;
 
-	setup_cursor(mp, agno, btree_curs);
+	if (agi_stat->first_agino == NULLAGINO)	/* latch the first record only */
+		agi_stat->first_agino = ino_rec->ino_startnum;
+	agi_stat->freecount += finocnt;
+	agi_stat->count += inocnt;
+}
 
-	*num_inos = ninos;
-	*num_free_inos = nfinos;
+/* Grab one inobt record. */
+static int
+get_inobt_record(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*rebuild = priv;
 
-	return;
+	get_inode_data(cur, rebuild->ino_rec, rebuild->agi_stat);
+	rebuild->ino_rec = next_ino_rec(rebuild->ino_rec);
+	return 0;
 }
 
+/* Rebuild an inobt btree. */
 static void
-prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-	xfs_btnum_t btnum, xfs_agino_t startino, int level)
+build_inobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_ino,
+	struct agi_stat		*agi_stat)
 {
-	struct xfs_btree_block	*bt_hdr;
-	xfs_inobt_key_t		*bt_key;
-	xfs_inobt_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
 	int			error;
 
-	level++;
-
-	if (level >= btree_curs->num_levels)
-		return;
-
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_buf_mark_dirty(lptr->prev_buf_p);
-			libxfs_buf_relse(lptr->prev_buf_p);
-		}
-		lptr->prev_agbno = lptr->agbno;;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
-
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
-
-		error = -libxfs_buf_get(mp->m_dev,
-				XFS_AGB_TO_DADDR(mp, agno, agbno),
-				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
-		if (error)
-			do_error(_("Cannot grab inode btree buffer, err=%d"),
-					error);
-		lptr->agbno = agbno;
+	btr_ino->bload.get_record = get_inobt_record;
+	btr_ino->bload.claim_block = rebuild_claim_block;
+	agi_stat->count = agi_stat->freecount = 0;
+	agi_stat->first_agino = NULLAGINO;
+	btr_ino->agi_stat = agi_stat;
+	btr_ino->ino_rec = findfirst_inode_rec(agno);
+
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct inobt rebuild transaction.\n"));
+
+	/* Add all observed inobt records. */
+	error = -libxfs_btree_bload(btr_ino->cur, &btr_ino->bload, btr_ino);
+	if (error)
+		do_error(
+_("Error %d while creating inobt btree for AG %u.\n"), error, agno);
+
+	/* Since we're not writing the AGI yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(btr_ino->cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing inobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
+}
 
-		if (lptr->modulo)
-			lptr->modulo--;
+/* Grab one finobt record. */
+static int
+get_finobt_record(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*rebuild = priv;
 
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum,
-					level, 0, agno);
+	get_inode_data(cur, rebuild->ino_rec, NULL);
+	rebuild->ino_rec = next_free_ino_rec(rebuild->ino_rec);
+	return 0;
+}
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+/* Rebuild a finobt btree. */
+static void
+build_finobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_fino)
+{
+	int			error;
 
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
-	}
-	/*
-	 * add inode info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
-
-	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    M_IGEO(mp)->inobt_mxr[1]);
-
-	bt_key->ir_startino = cpu_to_be32(startino);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	btr_fino->bload.get_record = get_finobt_record;
+	btr_fino->bload.claim_block = rebuild_claim_block;
+	btr_fino->ino_rec = findfirst_free_inode_rec(agno);
+
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct finobt rebuild transaction.\n"));
+
+	/* Add all observed finobt records. */
+	error = -libxfs_btree_bload(btr_fino->cur, &btr_fino->bload, btr_fino);
+	if (error)
+		do_error(
+_("Error %d while creating finobt btree for AG %u.\n"), error, agno);
+
+	/* Since we're not writing the AGI yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(btr_fino->cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing finobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /*
  * XXX: yet more code that can be shared with mkfs, growfs.
  */
 static void
-build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-		bt_status_t *finobt_curs, struct agi_stat *agi_stat)
+build_agi(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_ino,
+	struct bt_rebuild	*btr_fino,
+	struct agi_stat		*agi_stat)
 {
-	xfs_buf_t	*agi_buf;
-	xfs_agi_t	*agi;
-	int		i;
-	int		error;
+	struct xfs_buf		*agi_buf;
+	struct xfs_agi		*agi;
+	int			i;
+	int			error;
 
 	error = -libxfs_buf_get(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
@@ -1008,8 +1053,8 @@  build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
 			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
 	agi->agi_count = cpu_to_be32(agi_stat->count);
-	agi->agi_root = cpu_to_be32(btree_curs->root);
-	agi->agi_level = cpu_to_be32(btree_curs->num_levels);
+	agi->agi_root = cpu_to_be32(btr_ino->newbt.afake.af_root);
+	agi->agi_level = cpu_to_be32(btr_ino->newbt.afake.af_levels);
 	agi->agi_freecount = cpu_to_be32(agi_stat->freecount);
 	agi->agi_newino = cpu_to_be32(agi_stat->first_agino);
 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
@@ -1021,203 +1066,16 @@  build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
 
 	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-		agi->agi_free_root = cpu_to_be32(finobt_curs->root);
-		agi->agi_free_level = cpu_to_be32(finobt_curs->num_levels);
+		agi->agi_free_root =
+				cpu_to_be32(btr_fino->newbt.afake.af_root);
+		agi->agi_free_level =
+				cpu_to_be32(btr_fino->newbt.afake.af_levels);
 	}
 
 	libxfs_buf_mark_dirty(agi_buf);
 	libxfs_buf_relse(agi_buf);
 }
 
-/*
- * rebuilds an inode tree given a cursor.  We're lazy here and call
- * the routine that builds the agi
- */
-static void
-build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_btnum_t btnum,
-		struct agi_stat *agi_stat)
-{
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	xfs_agino_t		first_agino;
-	struct xfs_btree_block	*bt_hdr;
-	xfs_inobt_rec_t		*bt_rec;
-	ino_tree_node_t		*ino_rec;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-	xfs_agino_t		count = 0;
-	xfs_agino_t		freecount = 0;
-	int			inocnt;
-	uint8_t			finocnt;
-	int			k;
-	int			level = btree_curs->num_levels;
-	int			spmask;
-	uint64_t		sparse;
-	uint16_t		holemask;
-	int			error;
-
-	ASSERT(btnum == XFS_BTNUM_INO || btnum == XFS_BTNUM_FINO);
-
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		error = -libxfs_buf_get(mp->m_dev,
-				XFS_AGB_TO_DADDR(mp, agno, agbno),
-				XFS_FSB_TO_BB(mp, 1), &lptr->buf_p);
-		if (error)
-			do_error(_("Cannot grab inode btree buffer, err=%d"),
-					error);
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
-	}
-
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_ino_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	if (btnum == XFS_BTNUM_FINO)
-		ino_rec = findfirst_free_inode_rec(agno);
-	else
-		ino_rec = findfirst_inode_rec(agno);
-
-	if (ino_rec != NULL)
-		first_agino = ino_rec->ino_startnum;
-	else
-		first_agino = NULLAGINO;
-
-	lptr = &btree_curs->level[0];
-
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
-							(lptr->modulo > 0));
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0)
-			prop_ino_cursor(mp, agno, btree_curs, btnum,
-					ino_rec->ino_startnum, 0);
-
-		bt_rec = (xfs_inobt_rec_t *)
-			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(ino_rec != NULL);
-			bt_rec[j].ir_startino =
-					cpu_to_be32(ino_rec->ino_startnum);
-			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);
-
-			inocnt = finocnt = 0;
-			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
-				ASSERT(is_inode_confirmed(ino_rec, k));
-
-				if (is_inode_sparse(ino_rec, k))
-					continue;
-				if (is_inode_free(ino_rec, k))
-					finocnt++;
-				inocnt++;
-			}
-
-			/*
-			 * Set the freecount and check whether we need to update
-			 * the sparse format fields. Otherwise, skip to the next
-			 * record.
-			 */
-			inorec_set_freecount(mp, &bt_rec[j], finocnt);
-			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
-				goto nextrec;
-
-			/*
-			 * Convert the 64-bit in-core sparse inode state to the
-			 * 16-bit on-disk holemask.
-			 */
-			holemask = 0;
-			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
-			sparse = ino_rec->ir_sparse;
-			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
-				if (sparse & spmask) {
-					ASSERT((sparse & spmask) == spmask);
-					holemask |= (1 << k);
-				} else
-					ASSERT((sparse & spmask) == 0);
-				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
-			}
-
-			bt_rec[j].ir_u.sp.ir_count = inocnt;
-			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);
-
-nextrec:
-			freecount += finocnt;
-			count += inocnt;
-
-			if (btnum == XFS_BTNUM_FINO)
-				ino_rec = next_free_ino_rec(ino_rec);
-			else
-				ino_rec = next_ino_rec(ino_rec);
-		}
-
-		if (ino_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_INO_TRACE
-				fprintf(stderr, "writing inobt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_buf_mark_dirty(lptr->prev_buf_p);
-				libxfs_buf_relse(lptr->prev_buf_p);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
-
-			error = -libxfs_buf_get(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1),
-					&lptr->buf_p);
-			if (error)
-				do_error(
-	_("Cannot grab inode btree buffer, err=%d"),
-						error);
-		}
-	}
-
-	if (agi_stat) {
-		agi_stat->first_agino = first_agino;
-		agi_stat->count = count;
-		agi_stat->freecount = freecount;
-	}
-}
-
 /* rebuild the rmap tree */
 
 /*
@@ -2142,14 +2000,10 @@  phase5_func(
 {
 	struct repair_ctx	sc = { .mp = mp, };
 	struct agi_stat		agi_stat = {0,};
-	uint64_t		num_inos;
-	uint64_t		num_free_inos;
-	uint64_t		finobt_num_inos;
-	uint64_t		finobt_num_free_inos;
 	struct bt_rebuild	btr_bno;
 	struct bt_rebuild	btr_cnt;
-	bt_status_t		ino_btree_curs;
-	bt_status_t		fino_btree_curs;
+	struct bt_rebuild	btr_ino;
+	struct bt_rebuild	btr_fino;
 	bt_status_t		rmap_btree_curs;
 	bt_status_t		refcnt_btree_curs;
 	int			extra_blocks = 0;
@@ -2184,19 +2038,8 @@  _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
 			agno);
 	}
 
-	/*
-	 * ok, now set up the btree cursors for the on-disk btrees (includes
-	 * pre-allocating all required blocks for the trees themselves)
-	 */
-	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
-			&num_free_inos, 0);
-
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		init_ino_cursor(mp, agno, &fino_btree_curs, &finobt_num_inos,
-				&finobt_num_free_inos, 1);
-
-	sb_icount_ag[agno] += num_inos;
-	sb_ifree_ag[agno] += num_free_inos;
+	init_ino_cursors(&sc, agno, num_freeblocks, &sb_icount_ag[agno],
+			&sb_ifree_ag[agno], &btr_ino, &btr_fino);
 
 	/*
 	 * Set up the btree cursors for the on-disk rmap btrees, which includes
@@ -2287,34 +2130,27 @@  _("unable to rebuild AG %u.  Not enough free space in on-disk AG.\n"),
 			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
 
 	/*
-	 * build inode allocation tree.
+	 * build inode allocation trees.
 	 */
-	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO, &agi_stat);
-	write_cursor(&ino_btree_curs);
-
-	/*
-	 * build free inode tree
-	 */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-		build_ino_tree(mp, agno, &fino_btree_curs,
-				XFS_BTNUM_FINO, NULL);
-		write_cursor(&fino_btree_curs);
-	}
+	build_inobt(&sc, agno, &btr_ino, &agi_stat);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		build_finobt(&sc, agno, &btr_fino);
 
 	/* build the agi */
-	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs, &agi_stat);
+	build_agi(mp, agno, &btr_ino, &btr_fino, &agi_stat);
 
 	/*
 	 * tear down cursors
 	 */
 	finish_rebuild(mp, &btr_bno, lost_fsb);
 	finish_rebuild(mp, &btr_cnt, lost_fsb);
+	finish_rebuild(mp, &btr_ino, lost_fsb);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		finish_rebuild(mp, &btr_fino, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_cursor(&rmap_btree_curs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		finish_cursor(&fino_btree_curs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so the extent nodes