@@ -4534,6 +4534,40 @@ xfs_btree_compute_maxlevels(
return level;
}
+/*
+ * Compute the maximum height of a btree that is allowed to consume up to the
+ * given number of blocks.
+ *
+ * @max_btblocks is the total number of blocks the btree may occupy and
+ * @leaf_mnr is the minimum number of blocks in the next level down that each
+ * block points to.  Returns 0 if there are no blocks at all; otherwise returns
+ * the number of levels, which is at least 1.
+ */
+unsigned int
+xfs_btree_compute_maxlevels_size(
+	unsigned long long	max_btblocks,
+	unsigned int		leaf_mnr)
+{
+	unsigned long long	leaf_blocks = leaf_mnr;
+	unsigned long long	blocks_left;
+	unsigned int		maxlevels;
+
+	if (max_btblocks < 1)
+		return 0;
+
+	/*
+	 * A fanout of zero can never populate a level below the root, and it
+	 * would make the loop below spin forever without consuming any blocks.
+	 * Only the root block is counted in that case.
+	 */
+	if (leaf_mnr == 0)
+		return 1;
+
+	/*
+	 * The loop increments maxlevels as long as there would be enough
+	 * blocks left in the reservation to handle each block at the current
+	 * level pointing to the minimum possible number of blocks at the next
+	 * level down.  We start the loop assuming a single-level btree
+	 * consuming one block.
+	 */
+	maxlevels = 1;
+	blocks_left = max_btblocks - 1;
+	while (leaf_blocks < blocks_left) {
+		maxlevels++;
+		blocks_left -= leaf_blocks;
+
+		/*
+		 * If the next level down would need more blocks than remain,
+		 * we are done.  Test this by division so that the
+		 * multiplication below cannot wrap around 64 bits and restart
+		 * the loop with a bogus level size.
+		 */
+		if (leaf_blocks > blocks_left / leaf_mnr)
+			break;
+		leaf_blocks *= leaf_mnr;
+	}
+
+	return maxlevels;
+}
+
/*
* Query a regular btree for all records overlapping a given interval.
* Start with a LE lookup of the key of low_rec and return all records
@@ -484,6 +484,8 @@ xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
unsigned int max_recs);
uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
+/*
+ * Maximum height of a btree that may consume at most max_btblocks blocks when
+ * each block points to at least leaf_mnr blocks in the next level down.
+ * Returns 0 if max_btblocks is zero.
+ */
+unsigned int xfs_btree_compute_maxlevels_size(unsigned long long max_btblocks,
+ unsigned int leaf_mnr);
unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
/*
@@ -535,30 +535,32 @@ xfs_rmapbt_maxrecs(
}
/* Compute the maximum height of an rmap btree. */
-void
+unsigned int
xfs_rmapbt_compute_maxlevels(
- struct xfs_mount *mp)
+ struct xfs_mount *mp)
{
+ if (!xfs_has_reflink(mp)) {
+ /*
+ * If there's no block sharing, compute the maximum rmapbt
+ * height assuming one rmap record per AG block.
+ */
+ return xfs_btree_compute_maxlevels(mp->m_rmap_mnr,
+ mp->m_sb.sb_agblocks);
+ }
+
/*
- * On a non-reflink filesystem, the maximum number of rmap
- * records is the number of blocks in the AG, hence the max
- * rmapbt height is log_$maxrecs($agblocks). However, with
- * reflink each AG block can have up to 2^32 (per the refcount
- * record format) owners, which means that theoretically we
- * could face up to 2^64 rmap records.
+ * Compute the asymptotic maxlevels for an rmapbt on a reflink fs.
*
- * That effectively means that the max rmapbt height must be
- * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG
- * blocks to feed the rmapbt long before the rmapbt reaches
- * maximum height. The reflink code uses ag_resv_critical to
- * disallow reflinking when less than 10% of the per-AG metadata
- * block reservation since the fallback is a regular file copy.
+ * On a reflink filesystem, each AG block can have up to 2^32 (per the
+ * refcount record format) owners, which means that theoretically we
+ * could face up to 2^64 rmap records. However, we're likely to run
+ * out of blocks in the AG long before that happens, which means that
+ * we must compute the max height based on what the btree will look
+ * like if it consumes almost all the blocks in the AG due to a maximal
+ * sharing factor.
+ *
+ * The AG size (sb_agblocks) is passed as the max_btblocks block budget
+ * and m_rmap_mnr[1] as the leaf_mnr minimum fanout.
+ *
+ * The height is returned rather than stored in the mount; xfs_mountfs
+ * assigns the result to mp->m_rmap_maxlevels.
*/
- if (xfs_has_reflink(mp))
- mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
- else
- mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
- mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+ return xfs_btree_compute_maxlevels_size(mp->m_sb.sb_agblocks,
+ mp->m_rmap_mnr[1]);
}
/* Calculate the refcount btree size for some records. */
@@ -49,7 +49,7 @@ struct xfs_btree_cur *xfs_rmapbt_stage_cursor(struct xfs_mount *mp,
void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, struct xfs_buf *agbp);
int xfs_rmapbt_maxrecs(int blocklen, int leaf);
-extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
+/* Returns the maximum rmapbt height for mp instead of storing it in the mount. */
+unsigned int xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
@@ -814,6 +814,15 @@ xfs_trans_resv_calc(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
+ unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
+
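+	/*
+	 * In the early days of rmap+reflink, we hardcoded the rmap maxlevels
+	 * to 9 even if the AG size was smaller.  Temporarily substitute that
+	 * old value while the reservations below are computed so that they do
+	 * not change; the real value saved above is restored at the end of
+	 * this function.
+	 */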
+ if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
+ mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
+
/*
* The following transactions are logged in physical format and
* require a permanent reservation on space.
@@ -916,4 +925,7 @@ xfs_trans_resv_calc(
resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
+
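+	/*
+	 * Restore the real rmapbt height that was saved on entry.  This must
+	 * stay the last step so that every reservation above is computed with
+	 * the overridden value.
+	 */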
+ mp->m_rmap_maxlevels = rmap_maxlevels;
}
@@ -17,6 +17,13 @@
/* Adding one rmap could split every level up to the top of the tree. */
#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
+/*
+ * Note that we historically set m_rmap_maxlevels to 9 when reflink was
+ * enabled, so we must preserve this behavior to avoid changing the transaction
+ * space reservations.  xfs_trans_resv_calc() temporarily substitutes this
+ * value for m_rmap_maxlevels when both rmapbt and reflink are enabled.
+ */
+#define XFS_OLD_REFLINK_RMAP_MAXLEVELS (9)
+
/* Blocks we might need to add "b" rmaps to a tree. */
#define XFS_NRMAPADD_SPACE_RES(mp, b)\
(((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
@@ -635,7 +635,7 @@ xfs_mountfs(
xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
xfs_mount_setup_inode_geom(mp);
- xfs_rmapbt_compute_maxlevels(mp);
+ mp->m_rmap_maxlevels = xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
/*