[06/37] xfs: add realtime rmap btree operations

Message ID	173463579857.1571512.12532208026121340533.stgit@frogsfrogsfrogs (mailing list archive)
State	New
Headers	show Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B14571A2389 for <linux-xfs@vger.kernel.org>; Thu, 19 Dec 2024 19:24:58 +0000 (UTC) Date: Thu, 19 Dec 2024 11:24:57 -0800 Subject: [PATCH 06/37] xfs: add realtime rmap btree operations From: "Darrick J. Wong" <djwong@kernel.org> To: djwong@kernel.org Cc: hch@lst.de, linux-xfs@vger.kernel.org, hch@lst.de Message-ID: <173463579857.1571512.12532208026121340533.stgit@frogsfrogsfrogs> In-Reply-To: <173463579653.1571512.7862891421559358642.stgit@frogsfrogsfrogs> References: <173463579653.1571512.7862891421559358642.stgit@frogsfrogsfrogs> Precedence: bulk MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit
Series	[01/37] xfs: add some rtgroup inode helpers \| expand [01/37] xfs: add some rtgroup inode helpers [02/37] xfs: prepare rmap btree cursor tracepoints for realtime [03/37] xfs: simplify the xfs_rmap_{alloc,free}_extent calling conventions [04/37] xfs: introduce realtime rmap btree ondisk definitions [05/37] xfs: realtime rmap btree transaction reservations [06/37] xfs: add realtime rmap btree operations [07/37] xfs: prepare rmap functions to deal with rtrmapbt [08/37] xfs: add a realtime flag to the rmap update log redo items [09/37] xfs: support recovering rmap intent items targetting realtime extents [10/37] xfs: pretty print metadata file types in error messages [11/37] xfs: support file data forks containing metadata btrees [12/37] xfs: add realtime reverse map inode to metadata directory [13/37] xfs: add metadata reservations for realtime rmap btrees [14/37] xfs: wire up a new metafile type for the realtime rmap [15/37] xfs: wire up rmap map and unmap to the realtime rmapbt [16/37] xfs: create routine to allocate and initialize a realtime rmap btree inode [17/37] xfs: wire up getfsmap to the realtime reverse mapping btree [18/37] xfs: check that the rtrmapbt maxlevels doesn't increase when growing fs [19/37] xfs: report realtime rmap btree corruption errors to the health system [20/37] xfs: allow queued realtime intents to drain before scrubbing [21/37] xfs: scrub the realtime rmapbt [22/37] xfs: cross-reference realtime bitmap to realtime rmapbt scrubber [23/37] xfs: cross-reference the realtime rmapbt [24/37] xfs: scan rt rmap when we're doing an intense rmap check of bmbt mappings [25/37] xfs: scrub the metadir path of rt rmap btree files [26/37] xfs: walk the rt reverse mapping tree when rebuilding rmap [27/37] xfs: online repair of realtime file bmaps [28/37] xfs: repair inodes that have realtime extents [29/37] xfs: repair rmap btree inodes [30/37] xfs: online repair of realtime bitmaps for a realtime group [31/37] xfs: support repairing metadata btrees rooted in metadir inodes [32/37] xfs: online repair of the realtime rmap btree [33/37] xfs: create a shadow rmap btree during realtime rmap repair [34/37] xfs: hook live realtime rmap operations during a repair operation [35/37] xfs: don't shut down the filesystem for media failures beyond end of log [36/37] xfs: react to fsdax failure notifications on the rt device [37/37] xfs: enable realtime rmap btree

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 0e271919374780..36ab06f8a3bc99 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -31,6 +31,10 @@ #include "xfs_buf_mem.h" #include "xfs_btree_mem.h" #include "xfs_rtrmap_btree.h" +#include "xfs_bmap.h" +#include "xfs_rmap.h" +#include "xfs_quota.h" +#include "xfs_metafile.h" /* * Btree magic numbers. @@ -5576,3 +5580,67 @@ xfs_btree_goto_left_edge( return 0; } + +/* Allocate a block for an inode-rooted metadata btree. */ +int +xfs_btree_alloc_metafile_block( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + struct xfs_alloc_arg args = { + .mp = cur->bc_mp, + .tp = cur->bc_tp, + .resv = XFS_AG_RESV_METAFILE, + .minlen = 1, + .maxlen = 1, + .prod = 1, + }; + struct xfs_inode *ip = cur->bc_ino.ip; + int error; + + ASSERT(xfs_is_metadir_inode(ip)); + + xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, cur->bc_ino.whichfork); + error = xfs_alloc_vextent_start_ag(&args, + XFS_INO_TO_FSB(cur->bc_mp, ip->i_ino)); + if (error) + return error; + if (args.fsbno == NULLFSBLOCK) { + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + + xfs_metafile_resv_alloc_space(ip, &args); + + new->l = cpu_to_be64(args.fsbno); + *stat = 1; + return 0; +} + +/* Free a block from an inode-rooted metadata btree. */ +int +xfs_btree_free_metafile_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip = cur->bc_ino.ip; + struct xfs_trans *tp = cur->bc_tp; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); + int error; + + ASSERT(xfs_is_metadir_inode(ip)); + + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); + error = xfs_free_extent_later(tp, fsbno, 1, &oinfo, XFS_AG_RESV_METAFILE, + 0); + if (error) + return error; + + xfs_metafile_resv_free_space(ip, tp, 1); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 3b8c2ccad90847..ee82dc777d6d5b 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -703,4 +703,10 @@ xfs_btree_at_iroot( level == cur->bc_nlevels - 1; } +int xfs_btree_alloc_metafile_block(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, union xfs_btree_ptr *newp, + int *stat); +int xfs_btree_free_metafile_block(struct xfs_btree_cur *cur, + struct xfs_buf *bp); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index d3e4c52dcaa9d0..99d828bb5fe7c3 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -18,12 +18,14 @@ #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" +#include "xfs_rmap.h" #include "xfs_rtrmap_btree.h" #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_rtgroup.h" +#include "xfs_bmap.h" static struct kmem_cache *xfs_rtrmapbt_cur_cache; @@ -44,6 +46,182 @@ xfs_rtrmapbt_dup_cursor( return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group)); } +STATIC int +xfs_rtrmapbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); + + return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, + level == 0) / 2; + } + + return cur->bc_mp->m_rtrmap_mnr[level != 0]; +} + +STATIC int +xfs_rtrmapbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); + + return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, + level == 0); + } + + return cur->bc_mp->m_rtrmap_mxr[level != 0]; +} + +/* + * Convert the ondisk record's offset field into the ondisk key's offset field. + * Fork and bmbt are significant parts of the rmap record key, but written + * status is merely a record attribute. + */ +static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec) +{ + return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN); +} + +STATIC void +xfs_rtrmapbt_init_key_from_rec( + union xfs_btree_key *key, + const union xfs_btree_rec *rec) +{ + key->rmap.rm_startblock = rec->rmap.rm_startblock; + key->rmap.rm_owner = rec->rmap.rm_owner; + key->rmap.rm_offset = ondisk_rec_offset_to_key(rec); +} + +STATIC void +xfs_rtrmapbt_init_high_key_from_rec( + union xfs_btree_key *key, + const union xfs_btree_rec *rec) +{ + uint64_t off; + int adj; + + adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; + + key->rmap.rm_startblock = rec->rmap.rm_startblock; + be32_add_cpu(&key->rmap.rm_startblock, adj); + key->rmap.rm_owner = rec->rmap.rm_owner; + key->rmap.rm_offset = ondisk_rec_offset_to_key(rec); + if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) || + XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) + return; + off = be64_to_cpu(key->rmap.rm_offset); + off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK); + key->rmap.rm_offset = cpu_to_be64(off); +} + +STATIC void +xfs_rtrmapbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock); + rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount); + rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner); + rec->rmap.rm_offset = cpu_to_be64( + xfs_rmap_irec_offset_pack(&cur->bc_rec.r)); +} + +STATIC void +xfs_rtrmapbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + ptr->l = 0; +} + +/* + * Mask the appropriate parts of the ondisk key field for a key comparison. + * Fork and bmbt are significant parts of the rmap record key, but written + * status is merely a record attribute. + */ +static inline uint64_t offset_keymask(uint64_t offset) +{ + return offset & ~XFS_RMAP_OFF_UNWRITTEN; +} + +STATIC int64_t +xfs_rtrmapbt_key_diff( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key) +{ + struct xfs_rmap_irec *rec = &cur->bc_rec.r; + const struct xfs_rmap_key *kp = &key->rmap; + __u64 x, y; + int64_t d; + + d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; + if (d) + return d; + + x = be64_to_cpu(kp->rm_owner); + y = rec->rm_owner; + if (x > y) + return 1; + else if (y > x) + return -1; + + x = offset_keymask(be64_to_cpu(kp->rm_offset)); + y = offset_keymask(xfs_rmap_irec_offset_pack(rec)); + if (x > y) + return 1; + else if (y > x) + return -1; + return 0; +} + +STATIC int64_t +xfs_rtrmapbt_diff_two_keys( + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) +{ + const struct xfs_rmap_key *kp1 = &k1->rmap; + const struct xfs_rmap_key *kp2 = &k2->rmap; + int64_t d; + __u64 x, y; + + /* Doesn't make sense to mask off the physical space part */ + ASSERT(!mask || mask->rmap.rm_startblock); + + d = (int64_t)be32_to_cpu(kp1->rm_startblock) - + be32_to_cpu(kp2->rm_startblock); + if (d) + return d; + + if (!mask || mask->rmap.rm_owner) { + x = be64_to_cpu(kp1->rm_owner); + y = be64_to_cpu(kp2->rm_owner); + if (x > y) + return 1; + else if (y > x) + return -1; + } + + if (!mask || mask->rmap.rm_offset) { + /* Doesn't make sense to allow offset but not owner */ + ASSERT(!mask || mask->rmap.rm_owner); + + x = offset_keymask(be64_to_cpu(kp1->rm_offset)); + y = offset_keymask(be64_to_cpu(kp2->rm_offset)); + if (x > y) + return 1; + else if (y > x) + return -1; + } + + return 0; +} + static xfs_failaddr_t xfs_rtrmapbt_verify( struct xfs_buf *bp) @@ -110,6 +288,86 @@ const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = { .verify_struct = xfs_rtrmapbt_verify, }; +STATIC int +xfs_rtrmapbt_keys_inorder( + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) +{ + uint32_t x; + uint32_t y; + uint64_t a; + uint64_t b; + + x = be32_to_cpu(k1->rmap.rm_startblock); + y = be32_to_cpu(k2->rmap.rm_startblock); + if (x < y) + return 1; + else if (x > y) + return 0; + a = be64_to_cpu(k1->rmap.rm_owner); + b = be64_to_cpu(k2->rmap.rm_owner); + if (a < b) + return 1; + else if (a > b) + return 0; + a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset)); + b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset)); + if (a <= b) + return 1; + return 0; +} + +STATIC int +xfs_rtrmapbt_recs_inorder( + struct xfs_btree_cur *cur, + const union xfs_btree_rec *r1, + const union xfs_btree_rec *r2) +{ + uint32_t x; + uint32_t y; + uint64_t a; + uint64_t b; + + x = be32_to_cpu(r1->rmap.rm_startblock); + y = be32_to_cpu(r2->rmap.rm_startblock); + if (x < y) + return 1; + else if (x > y) + return 0; + a = be64_to_cpu(r1->rmap.rm_owner); + b = be64_to_cpu(r2->rmap.rm_owner); + if (a < b) + return 1; + else if (a > b) + return 0; + a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset)); + b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset)); + if (a <= b) + return 1; + return 0; +} + +STATIC enum xbtree_key_contig +xfs_rtrmapbt_keys_contiguous( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + ASSERT(!mask || mask->rmap.rm_startblock); + + /* + * We only support checking contiguity of the physical space component. + * If any callers ever need more specificity than that, they'll have to + * implement it here. + */ + ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset)); + + return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock), + be32_to_cpu(key2->rmap.rm_startblock)); +} + const struct xfs_btree_ops xfs_rtrmapbt_ops = { .name = "rtrmap", .type = XFS_BTREE_TYPE_INODE, @@ -125,7 +383,20 @@ const struct xfs_btree_ops xfs_rtrmapbt_ops = { .statoff = XFS_STATS_CALC_INDEX(xs_rtrmap_2), .dup_cursor = xfs_rtrmapbt_dup_cursor, + .alloc_block = xfs_btree_alloc_metafile_block, + .free_block = xfs_btree_free_metafile_block, + .get_minrecs = xfs_rtrmapbt_get_minrecs, + .get_maxrecs = xfs_rtrmapbt_get_maxrecs, + .init_key_from_rec = xfs_rtrmapbt_init_key_from_rec, + .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec, + .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur, + .init_ptr_from_cur = xfs_rtrmapbt_init_ptr_from_cur, + .key_diff = xfs_rtrmapbt_key_diff, .buf_ops = &xfs_rtrmapbt_buf_ops, + .diff_two_keys = xfs_rtrmapbt_diff_two_keys, + .keys_inorder = xfs_rtrmapbt_keys_inorder, + .recs_inorder = xfs_rtrmapbt_recs_inorder, + .keys_contiguous = xfs_rtrmapbt_keys_contiguous, }; /* Allocate a new rt rmap btree cursor. */

[06/37] xfs: add realtime rmap btree operations

Commit Message

Patch