From patchwork Fri May 26 01:04:50 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13255970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C884DC77B7E for ; Fri, 26 May 2023 01:04:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229878AbjEZBEy (ORCPT ); Thu, 25 May 2023 21:04:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52456 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229567AbjEZBEx (ORCPT ); Thu, 25 May 2023 21:04:53 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4C1D4195; Thu, 25 May 2023 18:04:52 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id CFC2260C3F; Fri, 26 May 2023 01:04:51 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3D621C433EF; Fri, 26 May 2023 01:04:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063091; bh=HfCTt6ykRXErlMmoLlfsIDXmzBepQMiTuZZMR7GtI+o=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=lToyAzi1DWlfdcSSIjkyXzBs6MJEfhWw9bC8psNTmmcS2/DNqh/ypDI609k+76v/0 1rDGfXTWJq8N/T86jJLaq+Alg9eT0aGD7lWyHLrxZ4NHOycqMP6ZypOFMQ0BXlAgfw kMexIMeMqx39PVgx3CMNNvNWSudbwv9FLh1pbq3rgyqdGcAL9vjAFyB3LoWLccXEdS NptbIv2zHqIrLYl+pUyjojY3usRWQS2JU1wegWPhcfx+Qfiei+Mo5xotw1YynxHEdo ++2wgChYjp6ZLtS5tXKMG3U6YOVRsfqW0zSMGAHlm4bTt5urRNzleqCX1iHNOVc87s KiLqoO0Jo1oLw== Date: Thu, 25 May 2023 18:04:50 -0700 Subject: [PATCH 1/9] xfs: 
dump xfiles for debugging purposes From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061865.3733082.14220362905242831184.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Add a debug function to dump an xfile's contents for debug purposes. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/xfile.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/xfile.h | 2 + 2 files changed, 100 insertions(+) diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index d3e678cd4a2f..851aeb244660 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -431,3 +431,101 @@ xfile_put_page( return -EIO; return 0; } + +/* Dump an xfile to dmesg. 
*/ +int +xfile_dump( + struct xfile *xf) +{ + struct xfile_stat sb; + struct inode *inode = file_inode(xf->file); + struct address_space *mapping = inode->i_mapping; + loff_t holepos = 0; + loff_t datapos; + loff_t ret; + unsigned int pflags; + bool all_zeroes = true; + int error = 0; + + error = xfile_stat(xf, &sb); + if (error) + return error; + + printk(KERN_ALERT "xfile ino 0x%lx isize 0x%llx dump:", inode->i_ino, + sb.size); + pflags = memalloc_nofs_save(); + + while ((ret = vfs_llseek(xf->file, holepos, SEEK_DATA)) >= 0) { + datapos = rounddown_64(ret, PAGE_SIZE); + ret = vfs_llseek(xf->file, datapos, SEEK_HOLE); + if (ret < 0) + break; + holepos = min_t(loff_t, sb.size, roundup_64(ret, PAGE_SIZE)); + + while (datapos < holepos) { + struct page *page = NULL; + void *p, *kaddr; + u64 datalen = holepos - datapos; + unsigned int pagepos; + unsigned int pagelen; + + cond_resched(); + + if (fatal_signal_pending(current)) { + error = -EINTR; + goto out_pflags; + } + + pagelen = min_t(u64, datalen, PAGE_SIZE); + + page = shmem_read_mapping_page_gfp(mapping, + datapos >> PAGE_SHIFT, __GFP_NOWARN); + if (IS_ERR(page)) { + error = PTR_ERR(page); + if (error == -EIO) + printk(KERN_ALERT "%.8llx: poisoned", + datapos); + else if (error != -ENOMEM) + goto out_pflags; + + goto next_pgoff; + } + + if (!PageUptodate(page)) + goto next_page; + + kaddr = kmap_local_page(page); + p = kaddr; + + for (pagepos = 0; pagepos < pagelen; pagepos += 16) { + char prefix[16]; + unsigned int linelen; + + linelen = min_t(unsigned int, pagelen, 16); + + if (!memchr_inv(p + pagepos, 0, linelen)) + continue; + + snprintf(prefix, 16, "%.8llx: ", + datapos + pagepos); + + all_zeroes = false; + print_hex_dump(KERN_ALERT, prefix, + DUMP_PREFIX_NONE, 16, 1, + p + pagepos, linelen, true); + } + kunmap_local(kaddr); +next_page: + put_page(page); +next_pgoff: + datapos += PAGE_SIZE; + } + } + if (all_zeroes) + printk(KERN_ALERT ""); + if (ret != -ENXIO) + error = ret; +out_pflags: + 
memalloc_nofs_restore(pflags); + return error; +} diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index 1aae2cd91720..adf5dbdc4c21 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -75,4 +75,6 @@ int xfile_get_page(struct xfile *xf, loff_t offset, unsigned int len, struct xfile_page *xbuf); int xfile_put_page(struct xfile *xf, struct xfile_page *xbuf); +int xfile_dump(struct xfile *xf); + #endif /* __XFS_SCRUB_XFILE_H__ */ From patchwork Fri May 26 01:05:06 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13255971 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 678FBC77B7E for ; Fri, 26 May 2023 01:05:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230103AbjEZBFL (ORCPT ); Thu, 25 May 2023 21:05:11 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52506 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229885AbjEZBFJ (ORCPT ); Thu, 25 May 2023 21:05:09 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D39BB195; Thu, 25 May 2023 18:05:07 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 6FA1C649F2; Fri, 26 May 2023 01:05:07 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id CED8DC4339B; Fri, 26 May 2023 01:05:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063106; bh=1qHFcDcHQhjxqOKRBgvJc8bu4CdrCzwaU7lBiNuE1qk=; 
h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=o0+o4sK4ElGInASeQg77I1945HPhARBDmQWrfsUA8kF95d1alAV+QLh+A5Mh8+Obo +Tmnu8ruuAbvk5GnWCW6sQub/gWOz9YQRiq0IwtFPLJo3iWqWbDlgx4aDE24UDScop v2a9tgAduxoz2ftscOlkqxhd8Wtogw0ynjqODZe41vxh/9SnEgSsWWHj5tr3zS54X+ EktNtpNkQ5rfda/HaQQcoy4hjf0Evp9nGrNPOaPu8sLRTDyV1wHoRvj/8LuxpaPqqY eALKJ9r+i2ZI5JtKhhTUdi0yqRAmy/8Bf3gcd84vzUx6pg5BeYaDvT96r29KAbFBeL /XO+j1xZ6XE2A== Date: Thu, 25 May 2023 18:05:06 -0700 Subject: [PATCH 2/9] xfs: teach buftargs to maintain their own buffer hashtable From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061880.3733082.7782494430395906650.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Currently, cached buffers are indexed by per-AG hashtables. This works great for the data device, but won't work for in-memory btrees. Make it so that buftargs can index buffers too. We accomplish this by hoisting the rhashtable and its lock into a separate xfs_buf_cache structure and reworking various functions to use it. Next, we introduce to the buftarg a new XFS_BUFTARG_SELF_CACHED flag to indicate that the buftarg's cache is active (vs. the per-ag cache for the regular filesystem). Finally, make it so that each xfs_buf points to its cache if there is one. This is how we distinguish uncached buffers from now on. Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_ag.c | 6 +- fs/xfs/libxfs/xfs_ag.h | 4 - fs/xfs/xfs_buf.c | 140 +++++++++++++++++++++++++++++++++--------------- fs/xfs/xfs_buf.h | 10 +++ fs/xfs/xfs_mount.h | 3 - 5 files changed, 110 insertions(+), 53 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index b36ec110ad17..d274ec8bd237 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -264,7 +264,7 @@ xfs_free_perag( xfs_defer_drain_free(&pag->pag_intents_drain); cancel_delayed_work_sync(&pag->pag_blockgc_work); - xfs_buf_hash_destroy(pag); + xfs_buf_cache_destroy(&pag->pag_bcache); /* drop the mount's active reference */ xfs_perag_rele(pag); @@ -394,7 +394,7 @@ xfs_initialize_perag( pag->pagb_tree = RB_ROOT; #endif /* __KERNEL__ */ - error = xfs_buf_hash_init(pag); + error = xfs_buf_cache_init(&pag->pag_bcache); if (error) goto out_remove_pag; @@ -434,7 +434,7 @@ xfs_initialize_perag( pag = radix_tree_delete(&mp->m_perag_tree, index); if (!pag) break; - xfs_buf_hash_destroy(pag); + xfs_buf_cache_destroy(&pag->pag_bcache); xfs_defer_drain_free(&pag->pag_intents_drain); kmem_free(pag); } diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 616812911a23..a682ddd8fc4c 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -104,9 +104,7 @@ struct xfs_perag { int pag_ici_reclaimable; /* reclaimable inodes */ unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ - /* buffer cache index */ - spinlock_t pag_buf_lock; /* lock for pag_buf_hash */ - struct rhashtable pag_buf_hash; + struct xfs_buf_cache pag_bcache; /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 2a1a641c2b87..dd16dfb669d8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -499,18 +499,18 @@ static const struct rhashtable_params xfs_buf_hash_params = { }; int -xfs_buf_hash_init( - struct xfs_perag *pag) +xfs_buf_cache_init( + struct xfs_buf_cache *bch) { - 
spin_lock_init(&pag->pag_buf_lock); - return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params); + spin_lock_init(&bch->bc_lock); + return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params); } void -xfs_buf_hash_destroy( - struct xfs_perag *pag) +xfs_buf_cache_destroy( + struct xfs_buf_cache *bch) { - rhashtable_destroy(&pag->pag_buf_hash); + rhashtable_destroy(&bch->bc_hash); } static int @@ -569,7 +569,7 @@ xfs_buf_find_lock( static inline int xfs_buf_lookup( - struct xfs_perag *pag, + struct xfs_buf_cache *bch, struct xfs_buf_map *map, xfs_buf_flags_t flags, struct xfs_buf **bpp) @@ -578,7 +578,7 @@ xfs_buf_lookup( int error; rcu_read_lock(); - bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params); + bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params); if (!bp || !atomic_inc_not_zero(&bp->b_hold)) { rcu_read_unlock(); return -ENOENT; @@ -603,6 +603,7 @@ xfs_buf_lookup( static int xfs_buf_find_insert( struct xfs_buftarg *btp, + struct xfs_buf_cache *bch, struct xfs_perag *pag, struct xfs_buf_map *cmap, struct xfs_buf_map *map, @@ -631,18 +632,18 @@ xfs_buf_find_insert( goto out_free_buf; } - spin_lock(&pag->pag_buf_lock); - bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash, + spin_lock(&bch->bc_lock); + bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); if (IS_ERR(bp)) { error = PTR_ERR(bp); - spin_unlock(&pag->pag_buf_lock); + spin_unlock(&bch->bc_lock); goto out_free_buf; } if (bp) { /* found an existing buffer */ atomic_inc(&bp->b_hold); - spin_unlock(&pag->pag_buf_lock); + spin_unlock(&bch->bc_lock); error = xfs_buf_find_lock(bp, flags); if (error) xfs_buf_rele(bp); @@ -653,17 +654,38 @@ xfs_buf_find_insert( /* The new buffer keeps the perag reference until it is freed. 
*/ new_bp->b_pag = pag; - spin_unlock(&pag->pag_buf_lock); + new_bp->b_cache = bch; + spin_unlock(&bch->bc_lock); *bpp = new_bp; return 0; out_free_buf: xfs_buf_free(new_bp); out_drop_pag: - xfs_perag_put(pag); + if (pag) + xfs_perag_put(pag); return error; } +/* Find the buffer cache for a particular buftarg and map. */ +static inline struct xfs_buf_cache * +xfs_buftarg_get_cache( + struct xfs_buftarg *btp, + const struct xfs_buf_map *map, + struct xfs_perag **pagp) +{ + struct xfs_mount *mp = btp->bt_mount; + + if (btp->bt_cache) { + *pagp = NULL; + return btp->bt_cache; + } + + *pagp = xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn)); + ASSERT(*pagp != NULL); + return &(*pagp)->pag_bcache; +} + /* * Assembles a buffer covering the specified range. The code is optimised for * cache hits, as metadata intensive workloads will see 3 orders of magnitude @@ -677,6 +699,7 @@ xfs_buf_get_map( xfs_buf_flags_t flags, struct xfs_buf **bpp) { + struct xfs_buf_cache *bch; struct xfs_perag *pag; struct xfs_buf *bp = NULL; struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; @@ -692,10 +715,9 @@ xfs_buf_get_map( if (error) return error; - pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); + bch = xfs_buftarg_get_cache(btp, &cmap, &pag); - error = xfs_buf_lookup(pag, &cmap, flags, &bp); + error = xfs_buf_lookup(bch, &cmap, flags, &bp); if (error && error != -ENOENT) goto out_put_perag; @@ -707,13 +729,14 @@ xfs_buf_get_map( goto out_put_perag; /* xfs_buf_find_insert() consumes the perag reference. */ - error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps, + error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps, flags, &bp); if (error) return error; } else { XFS_STATS_INC(btp->bt_mount, xb_get_locked); - xfs_perag_put(pag); + if (pag) + xfs_perag_put(pag); } /* We do not hold a perag reference anymore. 
*/ @@ -741,7 +764,8 @@ xfs_buf_get_map( return 0; out_put_perag: - xfs_perag_put(pag); + if (pag) + xfs_perag_put(pag); return error; } @@ -995,12 +1019,13 @@ xfs_buf_rele( struct xfs_buf *bp) { struct xfs_perag *pag = bp->b_pag; + struct xfs_buf_cache *bch = bp->b_cache; bool release; bool freebuf = false; trace_xfs_buf_rele(bp, _RET_IP_); - if (!pag) { + if (!bch) { ASSERT(list_empty(&bp->b_lru)); if (atomic_dec_and_test(&bp->b_hold)) { xfs_buf_ioacct_dec(bp); @@ -1022,7 +1047,7 @@ xfs_buf_rele( * leading to a use-after-free scenario. */ spin_lock(&bp->b_lock); - release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); + release = atomic_dec_and_lock(&bp->b_hold, &bch->bc_lock); if (!release) { /* * Drop the in-flight state if the buffer is already on the LRU @@ -1047,7 +1072,7 @@ xfs_buf_rele( bp->b_state &= ~XFS_BSTATE_DISPOSE; atomic_inc(&bp->b_hold); } - spin_unlock(&pag->pag_buf_lock); + spin_unlock(&bch->bc_lock); } else { /* * most of the time buffers will already be removed from the @@ -1062,10 +1087,13 @@ xfs_buf_rele( } ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); - rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head, - xfs_buf_hash_params); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); + rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head, + xfs_buf_hash_params); + spin_unlock(&bch->bc_lock); + if (pag) + xfs_perag_put(pag); + bp->b_cache = NULL; + bp->b_pag = NULL; freebuf = true; } @@ -1989,24 +2017,18 @@ xfs_setsize_buftarg_early( return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev)); } -struct xfs_buftarg * -xfs_alloc_buftarg( +static struct xfs_buftarg * +xfs_alloc_buftarg_common( struct xfs_mount *mp, - struct block_device *bdev) + const char *descr) { - xfs_buftarg_t *btp; - const struct dax_holder_operations *ops = NULL; + struct xfs_buftarg *btp; -#if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE) - ops = &xfs_dax_holder_operations; -#endif btp = kmem_zalloc(sizeof(*btp), KM_NOFS); + if (!btp) + 
return NULL; btp->bt_mount = mp; - btp->bt_dev = bdev->bd_dev; - btp->bt_bdev = bdev; - btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off, - mp, ops); /* * Buffer IO error rate limiting. Limit it to no more than 10 messages @@ -2015,9 +2037,6 @@ xfs_alloc_buftarg( ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ, DEFAULT_RATELIMIT_BURST); - if (xfs_setsize_buftarg_early(btp, bdev)) - goto error_free; - if (list_lru_init(&btp->bt_lru)) goto error_free; @@ -2028,9 +2047,10 @@ xfs_alloc_buftarg( btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s", + if (register_shrinker(&btp->bt_shrinker, "xfs-%s:%s", descr, mp->m_super->s_id)) goto error_pcpu; + return btp; error_pcpu: @@ -2042,6 +2062,38 @@ xfs_alloc_buftarg( return NULL; } +/* Allocate a buffer cache target for a persistent block device. */ +struct xfs_buftarg * +xfs_alloc_buftarg( + struct xfs_mount *mp, + struct block_device *bdev) +{ + struct xfs_buftarg *btp; + const struct dax_holder_operations *ops = NULL; + +#if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE) + ops = &xfs_dax_holder_operations; +#endif + + btp = xfs_alloc_buftarg_common(mp, "buf"); + if (!btp) + return NULL; + + btp->bt_dev = bdev->bd_dev; + btp->bt_bdev = bdev; + btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off, + mp, ops); + + if (xfs_setsize_buftarg_early(btp, bdev)) + goto error_free; + + return btp; + +error_free: + xfs_free_buftarg(btp); + return NULL; +} + /* * Cancel a delayed write list. 
* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 467ddb2e2f0d..d17ec9274d99 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -83,6 +83,14 @@ typedef unsigned int xfs_buf_flags_t; #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ +struct xfs_buf_cache { + spinlock_t bc_lock; + struct rhashtable bc_hash; +}; + +int xfs_buf_cache_init(struct xfs_buf_cache *bch); +void xfs_buf_cache_destroy(struct xfs_buf_cache *bch); + /* * The xfs_buftarg contains 2 notions of "sector size" - * @@ -102,6 +110,7 @@ typedef struct xfs_buftarg { struct dax_device *bt_daxdev; u64 bt_dax_part_off; struct xfs_mount *bt_mount; + struct xfs_buf_cache *bt_cache; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; size_t bt_logical_sectorsize; @@ -208,6 +217,7 @@ struct xfs_buf { int b_last_error; const struct xfs_buf_ops *b_ops; + struct xfs_buf_cache *b_cache; struct rcu_head b_rcu; }; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index fc8d4de55cd1..622cd805dc48 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -486,9 +486,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); } -int xfs_buf_hash_init(struct xfs_perag *pag); -void xfs_buf_hash_destroy(struct xfs_perag *pag); - extern void xfs_uuid_table_free(void); extern uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); From patchwork Fri May 26 01:05:22 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13255972 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6434DC7EE2E for ; Fri, 26 May 2023 01:05:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230077AbjEZBF0 (ORCPT ); Thu, 25 May 2023 21:05:26 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52564 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230020AbjEZBFZ (ORCPT ); Thu, 25 May 2023 21:05:25 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 79A5F125; Thu, 25 May 2023 18:05:23 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 0752560C3F; Fri, 26 May 2023 01:05:23 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 693E5C433D2; Fri, 26 May 2023 01:05:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063122; bh=Op/+e031SmaVVcRXMMAFCXNHvL6xt1AEkE6Rd8qXEOY=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=TJgcl1smDJw0cROFIH4qrn0LRF3AlQ3GuDu5i+Yqz5N4fXc0SzpkRkAEIr7j/YsKk 4N3UfEMt0hHoGixNiftYx6ZizK8Ap9fU6U2qW41a1LZrR6/d8+LLGJPc633DrOoIvU uT3Z/7zR3go1ZhLntidJCh4WZgEQspSM3lh9zGOua+9Q8dW8naNjnQUQr98/z9RTQJ cE4ZGicEkweNeTqn538ltFt4HzeQAKMTsog/8bCSHURuHS3IHoAe87K+KtpUyqNXue ZeO2LYfrxlazdcszy0WJRgfXmo9xniKIf9hfWm8NYCfbKtBnqOXeyBOBpSvATJxS+G Srxo9AAbtFSGg== Date: Thu, 25 May 2023 18:05:22 -0700 Subject: [PATCH 3/9] xfs: create buftarg helpers to abstract block_device operations From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061894.3733082.5871675246659728914.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong In the next few patches, we're going into introduce buffer targets that are not block devices. Introduce block_device helpers so that the compiler can check that we're not feeding an xfile object to something expecting a block device. Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 5 ++++- fs/xfs/xfs_bmap_util.c | 8 ++++---- fs/xfs/xfs_buf.h | 37 +++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_discard.c | 8 ++++---- fs/xfs/xfs_file.c | 6 +++--- fs/xfs/xfs_ioctl.c | 3 ++- fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_log.c | 4 ++-- fs/xfs/xfs_log_cil.c | 3 ++- fs/xfs/xfs_log_recover.c | 3 ++- fs/xfs/xfs_super.c | 4 ++-- 11 files changed, 62 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 2ef78aa1d3f6..90f9fdbda20b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -569,7 +569,10 @@ xfs_iomap_swapfile_activate( struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + struct xfs_buftarg *btp = xfs_inode_buftarg(ip); + + sis->bdev = xfs_buftarg_bdev(btp); return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index fbb675563208..a847dbd76537 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -62,10 +62,10 @@ xfs_zero_extent( xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); sector_t block = XFS_BB_TO_FSBT(mp, sector); - return 
blkdev_issue_zeroout(target->bt_bdev, - block << (mp->m_super->s_blocksize_bits - 9), - count_fsb << (mp->m_super->s_blocksize_bits - 9), - GFP_NOFS, 0); + return xfs_buftarg_zeroout(target, + block << (mp->m_super->s_blocksize_bits - 9), + count_fsb << (mp->m_super->s_blocksize_bits - 9), + GFP_NOFS, 0); } #ifdef CONFIG_XFS_RT diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d17ec9274d99..dd7964bc76d7 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -378,8 +378,41 @@ extern void xfs_buftarg_wait(struct xfs_buftarg *); extern void xfs_buftarg_drain(struct xfs_buftarg *); extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int); -#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) -#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) +static inline struct block_device * +xfs_buftarg_bdev(struct xfs_buftarg *btp) +{ + return btp->bt_bdev; +} + +static inline unsigned int +xfs_getsize_buftarg(struct xfs_buftarg *btp) +{ + return block_size(btp->bt_bdev); +} + +static inline bool +xfs_readonly_buftarg(struct xfs_buftarg *btp) +{ + return bdev_read_only(btp->bt_bdev); +} + +static inline int +xfs_buftarg_flush(struct xfs_buftarg *btp) +{ + return blkdev_issue_flush(btp->bt_bdev); +} + +static inline int +xfs_buftarg_zeroout( + struct xfs_buftarg *btp, + sector_t sector, + sector_t nr_sects, + gfp_t gfp_mask, + unsigned flags) +{ + return blkdev_issue_zeroout(btp->bt_bdev, sector, nr_sects, gfp_mask, + flags); +} int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 96f2263fe9b7..3d074d094bf4 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -29,7 +29,7 @@ xfs_trim_extents( uint64_t *blocks_trimmed) { struct xfs_mount *mp = pag->pag_mount; - struct block_device *bdev = mp->m_ddev_targp->bt_bdev; + struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); 
struct xfs_btree_cur *cur; struct xfs_buf *agbp; struct xfs_agf *agf; @@ -150,8 +150,8 @@ xfs_ioc_trim( struct fstrim_range __user *urange) { struct xfs_perag *pag; - unsigned int granularity = - bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); + struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); + unsigned int granularity = bdev_discard_granularity(bdev); struct fstrim_range range; xfs_daddr_t start, end, minlen; xfs_agnumber_t agno; @@ -160,7 +160,7 @@ xfs_ioc_trim( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index aede746541f8..2380067aa154 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -164,9 +164,9 @@ xfs_file_fsync( * inode size in case of an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) - error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); + error = xfs_buftarg_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) - error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); + error = xfs_buftarg_flush(mp->m_ddev_targp); /* * Any inode that has dirty modifications in the log is pinned. 
The @@ -189,7 +189,7 @@ xfs_file_fsync( */ if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && mp->m_logdev_targp == mp->m_ddev_targp) { - err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); + err2 = xfs_buftarg_flush(mp->m_ddev_targp); if (err2 && !error) error = err2; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 55bb01173cde..0667e088a289 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1762,6 +1762,7 @@ xfs_ioc_setlabel( char __user *newlabel) { struct xfs_sb *sbp = &mp->m_sb; + struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); char label[XFSLABEL_MAX + 1]; size_t len; int error; @@ -1808,7 +1809,7 @@ xfs_ioc_setlabel( error = xfs_update_secondary_sbs(mp); mutex_unlock(&mp->m_growlock); - invalidate_bdev(mp->m_ddev_targp->bt_bdev); + invalidate_bdev(bdev); out: mnt_drop_write_file(filp); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0ff46e3997e0..559e8e785595 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -129,7 +129,7 @@ xfs_bmbt_to_iomap( if (mapping_flags & IOMAP_DAX) iomap->dax_dev = target->bt_daxdev; else - iomap->bdev = target->bt_bdev; + iomap->bdev = xfs_buftarg_bdev(target); iomap->flags = iomap_flags; if (xfs_ipincount(ip) && @@ -154,7 +154,7 @@ xfs_hole_to_iomap( iomap->type = IOMAP_HOLE; iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb); iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb); - iomap->bdev = target->bt_bdev; + iomap->bdev = xfs_buftarg_bdev(target); iomap->dax_dev = target->bt_daxdev; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fc61cc024023..b32a8e57f576 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1938,7 +1938,7 @@ xlog_write_iclog( * writeback throttle from throttling log writes behind background * metadata writeback and causing priority inversions. 
*/ - bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec, + bio_init(&iclog->ic_bio, xfs_buftarg_bdev(log->l_targ), iclog->ic_bvec, howmany(count, PAGE_SIZE), REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE); iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; @@ -1959,7 +1959,7 @@ xlog_write_iclog( * avoid shutdown re-entering this path and erroring out again. */ if (log->l_targ != log->l_mp->m_ddev_targp && - blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) { + xfs_buftarg_flush(log->l_mp->m_ddev_targp)) { xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index eccbfb99e894..12cd2874048f 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -742,7 +742,8 @@ xlog_discard_busy_extents( trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, busyp->length); - error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + error = __blkdev_issue_discard( + xfs_buftarg_bdev(mp->m_ddev_targp), XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_NOFS, &bio); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 322eb2ee6c55..6b1f37bc3e95 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -137,7 +137,8 @@ xlog_do_io( nbblks = round_up(nbblks, log->l_sectBBsize); ASSERT(nbblks > 0); - error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, + error = xfs_rw_bdev(xfs_buftarg_bdev(log->l_targ), + log->l_logBBstart + blk_no, BBTOB(nbblks), data, op); if (error && !xlog_is_shutdown(log)) { xfs_alert(log->l_mp, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 67ebb9d5ed21..f661aaaeac35 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -408,13 +408,13 @@ xfs_close_devices( struct xfs_mount *mp) { if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - struct block_device *logdev = mp->m_logdev_targp->bt_bdev; + struct block_device *logdev = 
xfs_buftarg_bdev(mp->m_logdev_targp); xfs_free_buftarg(mp->m_logdev_targp); xfs_blkdev_put(logdev); } if (mp->m_rtdev_targp) { - struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; + struct block_device *rtdev = xfs_buftarg_bdev(mp->m_rtdev_targp); xfs_free_buftarg(mp->m_rtdev_targp); xfs_blkdev_put(rtdev); From patchwork Fri May 26 01:05:37 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13255973 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id F2316C77B7A for ; Fri, 26 May 2023 01:05:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229999AbjEZBFv (ORCPT ); Thu, 25 May 2023 21:05:51 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52606 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230020AbjEZBFk (ORCPT ); Thu, 25 May 2023 21:05:40 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2A40C194; Thu, 25 May 2023 18:05:39 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id B2E4160C3F; Fri, 26 May 2023 01:05:38 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1DA63C433EF; Fri, 26 May 2023 01:05:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063138; bh=PZWZv0a4Q5qwiDZJNMurO2DnHLCJizoAxEAz53c/FcM=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=lF7YlJ3GlEBe+UV00uLomkBE6HTfXw+SceoYKGoNQfLUb9xrz2HrIFCixOqH/jMbb 
8BfdOEYz2g3LDVj7rrG9nJx64UZb/ikRFTgF5Kotoi+7MNC0O4Q9DtiYSbl7e0vH3w kP9Nu9+0fE6owi+azO5wkdOTJMerciXB1LviPgChiKKc2Z8PvSsWlb8ojNj8bELrM6 LfBkobkFMVBeUDrHBgtyxloQ8rVPfwx9otxSSxbugZk2b2wzppfKc8F7U6oV0Ocjo7 zycw3alX0d+XbbrspOjpkf20HXgrar1uRc2/lzoifRwETDzSEWmARSME91ro6etOkT hx/rGlvEbqteQ== Date: Thu, 25 May 2023 18:05:37 -0700 Subject: [PATCH 4/9] xfs: make GFP_ usage consistent when allocating buftargs From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061909.3733082.8525276312138360536.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Convert kmem_zalloc to kzalloc, and make it so that both memory allocation functions in this function use GFP_NOFS. Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_buf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dd16dfb669d8..19cefed4dca7 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1978,7 +1978,7 @@ xfs_free_buftarg( invalidate_bdev(btp->bt_bdev); fs_put_dax(btp->bt_daxdev, btp->bt_mount); - kmem_free(btp); + kvfree(btp); } int @@ -2024,7 +2024,7 @@ xfs_alloc_buftarg_common( { struct xfs_buftarg *btp; - btp = kmem_zalloc(sizeof(*btp), KM_NOFS); + btp = kzalloc(sizeof(*btp), GFP_NOFS); if (!btp) return NULL; @@ -2040,7 +2040,7 @@ xfs_alloc_buftarg_common( if (list_lru_init(&btp->bt_lru)) goto error_free; - if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + if (percpu_counter_init(&btp->bt_io_count, 0, GFP_NOFS)) goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; @@ -2058,7 +2058,7 @@ xfs_alloc_buftarg_common( error_lru: list_lru_destroy(&btp->bt_lru); error_free: - kmem_free(btp); + kvfree(btp); return NULL; } From patchwork Fri May 26 01:05:53 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13255974 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2463BC7EE2E for ; Fri, 26 May 2023 01:05:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230203AbjEZBF6 (ORCPT ); Thu, 25 May 2023 21:05:58 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52692 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229832AbjEZBF4 (ORCPT ); Thu, 25 May 2023 21:05:56 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E2ACE125; Thu, 25 May 2023 18:05:54 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 653C6647D0; Fri, 26 May 2023 01:05:54 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id C4882C433D2; Fri, 26 May 2023 01:05:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063153; bh=J1D6ggHB8DbIN616QetckYX4EhQV1VenPI7GCUqux8Q=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=Y8ousCEaePNm1yLJYDrN9hi/YUSHMvaFvXp6ZX2VmOvl5oXrdbIp7TwyYGTjvOlTx zaHmB2aOpLacHAdLzSJZNvqRL06wumgHo/RY+qnNRdM4oEx7QCJWU5Z1Bv0Usobgor Pno5UVrJDw0OIJdvVtjHFO/ypZX6c5/WytSD+cnSN0tkYU0liNmVKQeWQroWRR8xoO iteXjJpFPdr65RONBYgUw7aNIGE3hKMevR9jLd8DJsDJ8Xv9igYyC88fVAyDYrlXdW l7Rn/r5KQ5pzoglzuCIlC1dqoz7eYk3kFBSmpigOYpDllAAiSnQ/AeGSQPs1w5TeuH AqwXHf5FPUeGA== Date: Thu, 25 May 2023 18:05:53 -0700 Subject: [PATCH 5/9] xfs: support in-memory buffer cache targets From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061924.3733082.12588796681828249746.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Allow the buffer cache to target in-memory files by connecting it to xfiles. Signed-off-by: Darrick J. Wong --- fs/xfs/Kconfig | 4 ++ fs/xfs/Makefile | 1 + fs/xfs/scrub/xfile.h | 16 +++++++++ fs/xfs/xfs_buf.c | 44 ++++++++++++++++++++++-- fs/xfs/xfs_buf.h | 26 +++++++++++++- fs/xfs/xfs_buf_xfile.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_buf_xfile.h | 18 ++++++++++ 7 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 fs/xfs/xfs_buf_xfile.c create mode 100644 fs/xfs/xfs_buf_xfile.h diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index acd56ebe77f9..71fd486eaca1 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -128,6 +128,9 @@ config XFS_LIVE_HOOKS bool select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL +config XFS_IN_MEMORY_FILE + bool + config XFS_ONLINE_SCRUB bool "XFS online metadata check support" default n @@ -135,6 +138,7 @@ config XFS_ONLINE_SCRUB depends on TMPFS && SHMEM select XFS_LIVE_HOOKS select XFS_DRAIN_INTENTS + select XFS_IN_MEMORY_FILE help If you say Y here you will be able to check metadata on a mounted XFS filesystem. 
This feature is intended to reduce diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index ea90abdd9941..fc44611cf723 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -138,6 +138,7 @@ endif xfs-$(CONFIG_XFS_DRAIN_INTENTS) += xfs_drain.o xfs-$(CONFIG_XFS_LIVE_HOOKS) += xfs_hooks.o +xfs-$(CONFIG_XFS_IN_MEMORY_FILE) += xfs_buf_xfile.o # online scrub/repair ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y) diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index adf5dbdc4c21..083348b4cdaf 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -6,6 +6,8 @@ #ifndef __XFS_SCRUB_XFILE_H__ #define __XFS_SCRUB_XFILE_H__ +#ifdef CONFIG_XFS_IN_MEMORY_FILE + struct xfile_page { struct page *page; void *fsdata; @@ -24,6 +26,7 @@ static inline pgoff_t xfile_page_index(const struct xfile_page *xfpage) struct xfile { struct file *file; + struct xfs_buf_cache bcache; }; int xfile_create(struct xfs_mount *mp, const char *description, loff_t isize, @@ -76,5 +79,18 @@ int xfile_get_page(struct xfile *xf, loff_t offset, unsigned int len, int xfile_put_page(struct xfile *xf, struct xfile_page *xbuf); int xfile_dump(struct xfile *xf); +#else +static inline int +xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t offset) +{ + return -EIO; +} + +static inline int +xfile_obj_store(struct xfile *xf, const void *buf, size_t count, loff_t offset) +{ + return -EIO; +} +#endif /* CONFIG_XFS_IN_MEMORY_FILE */ #endif /* __XFS_SCRUB_XFILE_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 19cefed4dca7..e3f24594e575 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -21,6 +21,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_ag.h" +#include "xfs_buf_xfile.h" struct kmem_cache *xfs_buf_cache; @@ -1552,6 +1553,30 @@ xfs_buf_ioapply_map( } +/* Start a synchronous process-context buffer IO. */ +static inline void +xfs_buf_start_sync_io( + struct xfs_buf *bp) +{ + atomic_inc(&bp->b_io_remaining); +} + +/* Finish a synchronous process-context buffer IO. 
*/ +static void +xfs_buf_end_sync_io( + struct xfs_buf *bp, + int error) +{ + if (error) + cmpxchg(&bp->b_io_error, 0, error); + + if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp); +} + STATIC void _xfs_buf_ioapply( struct xfs_buf *bp) @@ -1609,6 +1634,15 @@ _xfs_buf_ioapply( /* we only use the buffer cache for meta-data */ op |= REQ_META; + if (bp->b_target->bt_flags & XFS_BUFTARG_XFILE) { + int error; + + xfs_buf_start_sync_io(bp); + error = xfile_buf_ioapply(bp); + xfs_buf_end_sync_io(bp, error); + return; + } + /* * Walk all the vectors issuing IO on them. Set up the initial offset * into the buffer and the desired IO size before we start - @@ -1974,9 +2008,11 @@ xfs_free_buftarg( percpu_counter_destroy(&btp->bt_io_count); list_lru_destroy(&btp->bt_lru); - blkdev_issue_flush(btp->bt_bdev); - invalidate_bdev(btp->bt_bdev); - fs_put_dax(btp->bt_daxdev, btp->bt_mount); + if (!(btp->bt_flags & XFS_BUFTARG_XFILE)) { + blkdev_issue_flush(btp->bt_bdev); + invalidate_bdev(btp->bt_bdev); + fs_put_dax(btp->bt_daxdev, btp->bt_mount); + } kvfree(btp); } @@ -2017,7 +2053,7 @@ xfs_setsize_buftarg_early( return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev)); } -static struct xfs_buftarg * +struct xfs_buftarg * xfs_alloc_buftarg_common( struct xfs_mount *mp, const char *descr) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index dd7964bc76d7..90b67a11e3c1 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -21,6 +21,7 @@ extern struct kmem_cache *xfs_buf_cache; * Base types */ struct xfs_buf; +struct xfile; #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) @@ -106,11 +107,15 @@ void xfs_buf_cache_destroy(struct xfs_buf_cache *bch); */ typedef struct xfs_buftarg { dev_t bt_dev; - struct block_device *bt_bdev; + union { + struct block_device *bt_bdev; + struct xfile *bt_xfile; + }; struct dax_device 
*bt_daxdev; u64 bt_dax_part_off; struct xfs_mount *bt_mount; struct xfs_buf_cache *bt_cache; + unsigned int bt_flags; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; size_t bt_logical_sectorsize; @@ -124,6 +129,13 @@ typedef struct xfs_buftarg { struct ratelimit_state bt_ioerror_rl; } xfs_buftarg_t; +#ifdef CONFIG_XFS_IN_MEMORY_FILE +/* in-memory buftarg via bt_xfile */ +# define XFS_BUFTARG_XFILE (1U << 0) +#else +# define XFS_BUFTARG_XFILE (0) +#endif + #define XB_PAGES 2 struct xfs_buf_map { @@ -371,6 +383,8 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) /* * Handling of buftargs. */ +struct xfs_buftarg *xfs_alloc_buftarg_common(struct xfs_mount *mp, + const char *descr); struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp, struct block_device *bdev); extern void xfs_free_buftarg(struct xfs_buftarg *); @@ -381,24 +395,32 @@ extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int); static inline struct block_device * xfs_buftarg_bdev(struct xfs_buftarg *btp) { + if (btp->bt_flags & XFS_BUFTARG_XFILE) + return NULL; return btp->bt_bdev; } static inline unsigned int xfs_getsize_buftarg(struct xfs_buftarg *btp) { + if (btp->bt_flags & XFS_BUFTARG_XFILE) + return SECTOR_SIZE; return block_size(btp->bt_bdev); } static inline bool xfs_readonly_buftarg(struct xfs_buftarg *btp) { + if (btp->bt_flags & XFS_BUFTARG_XFILE) + return false; return bdev_read_only(btp->bt_bdev); } static inline int xfs_buftarg_flush(struct xfs_buftarg *btp) { + if (btp->bt_flags & XFS_BUFTARG_XFILE) + return 0; return blkdev_issue_flush(btp->bt_bdev); } @@ -410,6 +432,8 @@ xfs_buftarg_zeroout( gfp_t gfp_mask, unsigned flags) { + if (btp->bt_flags & XFS_BUFTARG_XFILE) + return -EOPNOTSUPP; return blkdev_issue_zeroout(btp->bt_bdev, sector, nr_sects, gfp_mask, flags); } diff --git a/fs/xfs/xfs_buf_xfile.c b/fs/xfs/xfs_buf_xfile.c new file mode 100644 index 000000000000..69f1d62e0fcb --- /dev/null +++ b/fs/xfs/xfs_buf_xfile.c @@ -0,0 +1,89 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_buf.h" +#include "xfs_buf_xfile.h" +#include "scrub/xfile.h" + +/* Perform a buffer IO to an xfile. Caller must be in process context. */ +int +xfile_buf_ioapply( + struct xfs_buf *bp) +{ + struct xfile *xfile = bp->b_target->bt_xfile; + loff_t pos = BBTOB(xfs_buf_daddr(bp)); + size_t size = BBTOB(bp->b_length); + + if (bp->b_map_count > 1) { + /* We don't need or support multi-map buffers. */ + ASSERT(0); + return -EIO; + } + + if (bp->b_flags & XBF_WRITE) + return xfile_obj_store(xfile, bp->b_addr, size, pos); + return xfile_obj_load(xfile, bp->b_addr, size, pos); +} + +/* Allocate a buffer cache target for a memory-backed file. */ +int +xfile_alloc_buftarg( + struct xfs_mount *mp, + const char *descr, + struct xfs_buftarg **btpp) +{ + struct xfs_buftarg *btp; + struct xfile *xfile; + int error; + + error = xfile_create(mp, descr, 0, &xfile); + if (error) + return error; + + error = xfs_buf_cache_init(&xfile->bcache); + if (error) + goto out_xfile; + + btp = xfs_alloc_buftarg_common(mp, descr); + if (!btp) { + error = -ENOMEM; + goto out_bcache; + } + + btp->bt_xfile = xfile; + btp->bt_dev = (dev_t)-1U; + btp->bt_flags |= XFS_BUFTARG_XFILE; + btp->bt_cache = &xfile->bcache; + + btp->bt_meta_sectorsize = SECTOR_SIZE; + btp->bt_meta_sectormask = SECTOR_SIZE - 1; + btp->bt_logical_sectorsize = SECTOR_SIZE; + btp->bt_logical_sectormask = SECTOR_SIZE - 1; + + *btpp = btp; + return 0; + +out_bcache: + xfs_buf_cache_destroy(&xfile->bcache); +out_xfile: + xfile_destroy(xfile); + return error; +} + +/* Free a buffer cache target for a memory-backed file. 
*/ +void +xfile_free_buftarg( + struct xfs_buftarg *btp) +{ + struct xfile *xfile = btp->bt_xfile; + + ASSERT(btp->bt_flags & XFS_BUFTARG_XFILE); + + xfs_free_buftarg(btp); + xfs_buf_cache_destroy(&xfile->bcache); + xfile_destroy(xfile); +} diff --git a/fs/xfs/xfs_buf_xfile.h b/fs/xfs/xfs_buf_xfile.h new file mode 100644 index 000000000000..29efaf06a676 --- /dev/null +++ b/fs/xfs/xfs_buf_xfile.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_BUF_XFILE_H__ +#define __XFS_BUF_XFILE_H__ + +#ifdef CONFIG_XFS_IN_MEMORY_FILE +int xfile_buf_ioapply(struct xfs_buf *bp); +int xfile_alloc_buftarg(struct xfs_mount *mp, const char *descr, + struct xfs_buftarg **btpp); +void xfile_free_buftarg(struct xfs_buftarg *btp); +#else +# define xfile_buf_ioapply(bp) (-EOPNOTSUPP) +#endif /* CONFIG_XFS_IN_MEMORY_FILE */ + +#endif /* __XFS_BUF_XFILE_H__ */ From patchwork Fri May 26 01:06:08 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13255975 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2F653C77B7E for ; Fri, 26 May 2023 01:06:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230132AbjEZBGY (ORCPT ); Thu, 25 May 2023 21:06:24 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52746 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231451AbjEZBGL (ORCPT ); Thu, 25 May 2023 21:06:11 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7BDB5195; Thu, 25 May 2023 18:06:10 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 1073B60B88; Fri, 26 May 2023 01:06:10 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6E93CC433D2; Fri, 26 May 2023 01:06:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063169; bh=EUvdd0iDUwZGM9hcyEMnSfmxwU+J/ssfS/OWmW/X4Cc=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=uRGDfn6LxjogH4oOY88TnNZi7tmurowmY02dSJC52VJG6EG14JzyOWQ8j7cQB6koR 3Nd3xpsu1kjQTPY0GhgrpHb5Ybm07ohL68o7FgjZClr3nM/oEqikIOLwvU7IEdhpgv /uZFJ5VEcY1NvyhxtBU1m8TOcRMRWp/7uDuw26cFTOiEgnXDXqSupyJg9EjtCHU+EG 3Uwiu3RzIfzzHEtOVGnwfMCAYCDBwaIG2+n2SdrxYXQxAfUcJHb0+wlgMk5ePETplh 2b68G/8aLVFl3BwLVsVN3ao7U0Wxaj7OJCKcjM0YHQTTiyIsiKetQ79AgsmdAlwgBh 0FouQ/MmJ5AtA== Date: Thu, 25 May 2023 18:06:08 -0700 Subject: [PATCH 6/9] xfs: consolidate btree block freeing tracepoints From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061939.3733082.2797950084991348215.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Don't waste tracepoint segment memory on per-btree block freeing tracepoints when we can do it from the generic btree code. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_btree.c | 2 ++ fs/xfs/libxfs/xfs_refcount_btree.c | 2 -- fs/xfs/libxfs/xfs_rmap_btree.c | 2 -- fs/xfs/xfs_trace.h | 32 ++++++++++++++++++++++++++++++-- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 28ba52808688..3e966182b90a 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -414,6 +414,8 @@ xfs_btree_free_block( { int error; + trace_xfs_btree_free_block(cur, bp); + error = cur->bc_ops->free_block(cur, bp); if (!error) { xfs_trans_binval(cur->bc_tp, bp); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index efe22aa1c906..978f00e9e99e 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -108,8 +108,6 @@ xfs_refcountbt_free_block( xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); int error; - trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 6c81b20e97d2..0dc086bc528f 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ 
b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -125,8 +125,6 @@ xfs_rmapbt_free_block( int error; bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); - trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno, - bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e57bf37d4993..10fb261e6c17 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2523,6 +2523,36 @@ DEFINE_EVENT(xfs_btree_cur_class, name, \ DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys); DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); +TRACE_EVENT(xfs_btree_free_block, + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_buf *bp), + TP_ARGS(cur, bp), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_ino_t, ino) + __field(xfs_btnum_t, btnum) + __field(xfs_agblock_t, agbno) + ), + TP_fast_assign( + __entry->dev = cur->bc_mp->m_super->s_dev; + __entry->agno = xfs_daddr_to_agno(cur->bc_mp, + xfs_buf_daddr(bp)); + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + __entry->ino = cur->bc_ino.ip->i_ino; + else + __entry->ino = 0; + __entry->btnum = cur->bc_btnum; + __entry->agbno = xfs_daddr_to_agbno(cur->bc_mp, + xfs_buf_daddr(bp)); + ), + TP_printk("dev %d:%d btree %s agno 0x%x ino 0x%llx agbno 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->agno, + __entry->ino, + __entry->agbno) +); + /* deferred ops */ struct xfs_defer_pending; @@ -2877,7 +2907,6 @@ DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer); DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred); DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block); -DEFINE_BUSY_EVENT(xfs_rmapbt_free_block); DEFINE_RMAPBT_EVENT(xfs_rmap_update); DEFINE_RMAPBT_EVENT(xfs_rmap_insert); DEFINE_RMAPBT_EVENT(xfs_rmap_delete); @@ -3236,7 +3265,6 @@ DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \ /* refcount 
btree tracepoints */ DEFINE_BUSY_EVENT(xfs_refcountbt_alloc_block); -DEFINE_BUSY_EVENT(xfs_refcountbt_free_block); DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update); From patchwork Fri May 26 01:06:24 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13255976 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C06B6C7EE2E for ; Fri, 26 May 2023 01:06:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233106AbjEZBGa (ORCPT ); Thu, 25 May 2023 21:06:30 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52920 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233300AbjEZBG2 (ORCPT ); Thu, 25 May 2023 21:06:28 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1C03119C; Thu, 25 May 2023 18:06:26 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id A54C8647D0; Fri, 26 May 2023 01:06:25 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0A755C433EF; Fri, 26 May 2023 01:06:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063185; bh=vmCUSlSSDF+eK3AGAteLNbmM2hGiFQQx8DhQOStWmx4=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=sHyb4hMVdjBDSnSd1hXIAyVnxufMtGdOopNduiI/3pFOum/o1VWEPnrOCeaStEKlg Q0W+bGOoSJHPW1jRD46YTgDNh8NIH2nRK5D+tA3dYVB/A9HRSZoXSmymVwAujiaLWP 
zcDCBtr/staVV8C94aEAD+3Yu/QdObBnIUrRiNq08vH+tebTlHLll7AHcqJcNPgdEN wS4Wk7WLnbAqQt3uaRzHR9F/Ch0/2l1sQ7pOU6TjKpS2EldsjA+8CX0ZpUOMaTAkdg B6lMhuU8MVjbQVYMynC4Z4FLThpGd8SeA7R+tSg96EZOLxEeAxjkhzBD0Tg2NfgTjM ppwOAqPXGUiIQ== Date: Thu, 25 May 2023 18:06:24 -0700 Subject: [PATCH 7/9] xfs: consolidate btree block allocation tracepoints From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061953.3733082.1567323682056667252.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Don't waste tracepoint segment memory on per-btree block allocation tracepoints when we can do it from the generic btree code. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_btree.c | 20 ++++++++++++--- fs/xfs/libxfs/xfs_refcount_btree.c | 2 - fs/xfs/libxfs/xfs_rmap_btree.c | 2 - fs/xfs/xfs_trace.h | 49 +++++++++++++++++++++++++++++++++++- 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 3e966182b90a..fbed51b4462e 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2693,6 +2693,20 @@ xfs_btree_rshift( return error; } +static inline int +xfs_btree_alloc_block( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *hint_block, + union xfs_btree_ptr *new_block, + int *stat) +{ + int error; + + error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat); + trace_xfs_btree_alloc_block(cur, new_block, *stat, error); + return error; +} + /* * Split cur/level block in half. * Return new block number and the key to its first @@ -2736,7 +2750,7 @@ __xfs_btree_split( xfs_btree_buf_to_ptr(cur, lbp, &lptr); /* Allocate the new block. 
If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); + error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat); if (error) goto error0; if (*stat == 0) @@ -3016,7 +3030,7 @@ xfs_btree_new_iroot( pp = xfs_btree_ptr_addr(cur, 1, block); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); + error = xfs_btree_alloc_block(cur, pp, &nptr, stat); if (error) goto error0; if (*stat == 0) @@ -3116,7 +3130,7 @@ xfs_btree_new_root( cur->bc_ops->init_ptr_from_cur(cur, &rptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); + error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat); if (error) goto error0; if (*stat == 0) diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 978f00e9e99e..c5b99f1322ba 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -77,8 +77,6 @@ xfs_refcountbt_alloc_block( xfs_refc_block(args.mp))); if (error) goto out_error; - trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, - args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { *stat = 0; return 0; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 0dc086bc528f..43ff2236f623 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -94,8 +94,6 @@ xfs_rmapbt_alloc_block( &bno, 1); if (error) return error; - - trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1); if (bno == NULLAGBLOCK) { *stat = 0; return 0; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 10fb261e6c17..b1a1c90d8feb 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2523,6 +2523,53 @@ DEFINE_EVENT(xfs_btree_cur_class, name, \ DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys); DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); 
+TRACE_EVENT(xfs_btree_alloc_block, + TP_PROTO(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, int stat, + int error), + TP_ARGS(cur, ptr, stat, error), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_ino_t, ino) + __field(xfs_btnum_t, btnum) + __field(int, error) + __field(xfs_agblock_t, agbno) + ), + TP_fast_assign( + __entry->dev = cur->bc_mp->m_super->s_dev; + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + __entry->agno = 0; + __entry->ino = cur->bc_ino.ip->i_ino; + } else { + __entry->agno = cur->bc_ag.pag->pag_agno; + __entry->ino = 0; + } + __entry->btnum = cur->bc_btnum; + __entry->error = error; + if (!error && stat) { + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + xfs_fsblock_t fsb = be64_to_cpu(ptr->l); + + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, + fsb); + __entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, + fsb); + } else { + __entry->agbno = be32_to_cpu(ptr->s); + } + } else { + __entry->agbno = NULLAGBLOCK; + } + ), + TP_printk("dev %d:%d btree %s agno 0x%x ino 0x%llx agbno 0x%x error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->agno, + __entry->ino, + __entry->agbno, + __entry->error) +); + TRACE_EVENT(xfs_btree_free_block, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_buf *bp), TP_ARGS(cur, bp), @@ -2906,7 +2953,6 @@ DEFINE_EVENT(xfs_rmapbt_class, name, \ DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer); DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred); -DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block); DEFINE_RMAPBT_EVENT(xfs_rmap_update); DEFINE_RMAPBT_EVENT(xfs_rmap_insert); DEFINE_RMAPBT_EVENT(xfs_rmap_delete); @@ -3264,7 +3310,6 @@ DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \ TP_ARGS(mp, agno, i1, i2, i3)) /* refcount btree tracepoints */ -DEFINE_BUSY_EVENT(xfs_refcountbt_alloc_block); DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update); 
From patchwork Fri May 26 01:06:40 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13255977 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 167FAC77B7E for ; Fri, 26 May 2023 01:07:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233448AbjEZBHA (ORCPT ); Thu, 25 May 2023 21:07:00 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:53124 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234230AbjEZBGy (ORCPT ); Thu, 25 May 2023 21:06:54 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0B6B31B0; Thu, 25 May 2023 18:06:42 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 3EB7F64C1F; Fri, 26 May 2023 01:06:41 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 921D2C433D2; Fri, 26 May 2023 01:06:40 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063200; bh=UBPQlKUZ9PYuAdQcB1fZUAWkRFuYvmVx2VggnnBlpzc=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=NwZK9XD7+AJTV3Z2WBkLQNm1yPl2sWVih8oT2Bth7xJU+Pcz63xmp+mjTADc2Mftx bBCkEZaRBO/F6AYBMXvLghycOtvNN+Wp65V3rz9aE2W9dMkGForQwM0AT8NBdoYCb2 AH0hKa+5A9oz+5uPg7PubYwNPFf7NdVkmeZllpohwO17pdv0FnF/LXJy96vLsH9LvF TVJCjV19xcnDdXHNwNGoHIAiWzRFZuOTlndXsPHqoJ1QU3Naj1fCzYPSQvxVx7oCBW d5NXfzylXF7siIjyF70sw2ABfB8TS0MFC2YnWUDczTERurGPpDeWMnQ3J9bzeqXkfN ylcUgj+biryDQ== Date: Thu, 25 May 2023 18:06:40 -0700 Subject: [PATCH 8/9] xfs: 
support in-memory btrees From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061968.3733082.10575751587029711709.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Adapt the generic btree cursor code to be able to create a btree whose buffers come from a (presumably in-memory) buftarg with a header block that's specific to in-memory btrees. We'll connect this to other parts of online scrub in the next patches. Note that in-memory btrees always have a block size matching the system memory page size for efficiency reasons. Signed-off-by: Darrick J. Wong --- fs/xfs/Kconfig | 4 fs/xfs/Makefile | 1 fs/xfs/libxfs/xfs_btree.c | 151 ++++++++++++++---- fs/xfs/libxfs/xfs_btree.h | 17 ++ fs/xfs/libxfs/xfs_btree_mem.h | 87 ++++++++++ fs/xfs/scrub/xfbtree.c | 352 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/xfbtree.h | 34 ++++ fs/xfs/scrub/xfile.h | 46 +++++ fs/xfs/xfs_buf.c | 10 + fs/xfs/xfs_buf.h | 10 + fs/xfs/xfs_buf_xfile.c | 8 + fs/xfs/xfs_buf_xfile.h | 2 fs/xfs/xfs_health.c | 3 fs/xfs/xfs_trace.c | 3 fs/xfs/xfs_trace.h | 5 - 15 files changed, 704 insertions(+), 29 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_btree_mem.h create mode 100644 fs/xfs/scrub/xfbtree.c create mode 100644 fs/xfs/scrub/xfbtree.h diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 71fd486eaca1..59cbafe8310d 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -131,6 +131,9 @@ config XFS_LIVE_HOOKS config XFS_IN_MEMORY_FILE bool +config XFS_BTREE_IN_XFILE + bool + config XFS_ONLINE_SCRUB bool "XFS online metadata check support" default n @@ -188,6 +191,7 @@ config XFS_ONLINE_REPAIR bool "XFS online metadata repair support" default n 
depends on XFS_FS && XFS_ONLINE_SCRUB + select XFS_BTREE_IN_XFILE help If you say Y here you will be able to repair metadata on a mounted XFS filesystem. This feature is intended to reduce diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index fc44611cf723..8602e14354c9 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -197,6 +197,7 @@ xfs-y += $(addprefix scrub/, \ reap.o \ refcount_repair.o \ repair.o \ + xfbtree.o \ ) xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index fbed51b4462e..dbd048bc1e8e 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -28,6 +28,9 @@ #include "xfs_rmap_btree.h" #include "xfs_refcount_btree.h" #include "xfs_health.h" +#include "scrub/xfile.h" +#include "scrub/xfbtree.h" +#include "xfs_btree_mem.h" /* * Btree magic numbers. @@ -82,6 +85,9 @@ xfs_btree_check_lblock_siblings( if (level >= 0) { if (!xfs_btree_check_lptr(cur, sibling, level + 1)) return __this_address; + } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) { + if (!xfbtree_verify_xfileoff(cur, sibling)) + return __this_address; } else { if (!xfs_verify_fsbno(mp, sibling)) return __this_address; @@ -109,6 +115,9 @@ xfs_btree_check_sblock_siblings( if (level >= 0) { if (!xfs_btree_check_sptr(cur, sibling, level + 1)) return __this_address; + } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) { + if (!xfbtree_verify_xfileoff(cur, sibling)) + return __this_address; } else { if (!xfs_verify_agbno(pag, sibling)) return __this_address; @@ -151,7 +160,9 @@ __xfs_btree_check_lblock( cur->bc_ops->get_maxrecs(cur, level)) return __this_address; - if (bp) + if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) + fsb = xfbtree_buf_to_xfoff(cur, bp); + else if (bp) fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, @@ -218,8 +229,12 @@ __xfs_btree_check_sblock( cur->bc_ops->get_maxrecs(cur, level)) return __this_address; - if (bp) + if 
((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) { + pag = NULL; + agbno = xfbtree_buf_to_xfoff(cur, bp); + } else if (bp) { agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); + } fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, block->bb_u.s.bb_leftsib); @@ -276,6 +291,8 @@ xfs_btree_check_lptr( { if (level <= 0) return false; + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return xfbtree_verify_xfileoff(cur, fsbno); return xfs_verify_fsbno(cur->bc_mp, fsbno); } @@ -288,6 +305,8 @@ xfs_btree_check_sptr( { if (level <= 0) return false; + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return xfbtree_verify_xfileoff(cur, agbno); return xfs_verify_agbno(cur->bc_ag.pag, agbno); } @@ -302,6 +321,9 @@ xfs_btree_check_ptr( int index, int level) { + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return xfbtree_check_ptr(cur, ptr, index, level); + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]), level)) @@ -458,11 +480,36 @@ xfs_btree_del_cursor( xfs_is_shutdown(cur->bc_mp) || error != 0); if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) kmem_free(cur->bc_ops); - if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) + if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && + !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag) xfs_perag_put(cur->bc_ag.pag); + if (cur->bc_flags & XFS_BTREE_IN_XFILE) { + if (cur->bc_mem.pag) + xfs_perag_put(cur->bc_mem.pag); + } kmem_cache_free(cur->bc_cache, cur); } +/* Return the buffer target for this btree's buffer. */ +static inline struct xfs_buftarg * +xfs_btree_buftarg( + struct xfs_btree_cur *cur) +{ + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return xfbtree_target(cur->bc_mem.xfbtree); + return cur->bc_mp->m_ddev_targp; +} + +/* Return the block size (in units of 512b sectors) for this btree. 
*/ +static inline unsigned int +xfs_btree_bbsize( + struct xfs_btree_cur *cur) +{ + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return xfbtree_bbsize(); + return cur->bc_mp->m_bsize; +} + /* * Duplicate the btree cursor. * Allocate a new one, copy the record, re-get the buffers. @@ -500,10 +547,11 @@ xfs_btree_dup_cursor( new->bc_levels[i].ra = cur->bc_levels[i].ra; bp = cur->bc_levels[i].bp; if (bp) { - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - xfs_buf_daddr(bp), mp->m_bsize, - 0, &bp, - cur->bc_ops->buf_ops); + error = xfs_trans_read_buf(mp, tp, + xfs_btree_buftarg(cur), + xfs_buf_daddr(bp), + xfs_btree_bbsize(cur), 0, &bp, + cur->bc_ops->buf_ops); if (xfs_metadata_is_sick(error)) xfs_btree_mark_sick(new); if (error) { @@ -944,6 +992,9 @@ xfs_btree_readahead_lblock( xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return 0; + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { xfs_btree_reada_bufl(cur->bc_mp, left, 1, cur->bc_ops->buf_ops); @@ -969,6 +1020,8 @@ xfs_btree_readahead_sblock( xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return 0; if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, @@ -1030,6 +1083,11 @@ xfs_btree_ptr_to_daddr( if (error) return error; + if (cur->bc_flags & XFS_BTREE_IN_XFILE) { + *daddr = xfbtree_ptr_to_daddr(cur, ptr); + return 0; + } + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { fsbno = be64_to_cpu(ptr->l); *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); @@ -1058,8 +1116,9 @@ xfs_btree_readahead_ptr( if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr)) return; - xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr, - cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); + xfs_buf_readahead(xfs_btree_buftarg(cur), daddr, + 
xfs_btree_bbsize(cur) * count, + cur->bc_ops->buf_ops); } /* @@ -1233,7 +1292,9 @@ xfs_btree_init_block_cur( * change in future, but is safe for current users of the generic btree * code. */ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + owner = xfbtree_owner(cur); + else if (cur->bc_flags & XFS_BTREE_LONG_PTRS) owner = cur->bc_ino.ip->i_ino; else owner = cur->bc_ag.pag->pag_agno; @@ -1273,6 +1334,11 @@ xfs_btree_buf_to_ptr( struct xfs_buf *bp, union xfs_btree_ptr *ptr) { + if (cur->bc_flags & XFS_BTREE_IN_XFILE) { + xfbtree_buf_to_ptr(cur, bp, ptr); + return; + } + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp))); @@ -1317,15 +1383,14 @@ xfs_btree_get_buf_block( struct xfs_btree_block **block, struct xfs_buf **bpp) { - struct xfs_mount *mp = cur->bc_mp; - xfs_daddr_t d; - int error; + xfs_daddr_t d; + int error; error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; - error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, - 0, bpp); + error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d, + xfs_btree_bbsize(cur), 0, bpp); if (error) return error; @@ -1356,9 +1421,9 @@ xfs_btree_read_buf_block( error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; - error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags, bpp, - cur->bc_ops->buf_ops); + error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d, + xfs_btree_bbsize(cur), flags, bpp, + cur->bc_ops->buf_ops); if (xfs_metadata_is_sick(error)) xfs_btree_mark_sick(cur); if (error) @@ -1798,6 +1863,37 @@ xfs_btree_decrement( return error; } +/* + * Check the btree block owner now that we have the context to know who the + * real owner is. 
+ */ +static inline xfs_failaddr_t +xfs_btree_check_block_owner( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block) +{ + if (!xfs_has_crc(cur->bc_mp)) + return NULL; + + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return xfbtree_check_block_owner(cur, block); + + if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) { + if (be32_to_cpu(block->bb_u.s.bb_owner) != + cur->bc_ag.pag->pag_agno) + return __this_address; + return NULL; + } + + if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) + return NULL; + + if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino) + return __this_address; + + return NULL; +} + int xfs_btree_lookup_get_block( struct xfs_btree_cur *cur, /* btree cursor */ @@ -1836,11 +1932,7 @@ xfs_btree_lookup_get_block( return error; /* Check the inode owner since the verifiers don't. */ - if (xfs_has_crc(cur->bc_mp) && - !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) && - (cur->bc_flags & XFS_BTREE_LONG_PTRS) && - be64_to_cpu((*blkp)->bb_u.l.bb_owner) != - cur->bc_ino.ip->i_ino) + if (xfs_btree_check_block_owner(cur, *blkp) != NULL) goto out_bad; /* Did we get the level we were looking for? */ @@ -4386,7 +4478,7 @@ xfs_btree_visit_block( { struct xfs_btree_block *block; struct xfs_buf *bp; - union xfs_btree_ptr rptr; + union xfs_btree_ptr rptr, bufptr; int error; /* do right sibling readahead */ @@ -4409,15 +4501,14 @@ xfs_btree_visit_block( * return the same block without checking if the right sibling points * back to us and creates a cyclic reference in the btree. 
*/ + xfs_btree_buf_to_ptr(cur, bp, &bufptr); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp, - xfs_buf_daddr(bp))) { + if (rptr.l == bufptr.l) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } } else { - if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp, - xfs_buf_daddr(bp))) { + if (rptr.s == bufptr.s) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -4599,6 +4690,8 @@ xfs_btree_lblock_verify( xfs_fsblock_t fsb; xfs_failaddr_t fa; + ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE)); + /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) return __this_address; @@ -4654,6 +4747,8 @@ xfs_btree_sblock_verify( xfs_agblock_t agbno; xfs_failaddr_t fa; + ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE)); + /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) return __this_address; diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 5525d3715d57..a1e7fb0e5806 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -248,6 +248,15 @@ struct xfs_btree_cur_ino { #define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) }; +/* In-memory btree information */ +struct xfbtree; + +struct xfs_btree_cur_mem { + struct xfbtree *xfbtree; + struct xfs_buf *head_bp; + struct xfs_perag *pag; +}; + struct xfs_btree_level { /* buffer pointer */ struct xfs_buf *bp; @@ -287,6 +296,7 @@ struct xfs_btree_cur union { struct xfs_btree_cur_ag bc_ag; struct xfs_btree_cur_ino bc_ino; + struct xfs_btree_cur_mem bc_mem; }; /* Must be at the end of the struct! 
*/ @@ -317,6 +327,13 @@ xfs_btree_cur_sizeof(unsigned int nlevels) */ #define XFS_BTREE_STAGING (1<<5) +/* btree stored in memory; not compatible with ROOT_IN_INODE */ +#ifdef CONFIG_XFS_BTREE_IN_XFILE +# define XFS_BTREE_IN_XFILE (1<<7) +#else +# define XFS_BTREE_IN_XFILE (0) +#endif + #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h new file mode 100644 index 000000000000..5e3d58175596 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_mem.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_BTREE_MEM_H__ +#define __XFS_BTREE_MEM_H__ + +struct xfbtree; + +#ifdef CONFIG_XFS_BTREE_IN_XFILE +unsigned int xfs_btree_mem_head_nlevels(struct xfs_buf *head_bp); + +struct xfs_buftarg *xfbtree_target(struct xfbtree *xfbtree); +int xfbtree_check_ptr(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, int index, int level); +xfs_daddr_t xfbtree_ptr_to_daddr(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr); +void xfbtree_buf_to_ptr(struct xfs_btree_cur *cur, struct xfs_buf *bp, + union xfs_btree_ptr *ptr); + +unsigned int xfbtree_bbsize(void); + +void xfbtree_set_root(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, int inc); +void xfbtree_init_ptr_from_cur(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); +struct xfs_btree_cur *xfbtree_dup_cursor(struct xfs_btree_cur *cur); +bool xfbtree_verify_xfileoff(struct xfs_btree_cur *cur, + unsigned long long xfoff); +xfs_failaddr_t xfbtree_check_block_owner(struct xfs_btree_cur *cur, + struct xfs_btree_block *block); +unsigned long long xfbtree_owner(struct xfs_btree_cur *cur); +xfs_failaddr_t xfbtree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); +xfs_failaddr_t xfbtree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); +unsigned long long xfbtree_buf_to_xfoff(struct xfs_btree_cur *cur, + 
struct xfs_buf *bp); +#else +static inline unsigned int xfs_btree_mem_head_nlevels(struct xfs_buf *head_bp) +{ + return 0; +} + +static inline struct xfs_buftarg * +xfbtree_target(struct xfbtree *xfbtree) +{ + return NULL; +} + +static inline int +xfbtree_check_ptr(struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, + int index, int level) +{ + return 0; +} + +static inline xfs_daddr_t +xfbtree_ptr_to_daddr(struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr) +{ + return 0; +} + +static inline void +xfbtree_buf_to_ptr( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + union xfs_btree_ptr *ptr) +{ + memset(ptr, 0xFF, sizeof(*ptr)); +} + +static inline unsigned int xfbtree_bbsize(void) +{ + return 0; +} + +#define xfbtree_set_root NULL +#define xfbtree_init_ptr_from_cur NULL +#define xfbtree_dup_cursor NULL +#define xfbtree_verify_xfileoff(cur, xfoff) (false) +#define xfbtree_check_block_owner(cur, block) NULL +#define xfbtree_owner(cur) (0ULL) +#define xfbtree_buf_to_xfoff(cur, bp) (-1) + +#endif /* CONFIG_XFS_BTREE_IN_XFILE */ + +#endif /* __XFS_BTREE_MEM_H__ */ diff --git a/fs/xfs/scrub/xfbtree.c b/fs/xfs/scrub/xfbtree.c new file mode 100644 index 000000000000..41aed95a1ee7 --- /dev/null +++ b/fs/xfs/scrub/xfbtree.c @@ -0,0 +1,352 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_trans.h" +#include "xfs_btree.h" +#include "xfs_error.h" +#include "xfs_btree_mem.h" +#include "xfs_ag.h" +#include "scrub/xfile.h" +#include "scrub/xfbtree.h" + +/* btree ops functions for in-memory btrees. 
*/ + +static xfs_failaddr_t +xfs_btree_mem_head_verify( + struct xfs_buf *bp) +{ + struct xfs_btree_mem_head *mhead = bp->b_addr; + struct xfs_mount *mp = bp->b_mount; + + if (!xfs_verify_magic(bp, mhead->mh_magic)) + return __this_address; + if (be32_to_cpu(mhead->mh_nlevels) == 0) + return __this_address; + if (!uuid_equal(&mhead->mh_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + + return NULL; +} + +static void +xfs_btree_mem_head_read_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa = xfs_btree_mem_head_verify(bp); + + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); +} + +static void +xfs_btree_mem_head_write_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa = xfs_btree_mem_head_verify(bp); + + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); +} + +static const struct xfs_buf_ops xfs_btree_mem_head_buf_ops = { + .name = "xfs_btree_mem_head", + .magic = { cpu_to_be32(XFS_BTREE_MEM_HEAD_MAGIC), + cpu_to_be32(XFS_BTREE_MEM_HEAD_MAGIC) }, + .verify_read = xfs_btree_mem_head_read_verify, + .verify_write = xfs_btree_mem_head_write_verify, + .verify_struct = xfs_btree_mem_head_verify, +}; + +/* Initialize the header block for an in-memory btree. */ +static inline void +xfs_btree_mem_head_init( + struct xfs_buf *head_bp, + unsigned long long owner, + xfileoff_t leaf_xfoff) +{ + struct xfs_btree_mem_head *mhead = head_bp->b_addr; + struct xfs_mount *mp = head_bp->b_mount; + + mhead->mh_magic = cpu_to_be32(XFS_BTREE_MEM_HEAD_MAGIC); + mhead->mh_nlevels = cpu_to_be32(1); + mhead->mh_owner = cpu_to_be64(owner); + mhead->mh_root = cpu_to_be64(leaf_xfoff); + uuid_copy(&mhead->mh_uuid, &mp->m_sb.sb_meta_uuid); + + head_bp->b_ops = &xfs_btree_mem_head_buf_ops; +} + +/* Return tree height from the in-memory btree head. */ +unsigned int +xfs_btree_mem_head_nlevels( + struct xfs_buf *head_bp) +{ + struct xfs_btree_mem_head *mhead = head_bp->b_addr; + + return be32_to_cpu(mhead->mh_nlevels); +} + +/* Extract the buftarg target for this xfile btree. 
*/ +struct xfs_buftarg * +xfbtree_target(struct xfbtree *xfbtree) +{ + return xfbtree->target; +} + +/* Is this daddr (sector offset) contained within the buffer target? */ +static inline bool +xfbtree_verify_buftarg_xfileoff( + struct xfs_buftarg *btp, + xfileoff_t xfoff) +{ + xfs_daddr_t xfoff_daddr = xfo_to_daddr(xfoff); + + return xfs_buftarg_verify_daddr(btp, xfoff_daddr); +} + +/* Is this btree xfile offset contained within the xfile? */ +bool +xfbtree_verify_xfileoff( + struct xfs_btree_cur *cur, + unsigned long long xfoff) +{ + struct xfs_buftarg *btp = xfbtree_target(cur->bc_mem.xfbtree); + + return xfbtree_verify_buftarg_xfileoff(btp, xfoff); +} + +/* Check if a btree pointer is reasonable. */ +int +xfbtree_check_ptr( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int index, + int level) +{ + xfileoff_t bt_xfoff; + xfs_failaddr_t fa = NULL; + + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + bt_xfoff = be64_to_cpu(ptr->l); + else + bt_xfoff = be32_to_cpu(ptr->s); + + if (!xfbtree_verify_xfileoff(cur, bt_xfoff)) + fa = __this_address; + + if (fa) { + xfs_err(cur->bc_mp, +"In-memory: Corrupt btree %d flags 0x%x pointer at level %d index %d fa %pS.", + cur->bc_btnum, cur->bc_flags, level, index, + fa); + return -EFSCORRUPTED; + } + return 0; +} + +/* Convert a btree pointer to a daddr */ +xfs_daddr_t +xfbtree_ptr_to_daddr( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr) +{ + xfileoff_t bt_xfoff; + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + bt_xfoff = be64_to_cpu(ptr->l); + else + bt_xfoff = be32_to_cpu(ptr->s); + return xfo_to_daddr(bt_xfoff); +} + +/* Set the pointer to point to this buffer. 
*/ +void +xfbtree_buf_to_ptr( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + union xfs_btree_ptr *ptr) +{ + xfileoff_t xfoff = xfs_daddr_to_xfo(xfs_buf_daddr(bp)); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(xfoff); + else + ptr->s = cpu_to_be32(xfoff); +} + +/* Return the in-memory btree block size, in units of 512 bytes. */ +unsigned int xfbtree_bbsize(void) +{ + return xfo_to_daddr(1); +} + +/* Set the root of an in-memory btree. */ +void +xfbtree_set_root( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int inc) +{ + struct xfs_buf *head_bp = cur->bc_mem.head_bp; + struct xfs_btree_mem_head *mhead = head_bp->b_addr; + + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + mhead->mh_root = ptr->l; + } else { + uint32_t root = be32_to_cpu(ptr->s); + + mhead->mh_root = cpu_to_be64(root); + } + be32_add_cpu(&mhead->mh_nlevels, inc); + xfs_trans_log_buf(cur->bc_tp, head_bp, 0, sizeof(*mhead) - 1); +} + +/* Initialize a pointer from the in-memory btree header. */ +void +xfbtree_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_buf *head_bp = cur->bc_mem.head_bp; + struct xfs_btree_mem_head *mhead = head_bp->b_addr; + + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + ptr->l = mhead->mh_root; + } else { + uint64_t root = be64_to_cpu(mhead->mh_root); + + ptr->s = cpu_to_be32(root); + } +} + +/* Duplicate an in-memory btree cursor. 
*/ +struct xfs_btree_cur * +xfbtree_dup_cursor( + struct xfs_btree_cur *cur) +{ + struct xfs_btree_cur *ncur; + + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_btnum, + cur->bc_maxlevels, cur->bc_cache); + ncur->bc_flags = cur->bc_flags; + ncur->bc_nlevels = cur->bc_nlevels; + ncur->bc_statoff = cur->bc_statoff; + ncur->bc_ops = cur->bc_ops; + memcpy(&ncur->bc_mem, &cur->bc_mem, sizeof(cur->bc_mem)); + + if (cur->bc_mem.pag) + ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag); + + return ncur; +} + +/* Check the owner of an in-memory btree block. */ +xfs_failaddr_t +xfbtree_check_block_owner( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (be64_to_cpu(block->bb_u.l.bb_owner) != xfbt->owner) + return __this_address; + + return NULL; + } + + if (be32_to_cpu(block->bb_u.s.bb_owner) != xfbt->owner) + return __this_address; + + return NULL; +} + +/* Return the owner of this in-memory btree. */ +unsigned long long +xfbtree_owner( + struct xfs_btree_cur *cur) +{ + return cur->bc_mem.xfbtree->owner; +} + +/* Return the xfile offset (in blocks) of a btree buffer. */ +unsigned long long +xfbtree_buf_to_xfoff( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + return xfs_daddr_to_xfo(xfs_buf_daddr(bp)); +} + +/* Verify a long-format btree block. 
*/ +xfs_failaddr_t +xfbtree_lblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buftarg *btp = bp->b_target; + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && + !xfbtree_verify_buftarg_xfileoff(btp, + be64_to_cpu(block->bb_u.l.bb_leftsib))) + return __this_address; + + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && + !xfbtree_verify_buftarg_xfileoff(btp, + be64_to_cpu(block->bb_u.l.bb_rightsib))) + return __this_address; + + return NULL; +} + +/* Verify a short-format btree block. */ +xfs_failaddr_t +xfbtree_sblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buftarg *btp = bp->b_target; + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && + !xfbtree_verify_buftarg_xfileoff(btp, + be32_to_cpu(block->bb_u.s.bb_leftsib))) + return __this_address; + + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && + !xfbtree_verify_buftarg_xfileoff(btp, + be32_to_cpu(block->bb_u.s.bb_rightsib))) + return __this_address; + + return NULL; +} diff --git a/fs/xfs/scrub/xfbtree.h b/fs/xfs/scrub/xfbtree.h new file mode 100644 index 000000000000..e8d8c67641f8 --- /dev/null +++ b/fs/xfs/scrub/xfbtree.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef XFS_SCRUB_XFBTREE_H__ +#define XFS_SCRUB_XFBTREE_H__ + +#ifdef CONFIG_XFS_BTREE_IN_XFILE + +/* Root block for an in-memory btree. 
*/ +struct xfs_btree_mem_head { + __be32 mh_magic; + __be32 mh_nlevels; + __be64 mh_owner; + __be64 mh_root; + uuid_t mh_uuid; +}; + +#define XFS_BTREE_MEM_HEAD_MAGIC 0x4341544D /* "CATM" */ + +/* xfile-backed in-memory btrees */ + +struct xfbtree { + /* buffer cache target for this in-memory btree */ + struct xfs_buftarg *target; + + /* Owner of this btree. */ + unsigned long long owner; +}; + +#endif /* CONFIG_XFS_BTREE_IN_XFILE */ + +#endif /* XFS_SCRUB_XFBTREE_H__ */ diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index 083348b4cdaf..c6d7851b01ca 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -79,6 +79,47 @@ int xfile_get_page(struct xfile *xf, loff_t offset, unsigned int len, int xfile_put_page(struct xfile *xf, struct xfile_page *xbuf); int xfile_dump(struct xfile *xf); + +static inline loff_t xfile_size(struct xfile *xf) +{ + return i_size_read(file_inode(xf->file)); +} + +/* file block (aka system page size) to basic block conversions. */ +typedef unsigned long long xfileoff_t; +#define XFB_BLOCKSIZE (PAGE_SIZE) +#define XFB_BSHIFT (PAGE_SHIFT) +#define XFB_SHIFT (XFB_BSHIFT - BBSHIFT) + +static inline loff_t xfo_to_b(xfileoff_t xfoff) +{ + return xfoff << XFB_BSHIFT; +} + +static inline xfileoff_t b_to_xfo(loff_t pos) +{ + return (pos + (XFB_BLOCKSIZE - 1)) >> XFB_BSHIFT; +} + +static inline xfileoff_t b_to_xfot(loff_t pos) +{ + return pos >> XFB_BSHIFT; +} + +static inline xfs_daddr_t xfo_to_daddr(xfileoff_t xfoff) +{ + return xfoff << XFB_SHIFT; +} + +static inline xfileoff_t xfs_daddr_to_xfo(xfs_daddr_t bb) +{ + return (bb + (xfo_to_daddr(1) - 1)) >> XFB_SHIFT; +} + +static inline xfileoff_t xfs_daddr_to_xfot(xfs_daddr_t bb) +{ + return bb >> XFB_SHIFT; +} #else static inline int xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t offset) @@ -91,6 +132,11 @@ xfile_obj_store(struct xfile *xf, const void *buf, size_t count, loff_t offset) { return -EIO; } + +static inline loff_t xfile_size(struct xfile *xf) +{ + 
return 0; +} #endif /* CONFIG_XFS_IN_MEMORY_FILE */ #endif /* __XFS_SCRUB_XFILE_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e3f24594e575..2d717808ef7a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2486,3 +2486,13 @@ xfs_verify_magic16( return false; return dmagic == bp->b_ops->magic16[idx]; } + +/* Return the number of sectors for a buffer target. */ +xfs_daddr_t +xfs_buftarg_nr_sectors( + struct xfs_buftarg *btp) +{ + if (btp->bt_flags & XFS_BUFTARG_XFILE) + return xfile_buftarg_nr_sectors(btp); + return bdev_nr_sectors(btp->bt_bdev); +} diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 90b67a11e3c1..661cd16ff64e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -438,6 +438,16 @@ xfs_buftarg_zeroout( flags); } +xfs_daddr_t xfs_buftarg_nr_sectors(struct xfs_buftarg *btp); + +static inline bool +xfs_buftarg_verify_daddr( + struct xfs_buftarg *btp, + xfs_daddr_t daddr) +{ + return daddr < xfs_buftarg_nr_sectors(btp); +} + int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); diff --git a/fs/xfs/xfs_buf_xfile.c b/fs/xfs/xfs_buf_xfile.c index 69f1d62e0fcb..61cc9b1dbed6 100644 --- a/fs/xfs/xfs_buf_xfile.c +++ b/fs/xfs/xfs_buf_xfile.c @@ -87,3 +87,11 @@ xfile_free_buftarg( xfs_buf_cache_destroy(&xfile->bcache); xfile_destroy(xfile); } + +/* Sector count for this xfile buftarg. 
*/ +xfs_daddr_t +xfile_buftarg_nr_sectors( + struct xfs_buftarg *btp) +{ + return xfile_size(btp->bt_xfile) >> SECTOR_SHIFT; +} diff --git a/fs/xfs/xfs_buf_xfile.h b/fs/xfs/xfs_buf_xfile.h index 29efaf06a676..c3f0bb31a31a 100644 --- a/fs/xfs/xfs_buf_xfile.h +++ b/fs/xfs/xfs_buf_xfile.h @@ -11,8 +11,10 @@ int xfile_buf_ioapply(struct xfs_buf *bp); int xfile_alloc_buftarg(struct xfs_mount *mp, const char *descr, struct xfs_buftarg **btpp); void xfile_free_buftarg(struct xfs_buftarg *btp); +xfs_daddr_t xfile_buftarg_nr_sectors(struct xfs_buftarg *btp); #else # define xfile_buf_ioapply(bp) (-EOPNOTSUPP) +# define xfile_buftarg_nr_sectors(btp) (0) #endif /* CONFIG_XFS_IN_MEMORY_FILE */ #endif /* __XFS_BUF_XFILE_H__ */ diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 74a4620d763b..93ebf6f9807f 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -508,6 +508,9 @@ xfs_btree_mark_sick( { unsigned int mask; + if (cur->bc_flags & XFS_BTREE_IN_XFILE) + return; + switch (cur->bc_btnum) { case XFS_BTNUM_BMAP: xfs_bmap_mark_sick(cur->bc_ino.ip, cur->bc_ino.whichfork); diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 8a5dc1538aa8..2d49310fb912 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -36,6 +36,9 @@ #include "xfs_error.h" #include #include "xfs_iomap.h" +#include "scrub/xfile.h" +#include "scrub/xfbtree.h" +#include "xfs_btree_mem.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index b1a1c90d8feb..ab9217c1c3d8 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2537,7 +2537,10 @@ TRACE_EVENT(xfs_btree_alloc_block, ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + if (cur->bc_flags & XFS_BTREE_IN_XFILE) { + __entry->agno = 0; + __entry->ino = 0; + } else if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { __entry->agno = 0; __entry->ino = cur->bc_ino.ip->i_ino; } else { From patchwork 
Fri May 26 01:06:55 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13255978 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4DB48C7EE2E for ; Fri, 26 May 2023 01:07:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233473AbjEZBHU (ORCPT ); Thu, 25 May 2023 21:07:20 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:53674 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232994AbjEZBHT (ORCPT ); Thu, 25 May 2023 21:07:19 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7DB8819C; Thu, 25 May 2023 18:06:57 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id EFBA4647D0; Fri, 26 May 2023 01:06:56 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 58DEDC433EF; Fri, 26 May 2023 01:06:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685063216; bh=I/Xlv3lbixieitB1zxg+DOjpSlIYFhEXILuxHnXbAM0=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=BdlBXoejBrg3bIaVACnYRGWB2aYhRdfirjtQvJjsSov63NIcpl7KuK8XUFwEupdf4 e9wVjsWog+zSr7rzXXqsq8GUHcj51vWgJ881CxxGUEXsFmSEksKbFT0hdhGs+phE2+ 0C6fqeg2Jx5oD/EYfby/viaF9jlBjcOQ/tZXHhxDec/zCioCQOTVspLLu++kirzu+V xCt4eIm+M1bXfflk42NBm71GLl7giGc40/zH8VSXbEUL6yLHR+RdjVBTmkNWJVC8CN kq5N4jnPE6Vp0IOd0ZRWgt3NJVHkLZV3NVhBAQcVnPmiOL5TLnYWBiDP0CxGTGAieI GJjrBmHQWcKww== Date: Thu, 25 May 2023 18:06:55 -0700 Subject: [PATCH 9/9] xfs: 
connect in-memory btrees to xfiles From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, willy@infradead.org, linux-fsdevel@vger.kernel.org Message-ID: <168506061984.3733082.16963285502416228181.stgit@frogsfrogsfrogs> In-Reply-To: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> References: <168506061839.3733082.9818919714772025609.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Darrick J. Wong Add to our stubbed-out in-memory btrees the ability to connect them with an actual in-memory backing file (aka xfiles) and the necessary pieces to track free space in the xfile and flush dirty xfbtree buffers on demand, which we'll need for online repair. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_btree_mem.h | 41 ++++ fs/xfs/scrub/bitmap.c | 28 ++ fs/xfs/scrub/bitmap.h | 3 fs/xfs/scrub/scrub.c | 5 fs/xfs/scrub/scrub.h | 3 fs/xfs/scrub/trace.c | 12 + fs/xfs/scrub/trace.h | 110 ++++++++++ fs/xfs/scrub/xfbtree.c | 466 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/xfbtree.h | 25 ++ fs/xfs/scrub/xfile.c | 83 +++++++ fs/xfs/scrub/xfile.h | 2 fs/xfs/xfs_trace.h | 1 fs/xfs/xfs_trans.h | 1 fs/xfs/xfs_trans_buf.c | 42 ++++ 14 files changed, 820 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h index 5e3d58175596..c82d3e6d220a 100644 --- a/fs/xfs/libxfs/xfs_btree_mem.h +++ b/fs/xfs/libxfs/xfs_btree_mem.h @@ -8,6 +8,26 @@ struct xfbtree; +struct xfbtree_config { + /* Buffer ops for the btree root block */ + const struct xfs_btree_ops *btree_ops; + + /* Buffer target for the xfile backing this btree. */ + struct xfs_buftarg *target; + + /* Owner of this btree. 
*/ + unsigned long long owner; + + /* Btree type number */ + xfs_btnum_t btnum; + + /* XFBTREE_CREATE_* flags */ + unsigned int flags; +}; + +/* btree has long pointers */ +#define XFBTREE_CREATE_LONG_PTRS (1U << 0) + #ifdef CONFIG_XFS_BTREE_IN_XFILE unsigned int xfs_btree_mem_head_nlevels(struct xfs_buf *head_bp); @@ -35,6 +55,16 @@ xfs_failaddr_t xfbtree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); xfs_failaddr_t xfbtree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); unsigned long long xfbtree_buf_to_xfoff(struct xfs_btree_cur *cur, struct xfs_buf *bp); + +int xfbtree_get_minrecs(struct xfs_btree_cur *cur, int level); +int xfbtree_get_maxrecs(struct xfs_btree_cur *cur, int level); + +int xfbtree_create(struct xfs_mount *mp, const struct xfbtree_config *cfg, + struct xfbtree **xfbtreep); +int xfbtree_alloc_block(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, union xfs_btree_ptr *ptr, + int *stat); +int xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp); #else static inline unsigned int xfs_btree_mem_head_nlevels(struct xfs_buf *head_bp) { @@ -77,11 +107,22 @@ static inline unsigned int xfbtree_bbsize(void) #define xfbtree_set_root NULL #define xfbtree_init_ptr_from_cur NULL #define xfbtree_dup_cursor NULL +#define xfbtree_get_minrecs NULL +#define xfbtree_get_maxrecs NULL +#define xfbtree_alloc_block NULL +#define xfbtree_free_block NULL #define xfbtree_verify_xfileoff(cur, xfoff) (false) #define xfbtree_check_block_owner(cur, block) NULL #define xfbtree_owner(cur) (0ULL) #define xfbtree_buf_to_xfoff(cur, bp) (-1) +static inline int +xfbtree_create(struct xfs_mount *mp, const struct xfbtree_config *cfg, + struct xfbtree **xfbtreep) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_XFS_BTREE_IN_XFILE */ #endif /* __XFS_BTREE_MEM_H__ */ diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index e0c89a9a0ca0..d74f706ff33c 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -379,3 +379,31 @@ 
xbitmap_test( *len = bn->bn_start - start; return false; } + +/* + * Find the first set bit in this bitmap, clear it, and return the index of + * that bit in @valp. Returns -ENODATA if no bits were set, or the usual + * negative errno. + */ +int +xbitmap_take_first_set( + struct xbitmap *bitmap, + uint64_t start, + uint64_t last, + uint64_t *valp) +{ + struct xbitmap_node *bn; + uint64_t val; + int error; + + bn = xbitmap_tree_iter_first(&bitmap->xb_root, start, last); + if (!bn) + return -ENODATA; + + val = bn->bn_start; + error = xbitmap_clear(bitmap, bn->bn_start, 1); + if (error) + return error; + *valp = val; + return 0; +} diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h index 2518e642f4d3..8159a3c4173d 100644 --- a/fs/xfs/scrub/bitmap.h +++ b/fs/xfs/scrub/bitmap.h @@ -32,6 +32,9 @@ int xbitmap_walk(struct xbitmap *bitmap, xbitmap_walk_fn fn, bool xbitmap_empty(struct xbitmap *bitmap); bool xbitmap_test(struct xbitmap *bitmap, uint64_t start, uint64_t *len); +int xbitmap_take_first_set(struct xbitmap *bitmap, uint64_t start, + uint64_t last, uint64_t *valp); + /* Bitmaps, but for type-checked for xfs_agblock_t */ struct xagb_bitmap { diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index cf8e78c16670..e57c8e7ad48a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -17,6 +17,7 @@ #include "xfs_scrub.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" +#include "xfs_buf_xfile.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -191,6 +192,10 @@ xchk_teardown( sc->flags &= ~XCHK_HAVE_FREEZE_PROT; mnt_drop_write_file(sc->file); } + if (sc->xfile_buftarg) { + xfile_free_buftarg(sc->xfile_buftarg); + sc->xfile_buftarg = NULL; + } if (sc->xfile) { xfile_destroy(sc->xfile); sc->xfile = NULL; diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index a41ba8d319b6..2f8da220c9e7 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -99,6 +99,9 @@ struct xfs_scrub { /* xfile used by the 
scrubbers; freed at teardown. */ struct xfile *xfile; + /* buffer target for the xfile; also freed at teardown. */ + struct xfs_buftarg *xfile_buftarg; + /* Lock flags for @ip. */ uint ilock_flags; diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 1fe5c5a9a1ba..d3164c59b0ba 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -12,15 +12,18 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_mem.h" #include "xfs_ag.h" #include "xfs_quota_defs.h" #include "xfs_dir2.h" +#include "xfs_da_format.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/iscan.h" #include "scrub/nlinks.h" #include "scrub/fscounters.h" +#include "scrub/xfbtree.h" /* Figure out which block the btree cursor was pointing to. */ static inline xfs_fsblock_t @@ -39,6 +42,15 @@ xchk_btree_cur_fsbno( return NULLFSBLOCK; } +#ifdef CONFIG_XFS_BTREE_IN_XFILE +static inline unsigned long +xfbtree_ino( + struct xfbtree *xfbt) +{ + return file_inode(xfbt->target->bt_xfile->file)->i_ino; +} +#endif /* CONFIG_XFS_BTREE_IN_XFILE */ + /* * We include this last to have the helpers above available for the trace * event implementations. 
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 4aefa0533a12..edc86a06da21 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -24,6 +24,8 @@ struct xfarray_sortinfo; struct xchk_iscan; struct xchk_nlink; struct xchk_fscounters; +struct xfbtree; +struct xfbtree_config; /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -866,6 +868,8 @@ DEFINE_XFILE_EVENT(xfile_pwrite); DEFINE_XFILE_EVENT(xfile_seek_data); DEFINE_XFILE_EVENT(xfile_get_page); DEFINE_XFILE_EVENT(xfile_put_page); +DEFINE_XFILE_EVENT(xfile_discard); +DEFINE_XFILE_EVENT(xfile_prealloc); TRACE_EVENT(xfarray_create, TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity), @@ -2023,8 +2027,114 @@ DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot); DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_update_inode); DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_unfixable_inode); +TRACE_EVENT(xfbtree_create, + TP_PROTO(struct xfs_mount *mp, const struct xfbtree_config *cfg, + struct xfbtree *xfbt), + TP_ARGS(mp, cfg, xfbt), + TP_STRUCT__entry( + __field(xfs_btnum_t, btnum) + __field(unsigned int, xfbtree_flags) + __field(unsigned long, xfino) + __field(unsigned int, leaf_mxr) + __field(unsigned int, leaf_mnr) + __field(unsigned int, node_mxr) + __field(unsigned int, node_mnr) + __field(unsigned long long, owner) + ), + TP_fast_assign( + __entry->btnum = cfg->btnum; + __entry->xfbtree_flags = cfg->flags; + __entry->xfino = xfbtree_ino(xfbt); + __entry->leaf_mxr = xfbt->maxrecs[0]; + __entry->node_mxr = xfbt->maxrecs[1]; + __entry->leaf_mnr = xfbt->minrecs[0]; + __entry->node_mnr = xfbt->minrecs[1]; + __entry->owner = cfg->owner; + ), + TP_printk("xfino 0x%lx btnum %s owner 0x%llx leaf_mxr %u leaf_mnr %u node_mxr %u node_mnr %u", + __entry->xfino, + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->owner, + __entry->leaf_mxr, + __entry->leaf_mnr, + __entry->node_mxr, + __entry->node_mnr) +); + +DECLARE_EVENT_CLASS(xfbtree_buf_class, + 
TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp), + TP_ARGS(xfbt, bp), + TP_STRUCT__entry( + __field(unsigned long, xfino) + __field(xfs_daddr_t, bno) + __field(int, nblks) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + ), + TP_fast_assign( + __entry->xfino = xfbtree_ino(xfbt); + __entry->bno = xfs_buf_daddr(bp); + __entry->nblks = bp->b_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->flags = bp->b_flags; + ), + TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d " + "lock %d flags %s", + __entry->xfino, + (unsigned long long)__entry->bno, + __entry->nblks, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS)) +) + +#define DEFINE_XFBTREE_BUF_EVENT(name) \ +DEFINE_EVENT(xfbtree_buf_class, name, \ + TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp), \ + TP_ARGS(xfbt, bp)) +DEFINE_XFBTREE_BUF_EVENT(xfbtree_create_root_buf); +DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_commit_buf); +DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_cancel_buf); + +DECLARE_EVENT_CLASS(xfbtree_freesp_class, + TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur, + xfs_fileoff_t fileoff), + TP_ARGS(xfbt, cur, fileoff), + TP_STRUCT__entry( + __field(unsigned long, xfino) + __field(xfs_btnum_t, btnum) + __field(int, nlevels) + __field(xfs_fileoff_t, fileoff) + ), + TP_fast_assign( + __entry->xfino = xfbtree_ino(xfbt); + __entry->btnum = cur->bc_btnum; + __entry->nlevels = cur->bc_nlevels; + __entry->fileoff = fileoff; + ), + TP_printk("xfino 0x%lx btree %s nlevels %d fileoff 0x%llx", + __entry->xfino, + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->nlevels, + (unsigned long long)__entry->fileoff) +) + +#define DEFINE_XFBTREE_FREESP_EVENT(name) \ +DEFINE_EVENT(xfbtree_freesp_class, name, \ + TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur, 
\ + xfs_fileoff_t fileoff), \ + TP_ARGS(xfbt, cur, fileoff)) +DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block); +DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ + #endif /* _TRACE_XFS_SCRUB_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/scrub/xfbtree.c b/fs/xfs/scrub/xfbtree.c index 41aed95a1ee7..5cd03457091c 100644 --- a/fs/xfs/scrub/xfbtree.c +++ b/fs/xfs/scrub/xfbtree.c @@ -9,14 +9,19 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_trans.h" +#include "xfs_buf_item.h" #include "xfs_btree.h" #include "xfs_error.h" #include "xfs_btree_mem.h" #include "xfs_ag.h" +#include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfbtree.h" +#include "scrub/bitmap.h" +#include "scrub/trace.h" /* btree ops functions for in-memory btrees. */ @@ -142,9 +147,18 @@ xfbtree_check_ptr( else bt_xfoff = be32_to_cpu(ptr->s); - if (!xfbtree_verify_xfileoff(cur, bt_xfoff)) + if (!xfbtree_verify_xfileoff(cur, bt_xfoff)) { fa = __this_address; + goto done; + } + /* Can't point to the head or anything before it */ + if (bt_xfoff < XFBTREE_INIT_LEAF_BLOCK) { + fa = __this_address; + goto done; + } + +done: if (fa) { xfs_err(cur->bc_mp, "In-memory: Corrupt btree %d flags 0x%x pointer at level %d index %d fa %pS.", @@ -350,3 +364,453 @@ xfbtree_sblock_verify( return NULL; } + +/* Close the btree xfile and release all resources. */ +void +xfbtree_destroy( + struct xfbtree *xfbt) +{ + xbitmap_destroy(xfbt->freespace); + kfree(xfbt->freespace); + xfs_buftarg_drain(xfbt->target); + kfree(xfbt); +} + +/* Compute the number of bytes available for records. 
*/ +static inline unsigned int +xfbtree_rec_bytes( + struct xfs_mount *mp, + const struct xfbtree_config *cfg) +{ + unsigned int blocklen = xfo_to_b(1); + + if (cfg->flags & XFBTREE_CREATE_LONG_PTRS) { + if (xfs_has_crc(mp)) + return blocklen - XFS_BTREE_LBLOCK_CRC_LEN; + + return blocklen - XFS_BTREE_LBLOCK_LEN; + } + + if (xfs_has_crc(mp)) + return blocklen - XFS_BTREE_SBLOCK_CRC_LEN; + + return blocklen - XFS_BTREE_SBLOCK_LEN; +} + +/* Initialize an empty leaf block as the btree root. */ +STATIC int +xfbtree_init_leaf_block( + struct xfs_mount *mp, + struct xfbtree *xfbt, + const struct xfbtree_config *cfg) +{ + struct xfs_buf *bp; + xfs_daddr_t daddr; + int error; + unsigned int bc_flags = 0; + + if (cfg->flags & XFBTREE_CREATE_LONG_PTRS) + bc_flags |= XFS_BTREE_LONG_PTRS; + + daddr = xfo_to_daddr(XFBTREE_INIT_LEAF_BLOCK); + error = xfs_buf_get(xfbt->target, daddr, xfbtree_bbsize(), &bp); + if (error) + return error; + + trace_xfbtree_create_root_buf(xfbt, bp); + + bp->b_ops = cfg->btree_ops->buf_ops; + xfs_btree_init_block_int(mp, bp->b_addr, daddr, cfg->btnum, 0, 0, + cfg->owner, bc_flags); + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + return error; + + xfbt->xf_used++; + return 0; +} + +/* Initialize the in-memory btree header block. */ +STATIC int +xfbtree_init_head( + struct xfbtree *xfbt) +{ + struct xfs_buf *bp; + xfs_daddr_t daddr; + int error; + + daddr = xfo_to_daddr(XFBTREE_HEAD_BLOCK); + error = xfs_buf_get(xfbt->target, daddr, xfbtree_bbsize(), &bp); + if (error) + return error; + + xfs_btree_mem_head_init(bp, xfbt->owner, XFBTREE_INIT_LEAF_BLOCK); + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + return error; + + xfbt->xf_used++; + return 0; +} + +/* Create an xfile btree backing thing that can be used for in-memory btrees. 
*/ +int +xfbtree_create( + struct xfs_mount *mp, + const struct xfbtree_config *cfg, + struct xfbtree **xfbtreep) +{ + struct xfbtree *xfbt; + unsigned int blocklen = xfbtree_rec_bytes(mp, cfg); + unsigned int keyptr_len = cfg->btree_ops->key_len; + int error; + + /* Requires an xfile-backed buftarg. */ + if (!(cfg->target->bt_flags & XFS_BUFTARG_XFILE)) { + ASSERT(cfg->target->bt_flags & XFS_BUFTARG_XFILE); + return -EINVAL; + } + + xfbt = kzalloc(sizeof(struct xfbtree), XCHK_GFP_FLAGS); + if (!xfbt) + return -ENOMEM; + + /* Assign our memory file and the free space bitmap. */ + xfbt->target = cfg->target; + xfbt->freespace = kmalloc(sizeof(struct xbitmap), XCHK_GFP_FLAGS); + if (!xfbt->freespace) { + error = -ENOMEM; + goto err_buftarg; + } + xbitmap_init(xfbt->freespace); + + /* Set up min/maxrecs for this btree. */ + if (cfg->flags & XFBTREE_CREATE_LONG_PTRS) + keyptr_len += sizeof(__be64); + else + keyptr_len += sizeof(__be32); + xfbt->maxrecs[0] = blocklen / cfg->btree_ops->rec_len; + xfbt->maxrecs[1] = blocklen / keyptr_len; + xfbt->minrecs[0] = xfbt->maxrecs[0] / 2; + xfbt->minrecs[1] = xfbt->maxrecs[1] / 2; + xfbt->owner = cfg->owner; + + /* Initialize the empty btree. */ + error = xfbtree_init_leaf_block(mp, xfbt, cfg); + if (error) + goto err_freesp; + + error = xfbtree_init_head(xfbt); + if (error) + goto err_freesp; + + trace_xfbtree_create(mp, cfg, xfbt); + + *xfbtreep = xfbt; + return 0; + +err_freesp: + xbitmap_destroy(xfbt->freespace); + kfree(xfbt->freespace); +err_buftarg: + xfs_buftarg_drain(xfbt->target); + kfree(xfbt); + return error; +} + +/* Read the in-memory btree head. 
*/ +int +xfbtree_head_read_buf( + struct xfbtree *xfbt, + struct xfs_trans *tp, + struct xfs_buf **bpp) +{ + struct xfs_buftarg *btp = xfbt->target; + struct xfs_mount *mp = btp->bt_mount; + struct xfs_btree_mem_head *mhead; + struct xfs_buf *bp; + xfs_daddr_t daddr; + int error; + + daddr = xfo_to_daddr(XFBTREE_HEAD_BLOCK); + error = xfs_trans_read_buf(mp, tp, btp, daddr, xfbtree_bbsize(), 0, + &bp, &xfs_btree_mem_head_buf_ops); + if (error) + return error; + + mhead = bp->b_addr; + if (be64_to_cpu(mhead->mh_owner) != xfbt->owner) { + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); + xfs_trans_brelse(tp, bp); + return -EFSCORRUPTED; + } + + *bpp = bp; + return 0; +} + +static inline struct xfile *xfbtree_xfile(struct xfbtree *xfbt) +{ + return xfbt->target->bt_xfile; +} + +/* Allocate a block to our in-memory btree. */ +int +xfbtree_alloc_block( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + xfileoff_t bt_xfoff; + loff_t pos; + int error; + + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + /* + * Find the first free block in the free space bitmap and take it. If + * none are found, seek to end of the file. + */ + error = xbitmap_take_first_set(xfbt->freespace, 0, -1ULL, &bt_xfoff); + if (error == -ENODATA) { + bt_xfoff = xfbt->xf_used; + xfbt->xf_used++; + } else if (error) { + return error; + } + + trace_xfbtree_alloc_block(xfbt, cur, bt_xfoff); + + /* Fail if the block address exceeds the maximum for short pointers. */ + if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && bt_xfoff >= INT_MAX) { + *stat = 0; + return 0; + } + + /* Make sure we actually can write to the block before we return it. 
*/ + pos = xfo_to_b(bt_xfoff); + error = xfile_prealloc(xfbtree_xfile(xfbt), pos, xfo_to_b(1)); + if (error) + return error; + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + new->l = cpu_to_be64(bt_xfoff); + else + new->s = cpu_to_be32(bt_xfoff); + + *stat = 1; + return 0; +} + +/* Free a block from our in-memory btree. */ +int +xfbtree_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + xfileoff_t bt_xfoff, bt_xflen; + + ASSERT(cur->bc_flags & XFS_BTREE_IN_XFILE); + + bt_xfoff = xfs_daddr_to_xfot(xfs_buf_daddr(bp)); + bt_xflen = xfs_daddr_to_xfot(bp->b_length); + + trace_xfbtree_free_block(xfbt, cur, bt_xfoff); + + return xbitmap_set(xfbt->freespace, bt_xfoff, bt_xflen); +} + +/* Return the minimum number of records for a btree block. */ +int +xfbtree_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + + return xfbt->minrecs[level != 0]; +} + +/* Return the maximum number of records for a btree block. */ +int +xfbtree_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + + return xfbt->maxrecs[level != 0]; +} + +/* If this log item is a buffer item that came from the xfbtree, return it. */ +static inline struct xfs_buf * +xfbtree_buf_match( + struct xfbtree *xfbt, + const struct xfs_log_item *lip) +{ + const struct xfs_buf_log_item *bli; + struct xfs_buf *bp; + + if (lip->li_type != XFS_LI_BUF) + return NULL; + + bli = container_of(lip, struct xfs_buf_log_item, bli_item); + bp = bli->bli_buf; + if (bp->b_target != xfbt->target) + return NULL; + + return bp; +} + +/* + * Detach this (probably dirty) xfbtree buffer from the transaction by any + * means necessary. Returns true if the buffer needs to be written. 
+ */ +STATIC bool +xfbtree_trans_bdetach( + struct xfs_trans *tp, + struct xfs_buf *bp) +{ + struct xfs_buf_log_item *bli = bp->b_log_item; + bool dirty; + + ASSERT(bli != NULL); + + dirty = bli->bli_flags & (XFS_BLI_DIRTY | XFS_BLI_ORDERED); + + bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED | + XFS_BLI_LOGGED | XFS_BLI_STALE); + clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags); + + while (bp->b_log_item != NULL) + xfs_trans_bdetach(tp, bp); + + return dirty; +} + +/* + * Commit changes to the incore btree immediately by writing all dirty xfbtree + * buffers to the backing xfile. This detaches all xfbtree buffers from the + * transaction, even on failure. The buffer locks are dropped between the + * delwri queue and submit, so the caller must synchronize btree access. + * + * Normally we'd let the buffers commit with the transaction and get written to + * the xfile via the log, but online repair stages ephemeral btrees in memory + * and uses the btree_staging functions to write new btrees to disk atomically. + * The in-memory btree (and its backing store) are discarded at the end of the + * repair phase, which means that xfbtree buffers cannot commit with the rest + * of a transaction. + * + * In other words, online repair only needs the transaction to collect buffer + * pointers and to avoid buffer deadlocks, not to guarantee consistency of + * updates. + */ +int +xfbtree_trans_commit( + struct xfbtree *xfbt, + struct xfs_trans *tp) +{ + LIST_HEAD(buffer_list); + struct xfs_log_item *lip, *n; + bool corrupt = false; + bool tp_dirty = false; + + /* + * For each xfbtree buffer attached to the transaction, write the dirty + * buffers to the xfile and release them. 
+ */ + list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { + struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip); + bool dirty; + + if (!bp) { + if (test_bit(XFS_LI_DIRTY, &lip->li_flags)) + tp_dirty |= true; + continue; + } + + trace_xfbtree_trans_commit_buf(xfbt, bp); + + dirty = xfbtree_trans_bdetach(tp, bp); + if (dirty && !corrupt) { + xfs_failaddr_t fa = bp->b_ops->verify_struct(bp); + + /* + * Because this btree is ephemeral, validate the buffer + * structure before delwri_submit so that we can return + * corruption errors to the caller without shutting + * down the filesystem. + * + * If the buffer fails verification, log the failure + * but continue walking the transaction items so that + * we remove all ephemeral btree buffers. + */ + if (fa) { + corrupt = true; + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } else { + xfs_buf_delwri_queue_here(bp, &buffer_list); + } + } + + xfs_buf_relse(bp); + } + + /* + * Reset the transaction's dirty flag to reflect the dirty state of the + * log items that are still attached. + */ + tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) | + (tp_dirty ? XFS_TRANS_DIRTY : 0); + + if (corrupt) { + xfs_buf_delwri_cancel(&buffer_list); + return -EFSCORRUPTED; + } + + if (list_empty(&buffer_list)) + return 0; + + return xfs_buf_delwri_submit(&buffer_list); +} + +/* + * Cancel changes to the incore btree by detaching all the xfbtree buffers. + * Changes are not written to the backing store. This is needed for online + * repair btrees, which are by nature ephemeral. 
+ */ +void +xfbtree_trans_cancel( + struct xfbtree *xfbt, + struct xfs_trans *tp) +{ + struct xfs_log_item *lip, *n; + bool tp_dirty = false; + + list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { + struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip); + + if (!bp) { + if (test_bit(XFS_LI_DIRTY, &lip->li_flags)) + tp_dirty |= true; + continue; + } + + trace_xfbtree_trans_cancel_buf(xfbt, bp); + + xfbtree_trans_bdetach(tp, bp); + xfs_buf_relse(bp); + } + + /* + * Reset the transaction's dirty flag to reflect the dirty state of the + * log items that are still attached. + */ + tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) | + (tp_dirty ? XFS_TRANS_DIRTY : 0); +} diff --git a/fs/xfs/scrub/xfbtree.h b/fs/xfs/scrub/xfbtree.h index e8d8c67641f8..8bd4f2bee1a8 100644 --- a/fs/xfs/scrub/xfbtree.h +++ b/fs/xfs/scrub/xfbtree.h @@ -22,13 +22,36 @@ struct xfs_btree_mem_head { /* xfile-backed in-memory btrees */ struct xfbtree { - /* buffer cache target for this in-memory btree */ + /* buffer cache target for the xfile backing this in-memory btree */ struct xfs_buftarg *target; + /* Bitmap of free space from pos to used */ + struct xbitmap *freespace; + + /* Number of xfile blocks actually used by this xfbtree. */ + xfileoff_t xf_used; + /* Owner of this btree. */ unsigned long long owner; + + /* Minimum and maximum records per block. 
*/ + unsigned int maxrecs[2]; + unsigned int minrecs[2]; }; +/* The head of the in-memory btree is always at block 0 */ +#define XFBTREE_HEAD_BLOCK 0 + +/* in-memory btrees are always created with an empty leaf block at block 1 */ +#define XFBTREE_INIT_LEAF_BLOCK 1 + +int xfbtree_head_read_buf(struct xfbtree *xfbt, struct xfs_trans *tp, + struct xfs_buf **bpp); + +void xfbtree_destroy(struct xfbtree *xfbt); +int xfbtree_trans_commit(struct xfbtree *xfbt, struct xfs_trans *tp); +void xfbtree_trans_cancel(struct xfbtree *xfbt, struct xfs_trans *tp); + #endif /* CONFIG_XFS_BTREE_IN_XFILE */ #endif /* XFS_SCRUB_XFBTREE_H__ */ diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index 851aeb244660..40801b08a2b2 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -292,6 +292,89 @@ xfile_pwrite( return error; } +/* Discard pages backing a range of the xfile. */ +void +xfile_discard( + struct xfile *xf, + loff_t pos, + u64 count) +{ + trace_xfile_discard(xf, pos, count); + shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1); +} + +/* Ensure that there is storage backing the given range. */ +int +xfile_prealloc( + struct xfile *xf, + loff_t pos, + u64 count) +{ + struct inode *inode = file_inode(xf->file); + struct address_space *mapping = inode->i_mapping; + const struct address_space_operations *aops = mapping->a_ops; + struct page *page = NULL; + unsigned int pflags; + int error = 0; + + if (count > MAX_RW_COUNT) + return -E2BIG; + if (inode->i_sb->s_maxbytes - pos < count) + return -EFBIG; + + trace_xfile_prealloc(xf, pos, count); + + pflags = memalloc_nofs_save(); + while (count > 0) { + void *fsdata = NULL; + unsigned int len; + int ret; + + len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos)); + + /* + * We call write_begin directly here to avoid all the freezer + * protection lock-taking that happens in the normal path. + * shmem doesn't support fs freeze, but lockdep doesn't know + * that and will trip over that. 
+ */ + error = aops->write_begin(NULL, mapping, pos, len, &page, + &fsdata); + if (error) + break; + + /* + * xfile pages must never be mapped into userspace, so we skip + * the dcache flush. If the page is not uptodate, zero it to + * ensure we never go lacking for space here. + */ + if (!PageUptodate(page)) { + void *kaddr = kmap_local_page(page); + + memset(kaddr, 0, PAGE_SIZE); + SetPageUptodate(page); + kunmap_local(kaddr); + } + + ret = aops->write_end(NULL, mapping, pos, len, len, page, + fsdata); + if (ret < 0) { + error = ret; + break; + } + if (ret != len) { + error = -EIO; + break; + } + + count -= len; + pos += len; + } + memalloc_nofs_restore(pflags); + + return error; +} + /* Find the next written area in the xfile data for a given offset. */ loff_t xfile_seek_data( diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index c6d7851b01ca..d3b52f8069f2 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -65,6 +65,8 @@ xfile_obj_store(struct xfile *xf, const void *buf, size_t count, loff_t pos) return 0; } +void xfile_discard(struct xfile *xf, loff_t pos, u64 count); +int xfile_prealloc(struct xfile *xf, loff_t pos, u64 count); loff_t xfile_seek_data(struct xfile *xf, loff_t pos); struct xfile_stat { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ab9217c1c3d8..e4fd81549e00 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -637,6 +637,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bdetach); DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index d32abdd1e014..83e29bd2b2fd 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -219,6 +219,7 @@ struct xfs_buf *xfs_trans_getsb(struct xfs_trans *); void 
xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp); void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 6549e50d852c..e28ab74af4f0 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -392,6 +392,48 @@ xfs_trans_brelse( xfs_buf_relse(bp); } +/* + * Forcibly detach a buffer previously joined to the transaction. The caller + * will retain its locked reference to the buffer after this function returns. + * The buffer must be completely clean and must not be held to the transaction. + */ +void +xfs_trans_bdetach( + struct xfs_trans *tp, + struct xfs_buf *bp) +{ + struct xfs_buf_log_item *bip = bp->b_log_item; + + ASSERT(tp != NULL); + ASSERT(bp->b_transp == tp); + ASSERT(bip->bli_item.li_type == XFS_LI_BUF); + ASSERT(atomic_read(&bip->bli_refcount) > 0); + + trace_xfs_trans_bdetach(bip); + + /* + * Erase all recursion count, since we're removing this buffer from the + * transaction. + */ + bip->bli_recur = 0; + + /* + * The buffer must be completely clean. Specifically, it had better + * not be dirty, stale, logged, ordered, or held to the transaction. + */ + ASSERT(!test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)); + ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY)); + ASSERT(!(bip->bli_flags & XFS_BLI_HOLD)); + ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); + ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + + /* Unlink the log item from the transaction and drop the log item. */ + xfs_trans_del_item(&bip->bli_item); + xfs_buf_item_put(bip); + bp->b_transp = NULL; +} + /* * Mark the buffer as not needing to be unlocked when the buf item's * iop_committing() routine is called. The buffer must already be locked