From patchwork Tue Dec 31 23:38:04 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13924032 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5FBA613FD72 for ; Tue, 31 Dec 2024 23:38:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688285; cv=none; b=AEarqV0LC2fNLypz1L/ce/kk/nMMFNwugHr+BCduhihGXkKTUgICFK90nUlYin5rVH1D8VaMvQ8BJtkUsM1pG7ELq3lEkPljxFemGZWx4Zi60dDOo/4AhTTnn3Iomj6UuRLfqcdOyydBFLDEMza30eJuq15Y97wezT/OiO6ATmY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688285; c=relaxed/simple; bh=rITO614x3bjweEZx+igIx+iOphWDnfQg9W705lo2+Aw=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=lW8WjHdKrwPMcREmzaaueOU4TWH+oZF+Z7oG+kIrPlhUk2K3JrjBey+T4ZD+PQqSlYgN9HKfkRzG+1SmvGNzGhyYuTEnJCT7rhdvRUWwesV1X5bLN8XaM1dWjmrGeY3+vCtgNK8bmnHPjcxMOZ59jhIX2maicxYjcXLfTN5VoTs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=d4YNJhkW; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="d4YNJhkW" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DD8B7C4CED2; Tue, 31 Dec 2024 23:38:04 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1735688284; bh=rITO614x3bjweEZx+igIx+iOphWDnfQg9W705lo2+Aw=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=d4YNJhkWDqPIrwzuOLgHYFu9v2o9zswqYVEgVH1t3tj8RItxXxpdyYsdAO2Gw95xA ZqmSP+kFZWLS/O5EDqsL1Qc8RDUEOXEmmgtyVvWjhYb+GKMSGK9qdgJSCZRWAqut3u 3Us5rG90vmKKfviOMe6HYIZAtnOmzI/3XefUtEaIwMEOebtbWQ4ECd7efekWRWUJXV RONsqC+WzajyLSXoKrdpqc5ooWbL9XIRSfhJOdme3Pku4s3+xv+Dd4EzPoVVYhKayR cI8BSnx6fFE9COM32ImVrPDvOXPhywotx1co5xEmwPGWVmpzgh/gpTiBsnD+MGuTtv VKju3QqvisQfQ== Date: Tue, 31 Dec 2024 15:38:04 -0800 Subject: [PATCH 1/4] xfs: export realtime refcount information From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <173568754232.2704719.3818969425291903972.stgit@frogsfrogsfrogs> In-Reply-To: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> References: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Add support for reporting space refcount information from the realtime volume. Signed-off-by: "Darrick J. Wong" --- fs/xfs/xfs_fsrefs.c | 405 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 405 insertions(+) diff --git a/fs/xfs/xfs_fsrefs.c b/fs/xfs/xfs_fsrefs.c index 85e109dba20f99..d5b77fe79b2653 100644 --- a/fs/xfs/xfs_fsrefs.c +++ b/fs/xfs/xfs_fsrefs.c @@ -478,6 +478,395 @@ xfs_fsrefs_logdev( return xfs_fsrefs_helper(tp, info, &frec); } +#ifdef CONFIG_XFS_RT +/* Synthesize fsrefs records from rtbitmap records. 
*/ +STATIC int +xfs_fsrefs_rtdev_bitmap_helper( + struct xfs_rtgroup *rtg, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_fsrefs_irec frec = { + .refcount = 1, + }; + struct xfs_mount *mp = rtg_mount(rtg); + struct xfs_fsrefs_info *info = priv; + xfs_rtblock_t next_rtb, rec_rtb, rtb; + xfs_rgnumber_t next_rgno; + xfs_rgblock_t next_rgbno; + xfs_rgblock_t rec_rgbno; + + /* Translate the free space record to group and block number. */ + rec_rtb = xfs_rtx_to_rtb(rtg, rec->ar_startext); + rec_rgbno = xfs_rtb_to_rgbno(mp, rec_rtb); + + /* + * Figure out if there's a gap between the last fsrefs record we + * emitted and this free extent. If there is, report the gap as a + * refcount==1 record. + */ + next_rtb = xfs_daddr_to_rtb(mp, info->next_daddr); + next_rgno = xfs_rtb_to_rgno(mp, next_rtb); + next_rgbno = xfs_rtb_to_rgbno(mp, next_rtb); + + ASSERT(next_rgno >= info->group->xg_gno); + ASSERT(rec_rgbno >= next_rgbno); + + /* + * If we've already moved on to the next rtgroup, we don't have any + * fsrefs records to synthesize. + */ + if (next_rgno > info->group->xg_gno) + return 0; + + rtb = xfs_rtx_to_rtb(rtg, rec->ar_startext + rec->ar_extcount); + info->next_daddr = xfs_rtb_to_daddr(mp, rtb); + + if (rec_rtb == next_rtb) + return 0; + + /* Emit a record for the in-use space. */ + frec.start_daddr = xfs_rtb_to_daddr(mp, next_rtb); + frec.len_daddr = XFS_FSB_TO_BB(mp, rec_rgbno - next_rgbno); + frec.rec_key = next_rgbno; + return xfs_fsrefs_helper(tp, info, &frec); +} + +/* Emit records to fill a gap in the refcount btree with singly-owned blocks. */ +STATIC int +xfs_fsrefs_rtdev_fill_refcount_gap( + struct xfs_trans *tp, + struct xfs_fsrefs_info *info, + xfs_rgblock_t rgbno) +{ + struct xfs_rtalloc_rec high = { 0 }; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtgroup *rtg = to_rtg(info->group); + xfs_rtblock_t start_rtbno = + xfs_daddr_to_rtb(mp, info->next_daddr); + xfs_rtblock_t end_rtbno = + xfs_rgbno_to_rtb(rtg, rgbno); + xfs_rtxnum_t low_rtx; + xfs_daddr_t rec_daddr; + int error; + + ASSERT(xfs_rtb_to_rgno(mp, start_rtbno) == info->group->xg_gno); + + low_rtx = xfs_rtb_to_rtx(mp, start_rtbno); + if (rgbno == -1U) { + /* + * If the caller passes in an all 1s high key to signify the + * end of the group, set the extent to all 1s as well. + */ + high.ar_startext = -1ULL; + } else { + high.ar_startext = xfs_rtb_to_rtx(mp, + end_rtbno + mp->m_sb.sb_rextsize - 1); + } + if (low_rtx >= high.ar_startext) + return 0; + + error = xfs_rtalloc_query_range(rtg, tp, low_rtx, high.ar_startext, + xfs_fsrefs_rtdev_bitmap_helper, info); + if (error) + return error; + + /* + * Synthesize records for single-owner extents between the last + * fsrefcount record emitted and the end of the query range. 
+ */ + high.ar_startext = min(high.ar_startext, rtg->rtg_extents); + rec_daddr = xfs_rtb_to_daddr(mp, xfs_rtx_to_rtb(rtg, high.ar_startext)); + if (info->next_daddr > rec_daddr) + return 0; + + info->last = true; + return xfs_fsrefs_rtdev_bitmap_helper(rtg, tp, &high, info); +} + +/* Transform a absolute-startblock refcount (rtdev, logdev) into a fsrefs */ +STATIC int +xfs_fsrefs_rtdev_refcountbt_helper( + struct xfs_btree_cur *cur, + const struct xfs_refcount_irec *rec, + void *priv) +{ + struct xfs_fsrefs_irec frec = { + .refcount = rec->rc_refcount, + .rec_key = rec->rc_startblock, + }; + struct xfs_mount *mp = cur->bc_mp; + struct xfs_fsrefs_info *info = priv; + struct xfs_rtgroup *rtg = to_rtg(info->group); + xfs_rtblock_t rec_rtbno; + int error; + + /* + * Stop once we get to the CoW staging extents; they're all shoved to + * the right side of the btree and were already covered by the rtbitmap + * scan. + */ + if (rec->rc_domain != XFS_REFC_DOMAIN_SHARED) + return -ECANCELED; + + /* Report on any gaps first */ + error = xfs_fsrefs_rtdev_fill_refcount_gap(cur->bc_tp, info, + rec->rc_startblock); + if (error) + return error; + + /* Report the refcount record from the refcount btree. */ + rec_rtbno = xfs_rgbno_to_rtb(rtg, rec->rc_startblock); + frec.start_daddr = xfs_rtb_to_daddr(mp, rec_rtbno); + frec.len_daddr = XFS_FSB_TO_BB(mp, rec->rc_blockcount); + info->next_daddr = xfs_rtb_to_daddr(mp, rec_rtbno + rec->rc_blockcount); + return xfs_fsrefs_helper(cur->bc_tp, info, &frec); +} + +#define XFS_RTGLOCK_FSREFS (XFS_RTGLOCK_BITMAP | XFS_RTGLOCK_REFCOUNT) + +/* Execute a getfsrefs query against the realtime device. */ +STATIC int +xfs_fsrefs_rtdev( + struct xfs_trans *tp, + const struct xfs_fsrefs *keys, + struct xfs_fsrefs_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtgroup *rtg = NULL, *locked_rtg = NULL; + xfs_rtblock_t start_rtbno; + xfs_rtblock_t end_rtbno; + xfs_rgnumber_t start_rg; + xfs_rgnumber_t end_rg; + uint64_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); + if (keys[0].fcr_physical >= eofs) + return 0; + start_rtbno = xfs_daddr_to_rtb(mp, keys[0].fcr_physical); + end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fcr_physical)); + + info->refc_cur = info->bno_cur = NULL; + + /* + * Convert the fsrefs low/high keys to rtgroup based keys. Initialize + * low to the fsrefs low key and max out the high key to the end of the + * rtgroup. + */ + info->low.rc_startblock = xfs_rtb_to_rgbno(mp, start_rtbno); + info->low.rc_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fcr_length); + info->low.rc_refcount = 0; + info->low.rc_domain = XFS_REFC_DOMAIN_SHARED; + + /* Adjust the low key if we are continuing from where we left off. */ + if (info->low.rc_blockcount > 0) { + info->low.rc_startblock += info->low.rc_blockcount; + + start_rtbno += info->low.rc_blockcount; + if (xfs_rtb_to_daddr(mp, start_rtbno) >= eofs) + return 0; + } + + info->high.rc_startblock = -1U; + info->high.rc_blockcount = 0; + info->high.rc_refcount = 0; + info->high.rc_domain = XFS_REFC_DOMAIN_SHARED; + + start_rg = xfs_rtb_to_rgno(mp, start_rtbno); + end_rg = xfs_rtb_to_rgno(mp, end_rtbno); + + /* Query each rtgroup */ + while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rg, end_rg))) { + info->group = rtg_group(rtg); + + /* + * Set the rtgroup high key from the fsrefs high key if this + * is the last rtgroup that we're querying. 
+ */ + if (rtg_rgno(rtg) == end_rg) + info->high.rc_startblock = xfs_rtb_to_rgbno(mp, + end_rtbno); + + if (info->refc_cur) { + xfs_btree_del_cursor(info->refc_cur, XFS_BTREE_NOERROR); + info->refc_cur = NULL; + } + if (locked_rtg) + xfs_rtgroup_unlock(locked_rtg, XFS_RTGLOCK_FSREFS); + + trace_xfs_fsrefs_low_group_key(mp, info->dev, info->group, + &info->low); + trace_xfs_fsrefs_high_group_key(mp, info->dev, info->group, + &info->high); + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_FSREFS); + locked_rtg = rtg; + + /* + * Fill the query with refcount records and synthesize + * singly-owned block records from free space data. + */ + if (xfs_has_rtreflink(mp)) { + info->refc_cur = xfs_rtrefcountbt_init_cursor(tp, rtg); + + error = xfs_refcount_query_range(info->refc_cur, + &info->low, &info->high, + xfs_fsrefs_rtdev_refcountbt_helper, + info); + if (error && error != -ECANCELED) + break; + } + + /* + * Synthesize refcount==1 records from the free space data + * between the end of the last fsrefs record reported and the + * end of the range. If we don't have refcount support, the + * starting point will be the start of the query range. + */ + error = xfs_fsrefs_rtdev_fill_refcount_gap(tp, info, + info->high.rc_startblock); + if (error) + break; + + /* + * Set the rtgroup low key to the start of the rtgroup prior to + * moving on to the next rtgroup. + */ + if (rtg_rgno(rtg) == start_rg) + memset(&info->low, 0, sizeof(info->low)); + info->group = NULL; + } + + if (info->refc_cur) { + xfs_btree_del_cursor(info->refc_cur, error); + info->refc_cur = NULL; + } + if (locked_rtg) + xfs_rtgroup_unlock(locked_rtg, XFS_RTGLOCK_FSREFS); + if (info->group) { + xfs_rtgroup_rele(rtg); + info->group = NULL; + } else if (rtg) { + /* loop termination case */ + xfs_rtgroup_rele(rtg); + } + + return error; +} + +/* Synthesize fsrefs records from 64-bit rtbitmap records. */ +STATIC int +xfs_fsrefs_rtdev_nogroups_helper( + struct xfs_rtgroup *rtg, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_fsrefs_irec frec = { + .refcount = 1, + }; + struct xfs_mount *mp = rtg_mount(rtg); + struct xfs_fsrefs_info *info = priv; + xfs_rtblock_t next_rtb, rec_rtb, rtb; + + /* Translate the free space record to group and block number. */ + rec_rtb = xfs_rtx_to_rtb(rtg, rec->ar_startext); + + /* + * Figure out if there's a gap between the last fsrefs record we + * emitted and this free extent. If there is, report the gap as a + * refcount==1 record. + */ + next_rtb = xfs_daddr_to_rtb(mp, info->next_daddr); + + ASSERT(rec_rtb >= next_rtb); + + rtb = xfs_rtx_to_rtb(rtg, rec->ar_startext + rec->ar_extcount); + info->next_daddr = xfs_rtb_to_daddr(mp, rtb); + + if (rec_rtb == next_rtb) + return 0; + + /* Emit records for the in-use space. */ + frec.start_daddr = xfs_rtb_to_daddr(mp, next_rtb); + frec.len_daddr = xfs_rtb_to_daddr(mp, rec_rtb - next_rtb); + return xfs_fsrefs_helper(tp, info, &frec); +} + +/* + * Synthesize refcount information from the rtbitmap for a pre-rtgroups + * filesystem. 
+ */ +STATIC int +xfs_fsrefs_rtdev_nogroups( + struct xfs_trans *tp, + const struct xfs_fsrefs *keys, + struct xfs_fsrefs_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtgroup *rtg = NULL; + xfs_rtblock_t start_rtbno; + xfs_rtblock_t end_rtbno; + xfs_rtxnum_t low_rtx; + xfs_rtxnum_t high_rtx; + uint64_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); + if (keys[0].fcr_physical >= eofs) + return 0; + start_rtbno = xfs_daddr_to_rtb(mp, keys[0].fcr_physical); + end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fcr_physical)); + + info->refc_cur = info->bno_cur = NULL; + + /* + * Convert the fsrefs low/high keys to rtgroup based keys. Initialize + * low to the fsrefs low key and max out the high key to the end of the + * rtgroup. + */ + info->low_daddr = keys[0].fcr_physical; + + /* Adjust the low key if we are continuing from where we left off. */ + if (keys[0].fcr_length > 0) { + info->low_daddr += keys[0].fcr_length; + if (info->low_daddr >= eofs) + return 0; + } + + rtg = xfs_rtgroup_grab(mp, 0); + if (!rtg) + return -EFSCORRUPTED; + + info->group = rtg_group(rtg); + + trace_xfs_fsrefs_low_linear_key(mp, info->dev, start_rtbno); + trace_xfs_fsrefs_high_linear_key(mp, info->dev, end_rtbno); + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP); + + /* + * Walk the whole rtbitmap. Without rtgroups, the startext values can + * be more than 32-bits wide, which is why we need this separate + * implementation. + */ + low_rtx = xfs_rtb_to_rtx(mp, start_rtbno); + high_rtx = xfs_rtb_to_rtx(mp, end_rtbno + mp->m_sb.sb_rextsize - 1); + if (low_rtx < high_rtx) + error = xfs_rtalloc_query_range(rtg, tp, low_rtx, high_rtx, + xfs_fsrefs_rtdev_nogroups_helper, info); + + info->group = NULL; + + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP); + xfs_rtgroup_rele(rtg); + + return error; +} +#endif + /* Do we recognize the device? */ STATIC bool xfs_fsrefs_is_valid_device( @@ -515,7 +904,14 @@ xfs_fsrefs_check_keys( return false; } +/* + * There are only two devices if we didn't configure RT devices at build time. + */ +#ifdef CONFIG_XFS_RT +#define XFS_GETFSREFS_DEVS 3 +#else #define XFS_GETFSREFS_DEVS 2 +#endif /* CONFIG_XFS_RT */ /* * Get filesystem's extent refcounts as described in head, and format for @@ -569,6 +965,15 @@ xfs_getfsrefs( handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); handlers[1].fn = xfs_fsrefs_logdev; } +#ifdef CONFIG_XFS_RT + if (mp->m_rtdev_targp) { + handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); + if (xfs_has_rtgroups(mp)) + handlers[2].fn = xfs_fsrefs_rtdev; + else + handlers[2].fn = xfs_fsrefs_rtdev_nogroups; + } +#endif /* CONFIG_XFS_RT */ xfs_sort(handlers, XFS_GETFSREFS_DEVS, sizeof(struct xfs_fsrefs_dev), xfs_fsrefs_dev_compare); From patchwork Tue Dec 31 23:38:20 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13924033 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 092E313FD72 for ; Tue, 31 Dec 2024 23:38:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688302; cv=none; b=U/m0W7EONlKjHYtpDvRt2cg5x/x2O+uNmUg05GjIMg/6DSWZPkfzNgd0ku+39PQ2fMyBHLr5JLXjlfY+AG+lmfD4OWIxL0mhCng43L2YQhqo6OkZN/KEfg3Vgr01BaG5b9YbVV40h9soqvkY7tPUwrrWh+iDeYAnRh+jX4Kw/4Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688302; c=relaxed/simple; bh=MqVgvSJLFTkcGCLOJKr+k7tACSnjkpQ9NLB1k8eCDwI=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=prRKJX1ByqOnTsqfPmTuZsCoIWczIrkLDGmThs31i3F33oRJSY5+Jw6dkTwx/q3AAsej0gUdEqz9ye0jBQEFzkSWgp+k1KxMGl/BgQF/N8CmPI5McpJlb4zlbcRvXygnvoZHLAGc+k5SaEW3y/eJrOP0ipV6vcyLSKBOzi4040s= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=S6maF0bZ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="S6maF0bZ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 78F8EC4CED2; Tue, 31 Dec 2024 23:38:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1735688300; bh=MqVgvSJLFTkcGCLOJKr+k7tACSnjkpQ9NLB1k8eCDwI=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=S6maF0bZ5psFXG9jncATnFu71PS2YID4Im3fia2afy09/6G4kmJ7ysLbKGQWhSwK0 W0R0sRRnM3QU/mlG8EeF6Vu+Xc9LHVrf5Kr8mIXC6PZhWtaURK1U9eul7598l/bdn2 ko/LI0Sgz+i4JDBGftG2PF3syCYzuJ+RmLiu1vnRA8VCIugD3bmjHXVkE0011v4HnM hTpE8JeiO5hyQSq4UY20D9M25madWJdZcaauGSQi28MC/pAv5pMUoPEAWi3RRZNdIQ rpUEZ834r8T1rHrt15Syg+c+fsajB7Cpq2ovBgSJr/RRZspXWuHrqvuFvg63BSeFUY anTv8LspqRKfw== Date: Tue, 31 Dec 2024 15:38:20 -0800 Subject: [PATCH 2/4] xfs: capture the offset and length in fallocate tracepoints From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <173568754249.2704719.5267977950716130700.stgit@frogsfrogsfrogs> In-Reply-To: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> References: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Change the class of the fallocate tracepoints to capture the offset and length of the requested operation. Signed-off-by: "Darrick J. 
Wong" --- fs/xfs/xfs_bmap_util.c | 8 ++++---- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_trace.h | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 783349f2361ad3..c9e60fb2693c9b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -652,7 +652,7 @@ xfs_alloc_file_space( if (xfs_is_always_cow_inode(ip)) return 0; - trace_xfs_alloc_file_space(ip); + trace_xfs_alloc_file_space(ip, offset, len); if (xfs_is_shutdown(mp)) return -EIO; @@ -839,7 +839,7 @@ xfs_free_file_space( xfs_fileoff_t endoffset_fsb; int done = 0, error; - trace_xfs_free_file_space(ip); + trace_xfs_free_file_space(ip, offset, len); error = xfs_qm_dqattach(ip); if (error) @@ -987,7 +987,7 @@ xfs_collapse_file_space( xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); - trace_xfs_collapse_file_space(ip); + trace_xfs_collapse_file_space(ip, offset, len); error = xfs_free_file_space(ip, offset, len, ac); if (error) @@ -1056,7 +1056,7 @@ xfs_insert_file_space( xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); - trace_xfs_insert_file_space(ip); + trace_xfs_insert_file_space(ip, offset, len); error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb); if (error) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d31ad7bf29885d..b8f0b9a2998b9c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1362,7 +1362,7 @@ xfs_falloc_zero_range( loff_t new_size = 0; int error; - trace_xfs_zero_file_space(XFS_I(inode)); + trace_xfs_zero_file_space(XFS_I(inode), offset, len); error = xfs_falloc_newsize(file, mode, offset, len, &new_size); if (error) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7043b6481d5f97..e81247b3024e53 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -928,11 +928,6 @@ DEFINE_INODE_EVENT(xfs_getattr); DEFINE_INODE_EVENT(xfs_setattr); DEFINE_INODE_EVENT(xfs_readlink); DEFINE_INODE_EVENT(xfs_inactive_symlink); -DEFINE_INODE_EVENT(xfs_alloc_file_space); -DEFINE_INODE_EVENT(xfs_free_file_space); -DEFINE_INODE_EVENT(xfs_zero_file_space); -DEFINE_INODE_EVENT(xfs_collapse_file_space); -DEFINE_INODE_EVENT(xfs_insert_file_space); DEFINE_INODE_EVENT(xfs_readdir); #ifdef CONFIG_XFS_POSIX_ACL DEFINE_INODE_EVENT(xfs_get_acl); @@ -1732,6 +1727,11 @@ DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append); DEFINE_SIMPLE_IO_EVENT(xfs_file_splice_read); DEFINE_SIMPLE_IO_EVENT(xfs_zoned_map_blocks); +DEFINE_SIMPLE_IO_EVENT(xfs_alloc_file_space); +DEFINE_SIMPLE_IO_EVENT(xfs_free_file_space); +DEFINE_SIMPLE_IO_EVENT(xfs_zero_file_space); +DEFINE_SIMPLE_IO_EVENT(xfs_collapse_file_space); +DEFINE_SIMPLE_IO_EVENT(xfs_insert_file_space); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), From patchwork Tue Dec 31 23:38:35 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13924034 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9FE9D13FD72 for ; Tue, 31 Dec 2024 23:38:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688316; cv=none; b=GLV1h24z8xVvE36ke8qrP9nCA1FpBaeuyAyqIJiEAKB2jxva8gUeT0nXiTiYHMbnHZs1xvKZuduY3pFO3nVYsfwESbC18RQEh5phcoGcBYqnNJ5D4ZM/h45e3vSmDPvyXSHtE6T96OFQESEeQ7/3GAiT7IzD+OFj10o4Rgyrc2Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688316; c=relaxed/simple; bh=f1LSdOf2k5INgZr3CvP8U+Rs+5Tii23dw/73eMXMqa8=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=odUHa7QcobNLNi3yzHXuX9rw2neTYVj4+MzlxFHHfpujORLv3umTR3W8LeXPaqCOU1F4ym3U7ajMZ8FowsEh+syfrtzFMP6bWxMwNP19Ee4uyz5PdByPbmk0rLLXEpWMKR7TeUtTtWw1fEqg6FAmjyEGKm9/+oJ3RsfBhAeT7Zk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=QPJunkyr; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="QPJunkyr" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 29672C4CED2; Tue, 31 Dec 2024 23:38:36 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1735688316; bh=f1LSdOf2k5INgZr3CvP8U+Rs+5Tii23dw/73eMXMqa8=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=QPJunkyrLcyLt4iD5/B4KEFAzBbKSp1xy8UI7IYJDzFedwHCj+Nz02VTUSnlWFT1j ILy6IBRy+cFq2Z4jKznPLEyGl8O/fkZjMK/XcnKi6Z7UKyuETVKnia5ocf+TUysOhs HNQ5x0LiwA2nNBMPErM1DsP388QYUccjopfJzda4gdHMrIUOh11FJ27t196I693/dQ sWYXvKbClgeJW+xqyFFGy4RZ1houOrC0Ah9PuB5d/t4ZdRTyvHro+yCbIf35FaNh1N J547PhnnIQ5wBUmz5Q/DuhR7X8KVODx0h7MW8IkSVjmK9brlDUPn7J/bdty6HkN0FR N6ekwekDlwslQ== Date: Tue, 31 Dec 2024 15:38:35 -0800 Subject: [PATCH 3/4] xfs: add an ioctl to map free space into a file From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <173568754266.2704719.15632107218632778714.stgit@frogsfrogsfrogs> In-Reply-To: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> References: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Add a new ioctl to map free physical space into a file, at the same file offset as if the file were a sparse image of the physical device backing the filesystem. The intent here is to use this to prototype a free space defragmentation tool. Signed-off-by: "Darrick J. 
Wong" --- fs/xfs/libxfs/xfs_alloc.c | 88 +++++++++++++ fs/xfs/libxfs/xfs_alloc.h | 3 fs/xfs/libxfs/xfs_bmap.c | 1 fs/xfs/libxfs/xfs_fs.h | 14 ++ fs/xfs/xfs_bmap_util.c | 303 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_bmap_util.h | 1 fs/xfs/xfs_file.c | 139 +++++++++++++++++++++ fs/xfs/xfs_file.h | 2 fs/xfs/xfs_ioctl.c | 5 + fs/xfs/xfs_trace.h | 35 +++++ 10 files changed, 591 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3d33e17f2e5ce0..e689ec5cbccd7e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -4168,3 +4168,91 @@ xfs_extfree_intent_destroy_cache(void) kmem_cache_destroy(xfs_extfree_item_cache); xfs_extfree_item_cache = NULL; } + +/* + * Find the next chunk of free space in @pag starting at @agbno and going no + * higher than @end_agbno. Set @agbno and @len to whatever free space we find, + * or to @end_agbno if we find no space. + */ +int +xfs_alloc_find_freesp( + struct xfs_trans *tp, + struct xfs_perag *pag, + xfs_agblock_t *agbno, + xfs_agblock_t end_agbno, + xfs_extlen_t *len) +{ + struct xfs_mount *mp = pag_mount(pag); + struct xfs_btree_cur *cur; + struct xfs_buf *agf_bp = NULL; + xfs_agblock_t found_agbno; + xfs_extlen_t found_len; + int found; + int error; + + trace_xfs_alloc_find_freesp(pag_group(pag), *agbno, + end_agbno - *agbno); + + error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); + if (error) + return error; + + cur = xfs_bnobt_init_cursor(mp, tp, agf_bp, pag); + + /* Try to find a free extent that starts before here. */ + error = xfs_alloc_lookup_le(cur, *agbno, 0, &found); + if (error) + goto out_cur; + if (found) { + error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, + &found); + if (error) + goto out_cur; + if (XFS_IS_CORRUPT(mp, !found)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto out_cur; + } + + if (found_agbno + found_len > *agbno) + goto found; + } + + /* Examine the next record if free extent not in range. */ + error = xfs_btree_increment(cur, 0, &found); + if (error) + goto out_cur; + if (!found) + goto next_ag; + + error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, &found); + if (error) + goto out_cur; + if (XFS_IS_CORRUPT(mp, !found)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto out_cur; + } + + if (found_agbno >= end_agbno) + goto next_ag; + +found: + /* Found something, so update the mapping. */ + trace_xfs_alloc_find_freesp_done(pag_group(pag), found_agbno, + found_len); + if (found_agbno < *agbno) { + found_len -= *agbno - found_agbno; + found_agbno = *agbno; + } + *len = found_len; + *agbno = found_agbno; + goto out_cur; +next_ag: + /* Found nothing, so advance the cursor beyond the end of the range. 
*/ + *agbno = end_agbno; + *len = 0; +out_cur: + xfs_btree_del_cursor(cur, error); + return error; +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 50ef79a1ed41a1..069077d9ad2f8c 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -286,5 +286,8 @@ void xfs_extfree_intent_destroy_cache(void); xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno, uint32_t length); +int xfs_alloc_find_freesp(struct xfs_trans *tp, struct xfs_perag *pag, + xfs_agblock_t *agbno, xfs_agblock_t end_agbno, + xfs_extlen_t *len); #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8c9d540c3ba91a..11dab550ca0fb6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -41,6 +41,7 @@ #include "xfs_inode_util.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include "xfs_rtalloc.h" struct kmem_cache *xfs_bmap_intent_cache; diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 936f719236944f..f4128dbdf3b9a2 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1087,6 +1087,19 @@ xfs_getfsrefs_advance( /* fcr_flags values - returned for each non-header segment */ #define FCR_OF_LAST (1U << 0) /* last record in the dataset */ +/* map free space to file */ + +/* + * XFS_IOC_MAP_FREESP maps all the free physical space in the filesystem into + * the file at the same offsets. This ioctl requires CAP_SYS_ADMIN. + */ +struct xfs_map_freesp { + __s64 offset; /* disk address to map, in bytes */ + __s64 len; /* length in bytes */ + __u64 flags; /* must be zero */ + __u64 pad; /* must be zero */ +}; + /* * ioctl commands that are used by Linux filesystems */ @@ -1127,6 +1140,7 @@ xfs_getfsrefs_advance( #define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head) #define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry) #define XFS_IOC_GETFSREFCOUNTS _IOWR('X', 66, struct xfs_getfsrefs_head) +#define XFS_IOC_MAP_FREESP _IOW ('X', 67, struct xfs_map_freesp) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c9e60fb2693c9b..8d5c2072bcd533 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -31,6 +31,10 @@ #include "xfs_rtbitmap.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" +#include "xfs_health.h" +#include "xfs_alloc_btree.h" +#include "xfs_rmap.h" +#include "xfs_ag.h" /* Kernel only BMAP related definitions and functions */ @@ -1916,3 +1920,302 @@ xfs_convert_rtbigalloc_file_space( return 0; } #endif /* CONFIG_XFS_RT */ + +/* + * Reserve space and quota to this transaction to map in as much free space + * as we can. Callers should set @len to the amount of space desired; this + * function will shorten that quantity if it can't get space. + */ +STATIC int +xfs_map_free_reserve_more( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_extlen_t *len) +{ + struct xfs_mount *mp = ip->i_mount; + unsigned int dblocks; + unsigned int rblocks; + unsigned int min_len; + bool isrt = XFS_IS_REALTIME_INODE(ip); + int error; + + if (*len > XFS_MAX_BMBT_EXTLEN) + *len = XFS_MAX_BMBT_EXTLEN; + min_len = isrt ? 
mp->m_sb.sb_rextsize : 1; + +again: + if (isrt) { + dblocks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); + rblocks = *len; + } else { + dblocks = XFS_DIOSTRAT_SPACE_RES(mp, *len); + rblocks = 0; + } + error = xfs_trans_reserve_more_inode(tp, ip, dblocks, rblocks, false); + if (error == -ENOSPC && *len > min_len) { + *len >>= 1; + goto again; + } + if (error) { + trace_xfs_map_free_reserve_more_fail(ip, error, _RET_IP_); + return error; + } + + return 0; +} + +static inline xfs_fileoff_t +xfs_fsblock_to_fileoff( + struct xfs_mount *mp, + xfs_fsblock_t fsbno) +{ + xfs_daddr_t daddr = XFS_FSB_TO_DADDR(mp, fsbno); + + return XFS_B_TO_FSB(mp, BBTOB(daddr)); +} + +/* + * Given a file and a free physical extent, map it into the file at the same + * offset if the file were a sparse image of the physical device. Set @mval to + * whatever mapping we added to the file. + */ +STATIC int +xfs_map_free_ag_extent( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_perag *pag, + xfs_agblock_t agbno, + xfs_extlen_t len, + struct xfs_bmbt_irec *mval) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_alloc_arg args = { + .mp = mp, + .tp = tp, + .pag = pag, + .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, + .resv = XFS_AG_RESV_NONE, + .prod = 1, + .datatype = XFS_ALLOC_USERDATA, + .maxlen = len, + .minlen = 1, + }; + struct xfs_bmbt_irec irec; + xfs_fsblock_t fsbno = xfs_gbno_to_fsb(pag_group(pag), agbno); + xfs_fileoff_t startoff = xfs_fsblock_to_fileoff(mp, fsbno); + int nimaps; + int error; + + ASSERT(!XFS_IS_REALTIME_INODE(ip)); + + trace_xfs_map_free_ag_extent(ip, fsbno, len); + + /* Make sure the entire range is a hole. */ + nimaps = 1; + error = xfs_bmapi_read(ip, startoff, len, &irec, &nimaps, 0); + if (error) + return error; + + if (irec.br_startoff != startoff || + irec.br_startblock != HOLESTARTBLOCK || + irec.br_blockcount < len) + return -EINVAL; + + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, + XFS_IEXT_ADD_NOSPLIT_CNT); + if (error) + return error; + + /* + * Allocate the physical extent. We should not have dropped the lock + * since the scan of the free space metadata, so this should work, + * though the length may be adjusted to play nicely with metadata space + * reservations. + */ + error = xfs_alloc_vextent_exact_bno(&args, fsbno); + if (error) + return error; + if (args.fsbno == NULLFSBLOCK) { + /* + * We were promised the space, but failed to get it. This + * could be because the space is reserved for metadata + * expansion, or it could be because the AGFL fixup grabbed the + * first block we wanted. Either way, if the transaction is + * dirty we must commit it and tell the caller to try again. + */ + if (tp->t_flags & XFS_TRANS_DIRTY) + return -EAGAIN; + return -ENOSPC; + } + if (args.fsbno != fsbno) { + ASSERT(0); + xfs_bmap_mark_sick(ip, XFS_DATA_FORK); + return -EFSCORRUPTED; + } + + /* Map extent into file, update quota. */ + mval->br_blockcount = args.len; + mval->br_startblock = fsbno; + mval->br_startoff = startoff; + mval->br_state = XFS_EXT_UNWRITTEN; + + trace_xfs_map_free_ag_extent_done(ip, mval); + + xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, mval); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, + mval->br_blockcount); + + return 0; +} + +/* Find a free extent in this AG and map it into the file. 
*/ +STATIC int +xfs_map_free_extent( + struct xfs_inode *ip, + struct xfs_perag *pag, + xfs_agblock_t *cursor, + xfs_agblock_t end_agbno, + xfs_agblock_t *last_enospc_agbno) +{ + struct xfs_bmbt_irec irec; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + loff_t endpos; + xfs_extlen_t free_len, map_len; + int error; + + if (fatal_signal_pending(current)) + return -EINTR; + + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0, false, + &tp); + if (error) + return error; + + error = xfs_alloc_find_freesp(tp, pag, cursor, end_agbno, &free_len); + if (error) + goto out_cancel; + + /* Bail out if the cursor is beyond what we asked for. */ + if (*cursor >= end_agbno) + goto out_cancel; + + error = xfs_map_free_reserve_more(tp, ip, &free_len); + if (error) + goto out_cancel; + + map_len = free_len; + do { + error = xfs_map_free_ag_extent(tp, ip, pag, *cursor, map_len, + &irec); + if (error == -EAGAIN) { + /* Failed to map space but were told to try again. */ + error = xfs_trans_commit(tp); + goto out; + } + if (error != -ENOSPC) + break; + /* + * If we can't get the space, try asking for successively less + * space in case we're bumping up against per-AG metadata + * reservation limits. + */ + map_len >>= 1; + } while (map_len > 0); + if (error == -ENOSPC) { + if (*last_enospc_agbno != *cursor) { + /* + * However, backing off on the size of the mapping + * request might not work if an AGFL fixup allocated + * the block at *cursor. The first time this happens, + * remember that we ran out of space here, and try + * again. + */ + *last_enospc_agbno = *cursor; + } else { + /* + * If we hit this a second time on the same extent, + * then it's likely that we're bumping up against + * per-AG space reservation limits. Skip to the next + * extent. + */ + *cursor += free_len; + } + error = 0; + goto out_cancel; + } + if (error) + goto out_cancel; + + /* Update isize if needed. */ + endpos = XFS_FSB_TO_B(mp, irec.br_startoff + irec.br_blockcount); + if (endpos > i_size_read(VFS_I(ip))) { + i_size_write(VFS_I(ip), endpos); + ip->i_disk_size = endpos; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + return error; + + *cursor += irec.br_blockcount; + return 0; +out_cancel: + xfs_trans_cancel(tp); +out: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* + * Allocate all free physical space between off and len and map it to this + * regular non-realtime file. 
+ */ +int +xfs_map_free_space( + struct xfs_inode *ip, + xfs_off_t off, + xfs_off_t len) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag = NULL; + xfs_daddr_t off_daddr = BTOBB(off); + xfs_daddr_t end_daddr = BTOBBT(off + len); + xfs_fsblock_t off_fsb = XFS_DADDR_TO_FSB(mp, off_daddr); + xfs_fsblock_t end_fsb = XFS_DADDR_TO_FSB(mp, end_daddr); + xfs_agnumber_t off_agno = XFS_FSB_TO_AGNO(mp, off_fsb); + xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsb); + int error = 0; + + trace_xfs_map_free_space(ip, off, len); + + while ((pag = xfs_perag_next_range(mp, pag, off_agno, + mp->m_sb.sb_agcount - 1))) { + xfs_agblock_t off_agbno = 0; + xfs_agblock_t end_agbno; + xfs_agblock_t last_enospc_agbno = NULLAGBLOCK; + + end_agbno = xfs_ag_block_count(mp, pag_agno(pag)); + + if (pag_agno(pag) == off_agno) + off_agbno = XFS_FSB_TO_AGBNO(mp, off_fsb); + if (pag_agno(pag) == end_agno) + end_agbno = XFS_FSB_TO_AGBNO(mp, end_fsb); + + while (off_agbno < end_agbno) { + error = xfs_map_free_extent(ip, pag, &off_agbno, + end_agbno, &last_enospc_agbno); + if (error) + goto out; + } + } + +out: + if (pag) + xfs_perag_rele(pag); + if (error == -ENOSPC) + return 0; + return error; +} diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index c39cce66829e26..5d84b702b16326 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -63,6 +63,7 @@ int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, xfs_off_t len, struct xfs_zone_alloc_ctx *ac); int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, xfs_off_t len); +int xfs_map_free_space(struct xfs_inode *ip, xfs_off_t off, xfs_off_t len); /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index b8f0b9a2998b9c..8bf1e96ab57a5b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -34,6 +34,7 @@ #include #include #include +#include static const struct vm_operations_struct xfs_file_vm_ops; @@ -1548,6 +1549,144 @@ xfs_file_fallocate( return error; } +STATIC int +xfs_file_map_freesp( + struct file *file, + const struct xfs_map_freesp *mf) +{ + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_off_t device_size; + uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + loff_t new_size = 0; + int error; + + xfs_ilock(ip, iolock); + error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); + if (error) + goto out_unlock; + + /* + * Must wait for all AIO to complete before we continue as AIO can + * change the file size on completion without holding any locks we + * currently hold. We must do this first because AIO can update both + * the on disk and in memory inode sizes, and the operations that follow + * require the in-memory size to be fully up-to-date. + */ + inode_dio_wait(inode); + + error = file_modified(file); + if (error) + goto out_unlock; + + if (XFS_IS_REALTIME_INODE(ip)) { + error = -EOPNOTSUPP; + goto out_unlock; + } + device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks); + + /* + * Bail out now if we aren't allowed to make the file size the + * same length as the device. 
+ */ + if (device_size > i_size_read(inode)) { + new_size = device_size; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + + error = xfs_map_free_space(ip, mf->offset, mf->len); + if (error) { + if (error == -ECANCELED) + error = 0; + goto out_unlock; + } + + /* Change file size if needed */ + if (new_size) { + struct iattr iattr; + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = new_size; + error = xfs_vn_setattr_size(file_mnt_idmap(file), + file_dentry(file), &iattr); + if (error) + goto out_unlock; + } + + if (xfs_file_sync_writes(file)) + error = xfs_log_force_inode(ip); + +out_unlock: + xfs_iunlock(ip, iolock); + return error; +} + +long +xfs_ioc_map_freesp( + struct file *file, + struct xfs_map_freesp __user *argp) +{ + struct xfs_map_freesp args; + struct inode *inode = file_inode(file); + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + if (args.flags || args.pad) + return -EINVAL; + + if (args.offset < 0 || args.len <= 0) + return -EINVAL; + + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + + /* + * We can only allow pure fallocate on append only files + */ + if (IS_APPEND(inode)) + return -EPERM; + + if (IS_IMMUTABLE(inode)) + return -EPERM; + + /* + * We cannot allow any fallocate operation on an active swapfile + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + + if (S_ISFIFO(inode->i_mode)) + return -ESPIPE; + + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode)) + return -ENODEV; + + /* Check for wrap through zero too */ + if (args.offset + args.len > inode->i_sb->s_maxbytes) + return -EFBIG; + if (args.offset + args.len < 0) + return -EFBIG; + + file_start_write(file); + error = xfs_file_map_freesp(file, &args); + if (!error) + fsnotify_modify(file); + + file_end_write(file); + return error; +} + STATIC int xfs_file_fadvise( struct file *file, diff --git a/fs/xfs/xfs_file.h b/fs/xfs/xfs_file.h index 24490ea49e16c6..c9d50699baba85 100644 --- a/fs/xfs/xfs_file.h +++ b/fs/xfs/xfs_file.h @@ -15,4 +15,6 @@ bool xfs_is_falloc_aligned(struct xfs_inode *ip, loff_t pos, bool xfs_truncate_needs_cow_around(struct xfs_inode *ip, loff_t pos); int xfs_file_unshare_at(struct xfs_inode *ip, loff_t pos); +long xfs_ioc_map_freesp(struct file *file, struct xfs_map_freesp __user *argp); + #endif /* __XFS_FILE_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 20f013bd4ce653..092a3699ff9e75 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -45,6 +45,8 @@ #include #include +#include +#include /* Return 0 on success or positive error */ int @@ -1429,6 +1431,9 @@ xfs_file_ioctl( case XFS_IOC_COMMIT_RANGE: return xfs_ioc_commit_range(filp, arg); + case XFS_IOC_MAP_FREESP: + return xfs_ioc_map_freesp(filp, arg); + default: return -ENOTTY; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e81247b3024e53..ebbc832db8fa1e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1732,6 +1732,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_free_file_space); DEFINE_SIMPLE_IO_EVENT(xfs_zero_file_space); DEFINE_SIMPLE_IO_EVENT(xfs_collapse_file_space); DEFINE_SIMPLE_IO_EVENT(xfs_insert_file_space); +DEFINE_SIMPLE_IO_EVENT(xfs_map_free_space); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), @@ -1821,6 +1822,36 @@ TRACE_EVENT(xfs_bunmap, ); +DECLARE_EVENT_CLASS(xfs_map_free_extent_class, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_extlen_t len), + TP_ARGS(ip, bno, len), + 
TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fileoff_t, bno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_disk_size; + __entry->bno = bno; + __entry->len = len; + ), + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->bno, + __entry->len) +); +#define DEFINE_MAP_FREE_EXTENT_EVENT(name) \ +DEFINE_EVENT(xfs_map_free_extent_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_extlen_t len), \ + TP_ARGS(ip, bno, len)) +DEFINE_MAP_FREE_EXTENT_EVENT(xfs_map_free_ag_extent); + DECLARE_EVENT_CLASS(xfs_extent_busy_class, TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, xfs_extlen_t len), @@ -1856,6 +1887,8 @@ DEFINE_BUSY_EVENT(xfs_extent_busy); DEFINE_BUSY_EVENT(xfs_extent_busy_force); DEFINE_BUSY_EVENT(xfs_extent_busy_reuse); DEFINE_BUSY_EVENT(xfs_extent_busy_clear); +DEFINE_BUSY_EVENT(xfs_alloc_find_freesp); +DEFINE_BUSY_EVENT(xfs_alloc_find_freesp_done); TRACE_EVENT(xfs_extent_busy_trim, TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, @@ -3962,6 +3995,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, DEFINE_EVENT(xfs_inode_irec_class, name, \ TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \ TP_ARGS(ip, irec)) +DEFINE_INODE_IREC_EVENT(xfs_map_free_ag_extent_done); /* inode iomap invalidation events */ DECLARE_EVENT_CLASS(xfs_wb_invalid_class, @@ -4096,6 +4130,7 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error); DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src); DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest); +DEFINE_INODE_ERROR_EVENT(xfs_map_free_reserve_more_fail); /* dedupe tracepoints */ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents); From patchwork Tue Dec 31 23:38:51 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13924035 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EDE4313FD72 for ; Tue, 31 Dec 2024 23:38:51 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688332; cv=none; b=GOUnw+1rT18FngbQjRfRdG11flXGNlfEo2LEQ5QRRByuSAoXTRDYlLXY/1UHnXP1W8nIg7DzXfUMbyE0VFj+2zllfx4LN3ToZNyLzIoyzcEy8/57CyhC0l+5TnOoNbiXWItf6G9z5q5N+jy3g1P9fxuJIVLUy4lO2Ij9ObZG7Bg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1735688332; c=relaxed/simple; bh=4siW6mV/16ezWx3vT5gKKFFxqagFSZ7Jla9rH23t4uE=; h=Date:Subject:From:To:Cc:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=S0HJInjQ2K6WSPPSrg5bwOvjOWRiu733p/IB+UnWZitW6OP4fzu9Woowczug4dZvAoYDE7kSkSHXPNBC+JDm4b7c9vLJfC2MYQAZSlUh+HkKRpjpTUMDsWtYlp23AIePv8RBeEvM2HFcf6DuuH4o72CNdA7DseBgEAe35jnhU8E= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=s0mDa1vP; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="s0mDa1vP" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C79D6C4CED2; Tue, 31 Dec 2024 23:38:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1735688331; bh=4siW6mV/16ezWx3vT5gKKFFxqagFSZ7Jla9rH23t4uE=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=s0mDa1vPoWP8bIj7s3ZfIyChQ9dnHKh8EfcYpmRMmCpTmjZ4BsGWRt96JfS0fdvef BFraD7cWtb/C3ccwTKOXU/hVRMu36DoMC4FTfC/+kMpzXZ25o9f+CbqaGvThonrMvi VHdwWdaqeV9VGt+lLILJzBNMbfqTP1Tr0G8mX5FM/ujsJJlNwFlz8d9flaHN53edi3 wUhfw88evcQATkGtFm2ME8dErf9qr1x6064BLGitpHT9P5So7V+y7jhvAVgwZ2rVLm grfxWdiZQRu/olHgqzN/Ya1zeKaNoI7TQF9cqF7oPH2F0V1m3R1159KsTEnT5mfpSJ wVhb8onaVQgpw== Date: Tue, 31 Dec 2024 15:38:51 -0800 Subject: [PATCH 4/4] xfs: implement FALLOC_FL_MAP_FREE for realtime files From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <173568754285.2704719.13644245015063197003.stgit@frogsfrogsfrogs> In-Reply-To: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> References: <173568754204.2704719.1892779733633851572.stgit@frogsfrogsfrogs> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Implement mapfree for realtime space. Signed-off-by: "Darrick J. Wong" --- fs/xfs/xfs_bmap_util.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_bmap_util.h | 2 fs/xfs/xfs_file.c | 14 ++- fs/xfs/xfs_rtalloc.c | 108 ++++++++++++++++++++++++++ fs/xfs/xfs_rtalloc.h | 7 ++ fs/xfs/xfs_trace.h | 41 ++++++++++ 6 files changed, 368 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 8d5c2072bcd533..83e6c27f63a969 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -2219,3 +2219,205 @@ xfs_map_free_space( return 0; return error; } + +#ifdef CONFIG_XFS_RT +/* + * Given a file and a free rt extent, map it into the file at the same offset + * if the file were a sparse image of the physical device. Set @mval to + * whatever mapping we added to the file. 
+ */ +STATIC int +xfs_map_free_rtgroup_extent( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_rtgroup *rtg, + xfs_rtxnum_t rtx, + xfs_rtxlen_t rtxlen, + struct xfs_bmbt_irec *mval) +{ + struct xfs_bmbt_irec irec; + struct xfs_mount *mp = ip->i_mount; + xfs_fsblock_t fsbno = xfs_rtx_to_rtb(rtg, rtx); + xfs_fileoff_t startoff = fsbno; + xfs_extlen_t len = xfs_rtbxlen_to_blen(mp, rtxlen); + int nimaps; + int error; + + ASSERT(XFS_IS_REALTIME_INODE(ip)); + + trace_xfs_map_free_rt_extent(ip, fsbno, len); + + /* Make sure the entire range is a hole. */ + nimaps = 1; + error = xfs_bmapi_read(ip, startoff, len, &irec, &nimaps, 0); + if (error) + return error; + + if (irec.br_startoff != startoff || + irec.br_startblock != HOLESTARTBLOCK || + irec.br_blockcount < len) + return -EINVAL; + + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, + XFS_IEXT_ADD_NOSPLIT_CNT); + if (error) + return error; + + /* + * Allocate the physical extent. We should not have dropped the lock + * since the scan of the free space metadata, so this should work, + * though the length may be adjusted to play nicely with metadata space + * reservations. + */ + error = xfs_rtallocate_exact(tp, rtg, rtx, rtxlen); + if (error) + return error; + + /* Map extent into file, update quota. */ + mval->br_blockcount = len; + mval->br_startblock = fsbno; + mval->br_startoff = startoff; + mval->br_state = XFS_EXT_UNWRITTEN; + + trace_xfs_map_free_rt_extent_done(ip, mval); + + xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, mval); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_RTBCOUNT, + mval->br_blockcount); + + return 0; +} + +/* Find a free extent in this rtgroup and map it into the file. */ +STATIC int +xfs_map_free_rt_extent( + struct xfs_inode *ip, + struct xfs_rtgroup *rtg, + xfs_rtxnum_t *cursor, + xfs_rtxnum_t end_rtx) +{ + struct xfs_bmbt_irec irec; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + loff_t endpos; + xfs_rtxlen_t len_rtx; + xfs_extlen_t free_len; + int error; + + if (fatal_signal_pending(current)) + return -EINTR; + + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0, false, + &tp); + if (error) + return error; + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP); + + error = xfs_rtallocate_find_freesp(tp, rtg, cursor, end_rtx, &len_rtx); + if (error) + goto out_rtglock; + + /* + * If off_rtx is beyond the end of the rt device or is past what the + * user asked for, bail out. + */ + if (*cursor >= end_rtx) + goto out_rtglock; + + free_len = xfs_rtxlen_to_extlen(mp, len_rtx); + error = xfs_map_free_reserve_more(tp, ip, &free_len); + if (error) + goto out_rtglock; + + error = xfs_map_free_rtgroup_extent(tp, ip, rtg, *cursor, len_rtx, + &irec); + if (error == -EAGAIN) { + /* + * The allocator was busy and told us to try again. The + * transaction could be dirty due to a nrext64 upgrade, so + * commit the transaction and try again without advancing + * the cursor. + * + * XXX do we fail to unlock something here? + */ + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP); + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + } + if (error) + goto out_cancel; + + /* Update isize if needed. 
*/ + endpos = XFS_FSB_TO_B(mp, irec.br_startoff + irec.br_blockcount); + if (endpos > i_size_read(VFS_I(ip))) { + i_size_write(VFS_I(ip), endpos); + ip->i_disk_size = endpos; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + return error; + + ASSERT(xfs_blen_to_rtxoff(mp, irec.br_blockcount) == 0); + *cursor += xfs_extlen_to_rtxlen(mp, irec.br_blockcount); + return 0; +out_rtglock: + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP); +out_cancel: + xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* + * Allocate all free physical space between off and len and map it to this + * regular realtime file. + */ +int +xfs_map_free_rt_space( + struct xfs_inode *ip, + xfs_off_t off, + xfs_off_t len) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_rtgroup *rtg = NULL; + xfs_daddr_t off_daddr = BTOBB(off); + xfs_daddr_t end_daddr = BTOBBT(off + len); + xfs_rtblock_t off_rtb = xfs_daddr_to_rtb(mp, off_daddr); + xfs_rtblock_t end_rtb = xfs_daddr_to_rtb(mp, end_daddr); + xfs_rgnumber_t off_rgno = xfs_rtb_to_rgno(mp, off_rtb); + xfs_rgnumber_t end_rgno = xfs_rtb_to_rgno(mp, end_rtb); + int error = 0; + + trace_xfs_map_free_rt_space(ip, off, len); + + while ((rtg = xfs_rtgroup_next_range(mp, rtg, off_rgno, + mp->m_sb.sb_rgcount))) { + xfs_rtxnum_t off_rtx = 0; + xfs_rtxnum_t end_rtx = rtg->rtg_extents; + + if (rtg_rgno(rtg) == off_rgno) + off_rtx = xfs_rtb_to_rtx(mp, off_rtb); + if (rtg_rgno(rtg) == end_rgno) + end_rtx = min(end_rtx, xfs_rtb_to_rtx(mp, end_rtb)); + + while (off_rtx < end_rtx) { + error = xfs_map_free_rt_extent(ip, rtg, &off_rtx, + end_rtx); + if (error) + goto out; + } + } + +out: + if (rtg) + xfs_rtgroup_rele(rtg); + if (error == -ENOSPC) + return 0; + return error; +} +#endif diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 5d84b702b16326..0e16fbfef6cd09 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -85,8 +85,10 @@ int xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset, #ifdef CONFIG_XFS_RT int xfs_convert_rtbigalloc_file_space(struct xfs_inode *ip, loff_t pos, uint64_t len); +int xfs_map_free_rt_space(struct xfs_inode *ip, xfs_off_t off, xfs_off_t len); #else # define xfs_convert_rtbigalloc_file_space(ip, pos, len) (-EOPNOTSUPP) +# define xfs_map_free_rt_space(ip, off, len) (-EOPNOTSUPP) #endif #endif /* __XFS_BMAP_UTIL_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8bf1e96ab57a5b..ceb7936e5fd9a3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1580,11 +1580,10 @@ xfs_file_map_freesp( if (error) goto out_unlock; - if (XFS_IS_REALTIME_INODE(ip)) { - error = -EOPNOTSUPP; - goto out_unlock; - } - device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks); + if (XFS_IS_REALTIME_INODE(ip)) + device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_rblocks); + else + device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks); /* * Bail out now if we aren't allowed to make the file size the @@ -1597,7 +1596,10 @@ xfs_file_map_freesp( goto out_unlock; } - error = xfs_map_free_space(ip, mf->offset, mf->len); + if (XFS_IS_REALTIME_INODE(ip)) + error = xfs_map_free_rt_space(ip, mf->offset, mf->len); + else + error = xfs_map_free_space(ip, mf->offset, mf->len); if (error) { if (error == -ECANCELED) error = 0; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 2728c568ac5a8a..0a4e087b11b60e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -2230,3 +2230,111 @@ xfs_bmap_rtalloc( xfs_bmap_alloc_account(ap); return 
0; } + +/* + * Find the next free realtime extent starting at @rtx and going no higher than + * @end_rtx. Set @rtx and @len_rtx to whatever free extents we find, or to + * @end_rtx if we find no space. + */ +int +xfs_rtallocate_find_freesp( + struct xfs_trans *tp, + struct xfs_rtgroup *rtg, + xfs_rtxnum_t *rtx, + xfs_rtxnum_t end_rtx, + xfs_rtxlen_t *len_rtx) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtalloc_args args = { + .rtg = rtg, + .mp = mp, + .tp = tp, + }; + const unsigned int max_rtxlen = + xfs_blen_to_rtbxlen(mp, XFS_MAX_BMBT_EXTLEN); + int error; + + trace_xfs_rtallocate_find_freesp(rtg, *rtx, end_rtx - *rtx); + + while (*rtx < end_rtx) { + xfs_rtblock_t next_rtx; + int is_free = 0; + + if (fatal_signal_pending(current)) + return -EINTR; + + /* Is the first rtx in the range free? */ + error = xfs_rtcheck_range(&args, *rtx, 1, 1, &next_rtx, + &is_free); + if (error) + return error; + + /* Free or not, how many more rtx have the same status? */ + error = xfs_rtfind_forw(&args, *rtx, end_rtx, &next_rtx); + if (error) + return error; + + if (is_free) { + *len_rtx = min_t(xfs_rtxlen_t, max_rtxlen, + next_rtx - *rtx + 1); + + trace_xfs_rtallocate_find_freesp_done(rtg, *rtx, + *len_rtx); + return 0; + } + + *rtx = next_rtx + 1; + } + + return 0; +} + +/* Allocate exactly this space from the rt device. */ +int +xfs_rtallocate_exact( + struct xfs_trans *tp, + struct xfs_rtgroup *rtg, + xfs_rtxnum_t rtx, + xfs_rtxlen_t len) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtalloc_args args = { + .rtg = rtg, + .mp = mp, + .tp = tp, + }; + int error; + + trace_xfs_rtallocate_exact(rtg, rtx, len); + + if (xfs_has_rtgroups(mp)) { + xfs_rtxnum_t resrtx = rtx; + xfs_rtxlen_t reslen = len; + + /* + * Never pass 0 for start here so that the busy extent code + * knows that we wanted a near allocation and will flush the + * log to wait for the start to become available. + */ + error = xfs_rtallocate_adjust_for_busy(&args, rtx ? rtx : 1, 1, + len, &reslen, 1, &resrtx); + if (error) + return error; + + if (resrtx != rtx) { + ASSERT(resrtx == rtx); + return -EAGAIN; + } + + len = reslen; + } + + xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP); + + error = xfs_rtallocate_range(&args, rtx, len); + if (error) + return error; + + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -(long)len); + return 0; +} diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 0d95b29092c9f3..745af8a2798d36 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -10,6 +10,7 @@ struct xfs_mount; struct xfs_trans; +struct xfs_rtgroup; #ifdef CONFIG_XFS_RT /* rtgroup superblock initialization */ @@ -48,6 +49,10 @@ xfs_growfs_rt( int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); int xfs_growfs_check_rtgeom(const struct xfs_mount *mp, xfs_rfsblock_t dblocks, xfs_rfsblock_t rblocks, xfs_agblock_t rextsize); +int xfs_rtallocate_find_freesp(struct xfs_trans *tp, struct xfs_rtgroup *rtg, + xfs_rtxnum_t *rtx, xfs_rtxnum_t end_rtx, xfs_rtxlen_t *len_rtx); +int xfs_rtallocate_exact(struct xfs_trans *tp, struct xfs_rtgroup *rtg, + xfs_rtxnum_t rtx, xfs_rtxlen_t rtxlen); #else # define xfs_growfs_rt(mp,in) (-ENOSYS) # define xfs_rtalloc_reinit_frextents(m) (0) @@ -67,6 +72,8 @@ xfs_rtmount_init( # define xfs_rtunmount_inodes(m) # define xfs_rt_resv_free(mp) ((void)0) # define xfs_rt_resv_init(mp) (0) +# define xfs_rtallocate_find_freesp(...) (-EOPNOTSUPP) +# define xfs_rtallocate_exact(...) 
(-EOPNOTSUPP) static inline int xfs_growfs_check_rtgeom(const struct xfs_mount *mp, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ebbc832db8fa1e..76f5d78b6a6e09 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -105,6 +105,7 @@ struct xfs_rtgroup; struct xfs_open_zone; struct xfs_fsrefs; struct xfs_fsrefs_irec; +struct xfs_rtgroup; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -1732,6 +1733,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_free_file_space); DEFINE_SIMPLE_IO_EVENT(xfs_zero_file_space); DEFINE_SIMPLE_IO_EVENT(xfs_collapse_file_space); DEFINE_SIMPLE_IO_EVENT(xfs_insert_file_space); +#ifdef CONFIG_XFS_RT +DEFINE_SIMPLE_IO_EVENT(xfs_map_free_rt_space); +#endif /* CONFIG_XFS_RT */ DEFINE_SIMPLE_IO_EVENT(xfs_map_free_space); DECLARE_EVENT_CLASS(xfs_itrunc_class, @@ -1851,6 +1855,9 @@ DEFINE_EVENT(xfs_map_free_extent_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_extlen_t len), \ TP_ARGS(ip, bno, len)) DEFINE_MAP_FREE_EXTENT_EVENT(xfs_map_free_ag_extent); +#ifdef CONFIG_XFS_RT +DEFINE_MAP_FREE_EXTENT_EVENT(xfs_map_free_rt_extent); +#endif DECLARE_EVENT_CLASS(xfs_extent_busy_class, TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, @@ -1995,6 +2002,37 @@ TRACE_EVENT(xfs_rtalloc_extent_busy_trim, __entry->new_rtx, __entry->new_len) ); + +DECLARE_EVENT_CLASS(xfs_rtextent_class, + TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t off_rtx, + xfs_rtxlen_t len_rtx), + TP_ARGS(rtg, off_rtx, len_rtx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_rgnumber_t, rgno) + __field(xfs_rtxnum_t, off_rtx) + __field(xfs_rtxlen_t, len_rtx) + ), + TP_fast_assign( + __entry->dev = rtg_mount(rtg)->m_super->s_dev; + __entry->rgno = rtg_rgno(rtg); + __entry->off_rtx = off_rtx; + __entry->len_rtx = len_rtx; + ), + TP_printk("dev %d:%d rgno 0x%x rtx 0x%llx rtxcount 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rgno, + __entry->off_rtx, + __entry->len_rtx) +); +#define DEFINE_RTEXTENT_EVENT(name) \ +DEFINE_EVENT(xfs_rtextent_class, name, \ + TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t off_rtx, \ + xfs_rtxlen_t len_rtx), \ + TP_ARGS(rtg, off_rtx, len_rtx)) +DEFINE_RTEXTENT_EVENT(xfs_rtallocate_exact); +DEFINE_RTEXTENT_EVENT(xfs_rtallocate_find_freesp); +DEFINE_RTEXTENT_EVENT(xfs_rtallocate_find_freesp_done); #endif /* CONFIG_XFS_RT */ DECLARE_EVENT_CLASS(xfs_agf_class, @@ -3996,6 +4034,9 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \ TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \ TP_ARGS(ip, irec)) DEFINE_INODE_IREC_EVENT(xfs_map_free_ag_extent_done); +#ifdef CONFIG_XFS_RT +DEFINE_INODE_IREC_EVENT(xfs_map_free_rt_extent_done); +#endif /* inode iomap invalidation events */ DECLARE_EVENT_CLASS(xfs_wb_invalid_class,