@@ -26,6 +26,8 @@
#define trace_xfs_alloc_exact_done(a) ((void) 0)
#define trace_xfs_alloc_exact_notfound(a) ((void) 0)
#define trace_xfs_alloc_exact_error(a) ((void) 0)
+#define trace_xfs_alloc_find_freesp(...) ((void) 0)
+#define trace_xfs_alloc_find_freesp_done(...) ((void) 0)
#define trace_xfs_alloc_near_first(a) ((void) 0)
#define trace_xfs_alloc_near_greater(a) ((void) 0)
#define trace_xfs_alloc_near_lesser(a) ((void) 0)
@@ -196,6 +198,8 @@
#define trace_xfs_bmap_pre_update(a,b,c,d) ((void) 0)
#define trace_xfs_bmap_post_update(a,b,c,d) ((void) 0)
+#define trace_xfs_bmapi_freesp(...) ((void) 0)
+#define trace_xfs_bmapi_freesp_done(...) ((void) 0)
#define trace_xfs_bunmap(a,b,c,d,e) ((void) 0)
#define trace_xfs_read_extent(a,b,c,d) ((void) 0)
@@ -501,6 +501,16 @@ void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
#define xfs_filestream_new_ag(ip,ag) (0)
#define xfs_filestream_select_ag(...) (-ENOSYS)
+struct xfs_trans;
+
+static inline int
+xfs_rtallocate_extent(struct xfs_trans *tp, xfs_rtblock_t bno,
+ xfs_extlen_t minlen, xfs_extlen_t maxlen, xfs_extlen_t *len,
+ int wasdel, xfs_extlen_t prod, xfs_rtblock_t *rtblock)
+{
+ return -EOPNOTSUPP;
+}
+
#define xfs_trans_inode_buf(tp, bp) ((void) 0)
/* quota bits */
@@ -4092,3 +4092,91 @@ xfs_extfree_intent_destroy_cache(void)
kmem_cache_destroy(xfs_extfree_item_cache);
xfs_extfree_item_cache = NULL;
}
+
+/*
+ * Find the next chunk of free space in @pag starting at @agbno and going no
+ * higher than @end_agbno. Set @agbno and @len to whatever free space we find,
+ * or to @end_agbno if we find no space.
+ */
+int
+xfs_alloc_find_freesp(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ xfs_agblock_t *agbno,
+ xfs_agblock_t end_agbno,
+ xfs_extlen_t *len)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agf_bp = NULL;
+ xfs_agblock_t found_agbno;
+ xfs_extlen_t found_len;
+ int found;
+ int error;
+
+ trace_xfs_alloc_find_freesp(mp, pag->pag_agno, *agbno,
+ end_agbno - *agbno);
+
+ error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
+ if (error)
+ return error;
+
+ cur = xfs_allocbt_init_cursor(mp, tp, agf_bp, pag, XFS_BTNUM_BNO);
+
+ /* Try to find a free extent that starts before here. */
+ error = xfs_alloc_lookup_le(cur, *agbno, 0, &found);
+ if (error)
+ goto out_cur;
+ if (found) {
+ error = xfs_alloc_get_rec(cur, &found_agbno, &found_len,
+ &found);
+ if (error)
+ goto out_cur;
+ if (XFS_IS_CORRUPT(mp, !found)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto out_cur;
+ }
+
+ if (found_agbno + found_len > *agbno)
+ goto found;
+ }
+
+ /* Examine the next record if free extent not in range. */
+ error = xfs_btree_increment(cur, 0, &found);
+ if (error)
+ goto out_cur;
+ if (!found)
+ goto next_ag;
+
+ error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, &found);
+ if (error)
+ goto out_cur;
+ if (XFS_IS_CORRUPT(mp, !found)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto out_cur;
+ }
+
+ if (found_agbno >= end_agbno)
+ goto next_ag;
+
+found:
+ /* Found something, so update the mapping. */
+ trace_xfs_alloc_find_freesp_done(mp, pag->pag_agno, found_agbno,
+ found_len);
+ if (found_agbno < *agbno) {
+ found_len -= *agbno - found_agbno;
+ found_agbno = *agbno;
+ }
+ *len = found_len;
+ *agbno = found_agbno;
+ goto out_cur;
+next_ag:
+ /* Found nothing, so advance the cursor beyond the end of the range. */
+ *agbno = end_agbno;
+ *len = 0;
+out_cur:
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
@@ -290,5 +290,8 @@ void xfs_extfree_intent_destroy_cache(void);
xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno,
uint32_t length);
+int xfs_alloc_find_freesp(struct xfs_trans *tp, struct xfs_perag *pag,
+ xfs_agblock_t *agbno, xfs_agblock_t end_agbno,
+ xfs_extlen_t *len);
#endif /* __XFS_ALLOC_H__ */
@@ -6436,3 +6436,152 @@ xfs_get_cowextsz_hint(
return XFS_DEFAULT_COWEXTSZ_HINT;
return a;
}
+
+static inline xfs_fileoff_t
+xfs_fsblock_to_fileoff(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno)
+{
+ xfs_daddr_t daddr = XFS_FSB_TO_DADDR(mp, fsbno);
+
+ return XFS_B_TO_FSB(mp, BBTOB(daddr));
+}
+
+/*
+ * Given a file and a free physical extent, map it into the file at the same
+ * offset if the file were a sparse image of the physical device. Set @mval to
+ * whatever mapping we added to the file.
+ */
+int
+xfs_bmapi_freesp(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_fsblock_t fsbno,
+ xfs_extlen_t len,
+ struct xfs_bmbt_irec *mval)
+{
+ struct xfs_bmbt_irec irec;
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t startoff;
+ bool isrt = XFS_IS_REALTIME_INODE(ip);
+ int nimaps;
+ int error;
+
+ trace_xfs_bmapi_freesp(ip, fsbno, len);
+
+ error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+ XFS_IEXT_ADD_NOSPLIT_CNT);
+ if (error)
+ return error;
+
+ if (isrt)
+ startoff = fsbno;
+ else
+ startoff = xfs_fsblock_to_fileoff(mp, fsbno);
+
+ /* Make sure the entire range is a hole. */
+ nimaps = 1;
+ error = xfs_bmapi_read(ip, startoff, len, &irec, &nimaps, 0);
+ if (error)
+ return error;
+
+ if (irec.br_startoff != startoff ||
+ irec.br_startblock != HOLESTARTBLOCK ||
+ irec.br_blockcount < len)
+ return -EINVAL;
+
+ /*
+ * Allocate the physical extent. We should not have dropped the lock
+ * since the scan of the free space metadata, so this should work,
+ * though the length may be adjusted to play nicely with metadata space
+ * reservations.
+ */
+ if (isrt) {
+ xfs_rtxnum_t rtx_in, rtx_out;
+ xfs_extlen_t rtxlen_in, rtxlen_out;
+ uint32_t mod;
+
+ rtx_in = xfs_rtb_to_rtxrem(mp, fsbno, &mod);
+ if (mod) {
+ ASSERT(mod == 0);
+ return -EFSCORRUPTED;
+ }
+
+ rtxlen_in = xfs_rtb_to_rtxrem(mp, len, &mod);
+ if (mod) {
+ ASSERT(mod == 0);
+ return -EFSCORRUPTED;
+ }
+
+ error = xfs_rtallocate_extent(tp, rtx_in, 1, rtxlen_in,
+ &rtxlen_out, 0, 1, &rtx_out);
+ if (error)
+ return error;
+ if (rtx_out == NULLRTEXTNO) {
+ /*
+ * We were promised the space! In theory there aren't
+ * any reserve lists that would prevent us from getting
+ * the space.
+ */
+ return -ENOSPC;
+ }
+ if (rtx_out != rtx_in) {
+ ASSERT(0);
+ xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
+ return -EFSCORRUPTED;
+ }
+ mval->br_blockcount = rtxlen_out * mp->m_sb.sb_rextsize;
+ } else {
+ struct xfs_alloc_arg args = {
+ .mp = mp,
+ .tp = tp,
+ .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
+ .resv = XFS_AG_RESV_NONE,
+ .prod = 1,
+ .datatype = XFS_ALLOC_USERDATA,
+ .maxlen = len,
+ .minlen = 1,
+ };
+ args.pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsbno));
+ error = xfs_alloc_vextent_exact_bno(&args, fsbno);
+ xfs_perag_put(args.pag);
+ if (error)
+ return error;
+ if (args.fsbno == NULLFSBLOCK) {
+ /*
+ * We were promised the space, but failed to get it.
+ * This could be because the space is reserved for
+ * metadata expansion, or it could be because the AGFL
+ * fixup grabbed the first block we wanted. Either
+ * way, if the transaction is dirty we must commit it
+ * and tell the caller to try again.
+ */
+ if (tp->t_flags & XFS_TRANS_DIRTY)
+ return -EAGAIN;
+ return -ENOSPC;
+ }
+ if (args.fsbno != fsbno) {
+ ASSERT(0);
+ xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
+ return -EFSCORRUPTED;
+ }
+ mval->br_blockcount = args.len;
+ }
+
+ /* Map extent into file, update quota. */
+ mval->br_startblock = fsbno;
+ mval->br_startoff = startoff;
+ mval->br_state = XFS_EXT_UNWRITTEN;
+
+ trace_xfs_bmapi_freesp_done(ip, mval);
+
+ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, mval);
+ if (isrt)
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_RTBCOUNT,
+ mval->br_blockcount);
+ else
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
+ mval->br_blockcount);
+
+ return 0;
+}
@@ -197,6 +197,9 @@ int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap);
+int xfs_bmapi_freesp(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_fsblock_t fsbno, xfs_extlen_t len,
+ struct xfs_bmbt_irec *mval);
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
xfs_extnum_t nexts, int *done);
@@ -864,6 +864,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry)
/* XFS_IOC_GETPARENTS ---- staging 62 */
/* XFS_IOC_RTGROUP_GEOMETRY - staging 63 */
+/* XFS_IOC_MAP_FREESP ---- staging 64 */
/*
* ioctl commands that replace IRIX syssgi()'s
@@ -303,4 +303,19 @@ xfs_getfsrefs_advance(
/* XXX stealing XFS_IOC_GETBIOSIZE */
#define XFS_IOC_GETFSREFCOUNTS _IOWR('X', 47, struct xfs_getfsrefs_head)
+/* map free space to file */
+
+struct xfs_map_freesp {
+ __s64 offset; /* disk address to map, in bytes */
+ __s64 len; /* length in bytes */
+ __u64 flags; /* must be zero */
+ __u64 pad; /* must be zero */
+};
+
+/*
+ * XFS_IOC_MAP_FREESP maps all the free physical space in the filesystem into
+ * the file at the same offsets. This ioctl requires CAP_SYS_ADMIN.
+ */
+#define XFS_IOC_MAP_FREESP _IOWR('X', 64, struct xfs_map_freesp)
+
#endif /* __XFS_FS_STAGING_H__ */
new file mode 100644
@@ -0,0 +1,76 @@
+.\" Copyright (c) 2023-2024 Oracle. All rights reserved.
+.\"
+.\" %%%LICENSE_START(GPLv2+_DOC_FULL)
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\" %%%LICENSE_END
+.TH IOCTL-XFS-MAP-FREESP 2 2023-11-17 "XFS"
+.SH NAME
+ioctl_xfs_map_freesp \- map free space into a file
+.SH SYNOPSIS
+.br
+.B #include <xfs/xfs_fs_staging.h>
+.PP
+.BI "int ioctl(int " fd ", XFS_IOC_MAP_FREESP, struct xfs_map_freesp *" arg );
+.SH DESCRIPTION
+Maps free space into the sparse ranges of a regular file.
+This ioctl uses
+.B struct xfs_map_freesp
+to specify the range of free space to be mapped:
+.PP
+.in +4n
+.nf
+struct xfs_map_freesp {
+ __s64 offset;
+ __s64 len;
+ __s64 flags;
+ __s64 pad;
+};
+.fi
+.in
+.PP
+.I offset
+is the physical disk address, in bytes, of the start of the range to scan.
+Each free space extent in this range will be mapped to the file if the
+corresponding range of the file is sparse.
+.PP
+.I len
+is the number of bytes in the range to scan.
+.PP
+.I flags
+must be zero; there are no flags defined yet.
+.PP
+.I pad
+must be zero.
+.SH RETURN VALUE
+On error, \-1 is returned, and
+.I errno
+is set to indicate the error.
+.PP
+.SH ERRORS
+Error codes can be one of, but are not limited to, the following:
+.TP
+.B EFAULT
+The kernel was not able to copy into the userspace buffer.
+.TP
+.B EFSBADCRC
+Metadata checksum validation failed while performing the query.
+.TP
+.B EFSCORRUPTED
+Metadata corruption was encountered while performing the query.
+.TP
+.B EINVAL
+One of the arguments was not valid,
+or the file was not sparse.
+.TP
+.B EIO
+An I/O error was encountered while performing the query.
+.TP
+.B ENOMEM
+There was insufficient memory to perform the query.
+.TP
+.B ENOSPC
+There was insufficient disk space to commit the space mappings.
+.SH CONFORMING TO
+This API is specific to XFS filesystem on the Linux kernel.
+.SH SEE ALSO
+.BR ioctl (2)