@@ -790,6 +790,30 @@ struct xfs_exchange_range {
__u64 flags; /* see XFS_EXCHANGE_RANGE_* below */
};
+/*
+ * Using the same definition of file2 as struct xfs_exchange_range, commit the
+ * contents of file1 into file2 if file2 has the same inode number, mtime, and
+ * ctime as the arguments provided to the call. The old contents of file2 will
+ * be moved to file1.
+ *
+ * file2 is the file that the ioctl is issued against; file1 is named by
+ * file1_fd. The expected file2 values travel in the file2_freshness blob,
+ * which XFS_IOC_START_COMMIT fills in and XFS_IOC_COMMIT_RANGE reads back.
+ *
+ * Returns -EBUSY if there isn't an exact match for the file2 fields.
+ *
+ * Filesystems must be able to restart and complete the operation even after
+ * the system goes down.
+ */
+struct xfs_commit_range {
+ __s32 file1_fd; /* file descriptor of file1 */
+ __u32 pad; /* must be zeroes */
+ __u64 file1_offset; /* file1 offset, bytes */
+ __u64 file2_offset; /* file2 offset, bytes */
+ __u64 length; /* bytes to exchange */
+
+ __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */
+
+ /*
+  * Opaque file2 metadata for freshness checks. Written by
+  * XFS_IOC_START_COMMIT and consumed by XFS_IOC_COMMIT_RANGE.
+  */
+ __u64 file2_freshness[5];
+};
+
/*
* Exchange file data all the way to the ends of both files, and then exchange
* the file sizes. This flag can be used to replace a file's contents with a
@@ -887,6 +911,8 @@ struct xfs_exchange_range {
#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range)
+/*
+ * START_COMMIT snapshots file2's freshness data into the caller's
+ * struct xfs_commit_range; COMMIT_RANGE exchanges file1 and file2 contents
+ * only if file2 still matches that snapshot.
+ *
+ * NOTE(review): the START_COMMIT handler only copies data out to userspace
+ * and the COMMIT_RANGE handler only copies data in, so _IOR and _IOW would
+ * describe the transfer directions more precisely than _IOWR -- confirm
+ * before the ioctl numbers become ABI.
+ */
+#define XFS_IOC_START_COMMIT _IOWR('X', 130, struct xfs_commit_range)
+#define XFS_IOC_COMMIT_RANGE _IOWR('X', 131, struct xfs_commit_range)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
@@ -131,6 +131,33 @@ xfs_exchrange_estimate(
return error;
}
+/*
+ * Compare file2's current inode number, ctime, and mtime against the snapshot
+ * carried in the exchange request for a range commit.
+ *
+ * Returns 0 if all three values match, or -EBUSY if any of them differ.
+ *
+ * This should be called after the filesystem has locked /all/ inode metadata
+ * against modification.
+ */
+STATIC int
+xfs_exchrange_check_freshness(
+	const struct xfs_exchrange	*fxr,
+	struct xfs_inode		*ip2)
+{
+	struct inode			*vfs2 = VFS_I(ip2);
+	struct timespec64		cur_ctime = inode_get_ctime(vfs2);
+	struct timespec64		cur_mtime = inode_get_mtime(vfs2);
+
+	trace_xfs_exchrange_freshness(fxr, ip2);
+
+	/* Any mismatch means file2 no longer agrees with the snapshot. */
+	if (fxr->file2_ino != ip2->i_ino)
+		return -EBUSY;
+	if (!timespec64_equal(&fxr->file2_ctime, &cur_ctime))
+		return -EBUSY;
+	if (!timespec64_equal(&fxr->file2_mtime, &cur_mtime))
+		return -EBUSY;
+
+	return 0;
+}
+
#define QRETRY_IP1 (0x1)
#define QRETRY_IP2 (0x2)
@@ -666,6 +693,12 @@ xfs_exchrange_prep(
if (error || fxr->length == 0)
return error;
+ if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) {
+ error = xfs_exchrange_check_freshness(fxr, ip2);
+ if (error)
+ return error;
+ }
+
/* Attach dquots to both inodes before changing block maps. */
error = xfs_qm_dqattach(ip2);
if (error)
@@ -780,7 +813,8 @@ xfs_exchange_range(
if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
return -EXDEV;
- if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
+ if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
+ __XFS_EXCHANGE_RANGE_CHECK_FRESH2))
return -EINVAL;
/* Userspace requests only honored for regular files. */
@@ -828,3 +862,21 @@ xfs_exchange_range(
fsnotify_modify(fxr->file2);
return 0;
}
+
+/*
+ * Record file2's inode number, mtime, and ctime in @fxr for a later commit
+ * range operation. The values are read while holding the inode's IOLOCK,
+ * MMAPLOCK, and ILOCK in shared mode; all three locks are dropped again
+ * before returning.
+ */
+void
+xfs_exchrange_freshness(
+	struct xfs_exchrange	*fxr)
+{
+	struct inode		*vfs2 = file_inode(fxr->file2);
+	struct xfs_inode	*ip2 = XFS_I(vfs2);
+	const unsigned int	shared_locks = XFS_IOLOCK_SHARED |
+					       XFS_MMAPLOCK_SHARED |
+					       XFS_ILOCK_SHARED;
+
+	xfs_ilock(ip2, shared_locks);
+
+	fxr->file2_mtime = inode_get_mtime(vfs2);
+	fxr->file2_ctime = inode_get_ctime(vfs2);
+	fxr->file2_ino = ip2->i_ino;
+
+	xfs_iunlock(ip2, shared_locks);
+}
@@ -13,8 +13,12 @@ int xfs_exchrange_enable(struct xfs_mount *mp);
#define __XFS_EXCHANGE_RANGE_UPD_CMTIME1 (1ULL << 63)
#define __XFS_EXCHANGE_RANGE_UPD_CMTIME2 (1ULL << 62)
+/*
+ * Freshness check required: before exchanging, verify that file2's inode
+ * number, ctime, and mtime still match the values stored in the
+ * struct xfs_exchrange, and fail with -EBUSY if they do not.
+ */
+#define __XFS_EXCHANGE_RANGE_CHECK_FRESH2 (1ULL << 61)
+
#define XFS_EXCHANGE_RANGE_PRIV_FLAGS (__XFS_EXCHANGE_RANGE_UPD_CMTIME1 | \
- __XFS_EXCHANGE_RANGE_UPD_CMTIME2)
+ __XFS_EXCHANGE_RANGE_UPD_CMTIME2 | \
+ __XFS_EXCHANGE_RANGE_CHECK_FRESH2)
struct xfs_exchrange {
struct file *file1;
@@ -32,6 +36,7 @@ struct xfs_exchrange {
struct timespec64 file2_ctime;
};
+void xfs_exchrange_freshness(struct xfs_exchrange *fxr);
int xfs_exchange_range(struct xfs_exchrange *fxr);
struct xfs_exchmaps_req;
@@ -1965,6 +1965,90 @@ xfs_ioc_exchange_range(
return error;
}
+/*
+ * Opaque freshness blob for XFS_IOC_COMMIT_RANGE. This is the kernel's view
+ * of xfs_commit_range.file2_freshness; a BUILD_BUG_ON in
+ * xfs_ioc_start_commit requires the two to have the same size.
+ */
+struct xfs_commit_range_fresh {
+ __u64 file2_ino; /* inode number */
+ __s64 file2_mtime; /* modification time, seconds */
+ __s64 file2_ctime; /* change time, seconds */
+ __s32 file2_mtime_nsec; /* mod time, nsec */
+ __s32 file2_ctime_nsec; /* change time, nsec */
+ __u64 pad; /* zero; nonzero is rejected by COMMIT_RANGE */
+};
+
+/*
+ * XFS_IOC_START_COMMIT: sample freshness data from the file that the ioctl
+ * was issued against (file2) and store it in the file2_freshness field of the
+ * caller's struct xfs_commit_range. No other part of the user structure is
+ * read or written here.
+ *
+ * Returns 0 on success or -EFAULT if the user buffer cannot be written.
+ */
+static long
+xfs_ioc_start_commit(
+	struct file			*file,
+	struct xfs_commit_range __user	*argp)
+{
+	struct xfs_exchrange		fxr = {
+		.file2			= file,
+	};
+	struct xfs_commit_range_fresh	fresh;
+	void __user			*dest = &argp->file2_freshness;
+
+	/* The blob must fill the opaque user-visible array exactly. */
+	BUILD_BUG_ON(sizeof(fresh) != sizeof(argp->file2_freshness));
+
+	xfs_exchrange_freshness(&fxr);
+
+	/* Zero everything first so that the pad field goes out as zero. */
+	memset(&fresh, 0, sizeof(fresh));
+	fresh.file2_ino = fxr.file2_ino;
+	fresh.file2_ctime = fxr.file2_ctime.tv_sec;
+	fresh.file2_ctime_nsec = fxr.file2_ctime.tv_nsec;
+	fresh.file2_mtime = fxr.file2_mtime.tv_sec;
+	fresh.file2_mtime_nsec = fxr.file2_mtime.tv_nsec;
+
+	if (copy_to_user(dest, &fresh, sizeof(fresh)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * XFS_IOC_COMMIT_RANGE: exchange the requested byte ranges between file1
+ * (named by args.file1_fd) and file2 (the file the ioctl was issued against),
+ * but only if file2 still matches the freshness data that userspace passes
+ * back in args.file2_freshness.
+ *
+ * Returns -EFAULT if the arguments cannot be copied in, -EINVAL if either
+ * padding field is nonzero or a flag outside XFS_EXCHANGE_RANGE_ALL_FLAGS is
+ * set, -EBADF if file1_fd does not refer to an open file, and otherwise the
+ * result of xfs_exchange_range().
+ */
+static long
+xfs_ioc_commit_range(
+	struct file			*file,
+	struct xfs_commit_range __user	*argp)
+{
+	struct xfs_exchrange		fxr = {
+		.file2			= file,
+	};
+	struct xfs_commit_range		args;
+	struct xfs_commit_range_fresh	*kern_f;
+	struct fd			file1;
+	int				error;
+
+	/* The freshness blob lives inside args; it is valid after the copy. */
+	kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/*
+	 * struct xfs_commit_range documents pad as "must be zeroes". Enforce
+	 * that here so the field stays available for future extensions
+	 * instead of silently accepting garbage.
+	 */
+	if (args.pad)
+		return -EINVAL;
+	if (memchr_inv(&kern_f->pad, 0, sizeof(kern_f->pad)))
+		return -EINVAL;
+	if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
+		return -EINVAL;
+
+	fxr.file1_offset	= args.file1_offset;
+	fxr.file2_offset	= args.file2_offset;
+	fxr.length		= args.length;
+	/* The freshness check is requested by the kernel, not by userspace. */
+	fxr.flags		= args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2;
+	fxr.file2_ino		= kern_f->file2_ino;
+	fxr.file2_mtime.tv_sec	= kern_f->file2_mtime;
+	fxr.file2_mtime.tv_nsec	= kern_f->file2_mtime_nsec;
+	fxr.file2_ctime.tv_sec	= kern_f->file2_ctime;
+	fxr.file2_ctime.tv_nsec	= kern_f->file2_ctime_nsec;
+
+	file1 = fdget(args.file1_fd);
+	if (!file1.file)
+		return -EBADF;
+	fxr.file1 = file1.file;
+
+	error = xfs_exchange_range(&fxr);
+	fdput(file1);
+	return error;
+}
+
/*
* These long-unused ioctls were removed from the official ioctl API in 5.17,
* but retain these definitions so that we can log warnings about them.
@@ -2207,6 +2291,10 @@ xfs_file_ioctl(
case XFS_IOC_EXCHANGE_RANGE:
return xfs_ioc_exchange_range(filp, arg);
+ case XFS_IOC_START_COMMIT:
+ return xfs_ioc_start_commit(filp, arg);
+ case XFS_IOC_COMMIT_RANGE:
+ return xfs_ioc_commit_range(filp, arg);
default:
return -ENOTTY;
@@ -4835,7 +4835,8 @@ DEFINE_INODE_ERROR_EVENT(xfs_exchrange_error);
{ XFS_EXCHANGE_RANGE_DRY_RUN, "DRY_RUN" }, \
{ XFS_EXCHANGE_RANGE_FILE1_WRITTEN, "F1_WRITTEN" }, \
{ __XFS_EXCHANGE_RANGE_UPD_CMTIME1, "CMTIME1" }, \
- { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" }
+ { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" }, \
+ { __XFS_EXCHANGE_RANGE_CHECK_FRESH2, "FRESH2" }
/* file exchange-range tracepoint class */
DECLARE_EVENT_CLASS(xfs_exchrange_class,