diff mbox series

[v4,1/2] fsstress: add splice support

Message ID 20190123073455.24539-1-zlang@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v4,1/2] fsstress: add splice support | expand

Commit Message

Zorro Lang Jan. 23, 2019, 7:34 a.m. UTC
Support the splice syscall in fsstress.

Signed-off-by: Zorro Lang <zlang@redhat.com>
---
 ltp/fsstress.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)

Comments

Darrick J. Wong Feb. 1, 2019, 2:11 a.m. UTC | #1
On Wed, Jan 23, 2019 at 03:34:54PM +0800, Zorro Lang wrote:
> Support the splice syscall in fsstress.
> 
> Signed-off-by: Zorro Lang <zlang@redhat.com>
> ---
>  ltp/fsstress.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 173 insertions(+)
> 
> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> index 99a1d733..c04feb78 100644
> --- a/ltp/fsstress.c
> +++ b/ltp/fsstress.c
> @@ -85,6 +85,7 @@ typedef enum {
>  	OP_RMDIR,
>  	OP_SETATTR,
>  	OP_SETXATTR,
> +	OP_SPLICE,
>  	OP_STAT,
>  	OP_SYMLINK,
>  	OP_SYNC,
> @@ -194,6 +195,7 @@ void	resvsp_f(int, long);
>  void	rmdir_f(int, long);
>  void	setattr_f(int, long);
>  void	setxattr_f(int, long);
> +void	splice_f(int, long);
>  void	stat_f(int, long);
>  void	symlink_f(int, long);
>  void	sync_f(int, long);
> @@ -244,6 +246,7 @@ opdesc_t	ops[] = {
>  	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
>  	{ OP_SETATTR, "setattr", setattr_f, 0, 1 },
>  	{ OP_SETXATTR, "setxattr", setxattr_f, 1, 1 },
> +	{ OP_SPLICE, "splice", splice_f, 1, 1 },
>  	{ OP_STAT, "stat", stat_f, 1, 0 },
>  	{ OP_SYMLINK, "symlink", symlink_f, 2, 1 },
>  	{ OP_SYNC, "sync", sync_f, 1, 1 },
> @@ -2764,6 +2767,176 @@ setxattr_f(int opno, long r)
>  #endif
>  }
>  
> +void
> +splice_f(int opno, long r)
> +{
> +	struct pathname		fpath1;
> +	struct pathname		fpath2;
> +	struct stat64		stat1;
> +	struct stat64		stat2;
> +	char			inoinfo1[1024];
> +	char			inoinfo2[1024];
> +	loff_t			lr;
> +	loff_t			off1, off2;
> +	size_t			len;
> +	loff_t			offset1, offset2;
> +	size_t			length;
> +	size_t			total;
> +	int			v1;
> +	int			v2;
> +	int			fd1;
> +	int			fd2;
> +	ssize_t			ret1 = 0, ret2 = 0;
> +	size_t			bytes;
> +	int			e;
> +	int			filedes[2];
> +
> +	/* Load paths */
> +	init_pathname(&fpath1);
> +	if (!get_fname(FT_REGm, r, &fpath1, NULL, NULL, &v1)) {
> +		if (v1)
> +			printf("%d/%d: splice read - no filename\n",
> +				procid, opno);
> +		goto out_fpath1;
> +	}
> +
> +	init_pathname(&fpath2);
> +	if (!get_fname(FT_REGm, random(), &fpath2, NULL, NULL, &v2)) {
> +		if (v2)
> +			printf("%d/%d: splice write - no filename\n",
> +				procid, opno);
> +		goto out_fpath2;
> +	}
> +
> +	/* Open files */
> +	fd1 = open_path(&fpath1, O_RDONLY);
> +	e = fd1 < 0 ? errno : 0;
> +	check_cwd();
> +	if (fd1 < 0) {
> +		if (v1)
> +			printf("%d/%d: splice read - open %s failed %d\n",
> +				procid, opno, fpath1.path, e);
> +		goto out_fpath2;
> +	}
> +
> +	fd2 = open_path(&fpath2, O_WRONLY);
> +	e = fd2 < 0 ? errno : 0;
> +	check_cwd();
> +	if (fd2 < 0) {
> +		if (v2)
> +			printf("%d/%d: splice write - open %s failed %d\n",
> +				procid, opno, fpath2.path, e);
> +		goto out_fd1;
> +	}
> +
> +	/* Get file stats */
> +	if (fstat64(fd1, &stat1) < 0) {
> +		if (v1)
> +			printf("%d/%d: splice read - fstat64 %s failed %d\n",
> +				procid, opno, fpath1.path, errno);
> +		goto out_fd2;
> +	}
> +	inode_info(inoinfo1, sizeof(inoinfo1), &stat1, v1);
> +
> +	if (fstat64(fd2, &stat2) < 0) {
> +		if (v2)
> +			printf("%d/%d: splice write - fstat64 %s failed %d\n",
> +				procid, opno, fpath2.path, errno);
> +		goto out_fd2;
> +	}
> +	inode_info(inoinfo2, sizeof(inoinfo2), &stat2, v2);
> +
> +	/* Calculate offsets */
> +	len = (random() % FILELEN_MAX) + 1;
> +	if (len == 0)
> +		len = stat1.st_blksize;
> +	if (len > stat1.st_size)
> +		len = stat1.st_size;
> +
> +	lr = ((int64_t)random() << 32) + random();
> +	if (stat1.st_size == len)
> +		off1 = 0;
> +	else
> +		off1 = (off64_t)(lr % MIN(stat1.st_size - len, MAXFSIZE));
> +	off1 %= maxfsize;
> +
> +	/*
> +	 * splice can overlap write, so the offset of the target file can be
> +	 * any number (< maxfsize)
> +	 */

Er... sorry I've been offline for a couple of weeks due to illness and
so was not able to comment on this patch until now, but I've had a
problem with my overnight fstests runs:

> +	lr = ((int64_t)random() << 32) + random();

This generates a pseudorandom 64-bit candidate offset for the
destination file where we'll land the splice data...

> +	off2 = (off64_t)(lr % maxfsize);

...and this caps the offset at maxfsize (which is 2^63-1 on x64), which
effectively means that the data will appear at a very high file offset,
which creates large (sparse) files very quickly.

Contrast this to other functions like clonerange_f, which add an
additional clamp of 1024 blocks past the current dest file EOF:

 max_off2 = MIN(stat2.st_size + (1024ULL * stat2.st_blksize), MAXFSIZE);
 do {
	lr = ((int64_t)random() << 32) + random();
	off2 = (off64_t)(lr % max_off2);
	off2 %= maxfsize;
	...
 }

or truncate_f, which clamps to both 1MB past EOF and MAXFSIZE:

 lr = ((int64_t)random() << 32) + random();
 off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
 off %= maxfsize;

IOWs, I think we try to grow the fsstress file sizes fairly slowly so
that gigantic files don't suddenly jump out of the bushes:

0/487: splice d3/d9/f2c[6319385 1 0 0 176 1395200] [860317,88612] ->
d3/d9/dd/d1c/d21/f4b[1111 1 0 0 0 1408811] [8492675175361853476,88612] 0

Yikes, it wrote 88,612 bytes of data at offset 8,492,675,175,361,853,476!

This causes shared/009 to take forever to run, because it runs fsstress
to generate some files, and then uses md5sum to ensure that duperemove
doesn't corrupt files.  Unfortunately it takes a very long time to
read an entire 8500 petabyte file.

--D

> +
> +	/*
> +	 * Due to len, off1 and off2 will be changed later, so record the
> +	 * original number at here
> +	 */
> +	length = len;
> +	offset1 = off1;
> +	offset2 = off2;
> +
> +	/* Pipe initialize */
> +	if (pipe(filedes) < 0) {
> +		if (v1 || v2) {
> +			printf("%d/%d: splice - pipe failed %d\n",
> +				procid, opno, errno);
> +			goto out_fd2;
> +		}
> +	}
> +
> +	bytes = 0;
> +	total = 0;
> +	while (len > 0) {
> +		/* move to pipe buffer */
> +		ret1 = splice(fd1, &off1, filedes[1], NULL, len, 0);
> +		if (ret1 < 0) {
> +			break;
> +		}
> +		bytes = ret1;
> +
> +		/* move from pipe buffer to dst file */
> +		while (bytes > 0) {
> +			ret2 = splice(filedes[0], NULL, fd2, &off2, bytes, 0);
> +			if (ret2 < 0) {
> +				break;
> +			}
> +			bytes -= ret2;
> +		}
> +		if (ret2 < 0)
> +			break;
> +
> +		len -= ret1;
> +		total += ret1;
> +	}
> +
> +	if (ret1 < 0 || ret2 < 0)
> +		e = errno;
> +	else
> +		e = 0;
> +	if (v1 || v2) {
> +		printf("%d/%d: splice %s%s [%lld,%lld] -> %s%s [%lld,%lld] %d",
> +			procid, opno,
> +			fpath1.path, inoinfo1, (long long)offset1, (long long)length,
> +			fpath2.path, inoinfo2, (long long)offset2, (long long)length, e);
> +
> +		if (length && length > total)
> +			printf(" asked for %lld, spliced %lld??\n",
> +				(long long)length, (long long)total);
> +		printf("\n");
> +	}
> +
> +	close(filedes[0]);
> +	close(filedes[1]);
> +out_fd2:
> +	close(fd2);
> +out_fd1:
> +	close(fd1);
> +out_fpath2:
> +	free_pathname(&fpath2);
> +out_fpath1:
> +	free_pathname(&fpath1);
> +}
> +
>  void
>  creat_f(int opno, long r)
>  {
> -- 
> 2.17.2
>
Zorro Lang Feb. 1, 2019, 5:07 a.m. UTC | #2
On Thu, Jan 31, 2019 at 06:11:30PM -0800, Darrick J. Wong wrote:
> On Wed, Jan 23, 2019 at 03:34:54PM +0800, Zorro Lang wrote:
> > Support the splice syscall in fsstress.
> > 
> > Signed-off-by: Zorro Lang <zlang@redhat.com>
> > ---
> >  ltp/fsstress.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 173 insertions(+)
> > 
> > diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> > index 99a1d733..c04feb78 100644
> > --- a/ltp/fsstress.c
> > +++ b/ltp/fsstress.c
> > @@ -85,6 +85,7 @@ typedef enum {
> >  	OP_RMDIR,
> >  	OP_SETATTR,
> >  	OP_SETXATTR,
> > +	OP_SPLICE,
> >  	OP_STAT,
> >  	OP_SYMLINK,
> >  	OP_SYNC,
> > @@ -194,6 +195,7 @@ void	resvsp_f(int, long);
> >  void	rmdir_f(int, long);
> >  void	setattr_f(int, long);
> >  void	setxattr_f(int, long);
> > +void	splice_f(int, long);
> >  void	stat_f(int, long);
> >  void	symlink_f(int, long);
> >  void	sync_f(int, long);
> > @@ -244,6 +246,7 @@ opdesc_t	ops[] = {
> >  	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
> >  	{ OP_SETATTR, "setattr", setattr_f, 0, 1 },
> >  	{ OP_SETXATTR, "setxattr", setxattr_f, 1, 1 },
> > +	{ OP_SPLICE, "splice", splice_f, 1, 1 },
> >  	{ OP_STAT, "stat", stat_f, 1, 0 },
> >  	{ OP_SYMLINK, "symlink", symlink_f, 2, 1 },
> >  	{ OP_SYNC, "sync", sync_f, 1, 1 },
> > @@ -2764,6 +2767,176 @@ setxattr_f(int opno, long r)
> >  #endif
> >  }
> >  
> > +void
> > +splice_f(int opno, long r)
> > +{
> > +	struct pathname		fpath1;
> > +	struct pathname		fpath2;
> > +	struct stat64		stat1;
> > +	struct stat64		stat2;
> > +	char			inoinfo1[1024];
> > +	char			inoinfo2[1024];
> > +	loff_t			lr;
> > +	loff_t			off1, off2;
> > +	size_t			len;
> > +	loff_t			offset1, offset2;
> > +	size_t			length;
> > +	size_t			total;
> > +	int			v1;
> > +	int			v2;
> > +	int			fd1;
> > +	int			fd2;
> > +	ssize_t			ret1 = 0, ret2 = 0;
> > +	size_t			bytes;
> > +	int			e;
> > +	int			filedes[2];
> > +
> > +	/* Load paths */
> > +	init_pathname(&fpath1);
> > +	if (!get_fname(FT_REGm, r, &fpath1, NULL, NULL, &v1)) {
> > +		if (v1)
> > +			printf("%d/%d: splice read - no filename\n",
> > +				procid, opno);
> > +		goto out_fpath1;
> > +	}
> > +
> > +	init_pathname(&fpath2);
> > +	if (!get_fname(FT_REGm, random(), &fpath2, NULL, NULL, &v2)) {
> > +		if (v2)
> > +			printf("%d/%d: splice write - no filename\n",
> > +				procid, opno);
> > +		goto out_fpath2;
> > +	}
> > +
> > +	/* Open files */
> > +	fd1 = open_path(&fpath1, O_RDONLY);
> > +	e = fd1 < 0 ? errno : 0;
> > +	check_cwd();
> > +	if (fd1 < 0) {
> > +		if (v1)
> > +			printf("%d/%d: splice read - open %s failed %d\n",
> > +				procid, opno, fpath1.path, e);
> > +		goto out_fpath2;
> > +	}
> > +
> > +	fd2 = open_path(&fpath2, O_WRONLY);
> > +	e = fd2 < 0 ? errno : 0;
> > +	check_cwd();
> > +	if (fd2 < 0) {
> > +		if (v2)
> > +			printf("%d/%d: splice write - open %s failed %d\n",
> > +				procid, opno, fpath2.path, e);
> > +		goto out_fd1;
> > +	}
> > +
> > +	/* Get file stats */
> > +	if (fstat64(fd1, &stat1) < 0) {
> > +		if (v1)
> > +			printf("%d/%d: splice read - fstat64 %s failed %d\n",
> > +				procid, opno, fpath1.path, errno);
> > +		goto out_fd2;
> > +	}
> > +	inode_info(inoinfo1, sizeof(inoinfo1), &stat1, v1);
> > +
> > +	if (fstat64(fd2, &stat2) < 0) {
> > +		if (v2)
> > +			printf("%d/%d: splice write - fstat64 %s failed %d\n",
> > +				procid, opno, fpath2.path, errno);
> > +		goto out_fd2;
> > +	}
> > +	inode_info(inoinfo2, sizeof(inoinfo2), &stat2, v2);
> > +
> > +	/* Calculate offsets */
> > +	len = (random() % FILELEN_MAX) + 1;
> > +	if (len == 0)
> > +		len = stat1.st_blksize;
> > +	if (len > stat1.st_size)
> > +		len = stat1.st_size;
> > +
> > +	lr = ((int64_t)random() << 32) + random();
> > +	if (stat1.st_size == len)
> > +		off1 = 0;
> > +	else
> > +		off1 = (off64_t)(lr % MIN(stat1.st_size - len, MAXFSIZE));
> > +	off1 %= maxfsize;
> > +
> > +	/*
> > +	 * splice can overlap write, so the offset of the target file can be
> > +	 * any number (< maxfsize)
> > +	 */
> 
> Er... sorry I've been offline for a couple of weeks due to illness and
> so was not able to comment on this patch until now, but I've had a
> problem with my overnight fstests runs:
> 
> > +	lr = ((int64_t)random() << 32) + random();
> 
> This generates a pseudorandom 64-bit candidate offset for the
> destination file where we'll land the splice data...
> 
> > +	off2 = (off64_t)(lr % maxfsize);
> 
> ...and this caps the offset at maxfsize (which is 2^63-1 on x64), which
> effectively means that the data will appear at a very high file offset,
> which creates large (sparse) files very quickly.
> 
> Contrast this to other functions like clonerange_f, which add an
> additional clamp of 1024 blocks past the current dest file EOF:
> 
>  max_off2 = MIN(stat2.st_size + (1024ULL * stat2.st_blksize), MAXFSIZE);
>  do {
> 	lr = ((int64_t)random() << 32) + random();
> 	off2 = (off64_t)(lr % max_off2);
> 	off2 %= maxfsize;
> 	...
>  }
> 
> or truncate_f, which clamps to both 1MB past EOF and MAXFSIZE:
> 
>  lr = ((int64_t)random() << 32) + random();
>  off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
>  off %= maxfsize;
> 
> IOWs, I think we try to grow the fsstress file sizes fairly slowly so
> that gigantic files don't suddenly jump out of the bushes:
> 
> 0/487: splice d3/d9/f2c[6319385 1 0 0 176 1395200] [860317,88612] ->
> d3/d9/dd/d1c/d21/f4b[1111 1 0 0 0 1408811] [8492675175361853476,88612] 0
> 
> Yikes, it wrote 88,612 bytes of data at offset 8,492,675,175,361,853,476!
> 
> This causes shared/009 to take forever to run, because it runs fsstress
> to generate some files, and then uses md5sum to ensure that duperemove
> doesn't corrupt files.  Unfortunately it takes a very long time to
> read an entire 8500 petabyte file.

Wow, you're right! I didn't expect that a large sparse file would cause this
problem. This patch has already been merged, so I'll send another patch to fix it.

Thanks very much,
Zorro


> 
> --D
> 
> > +
> > +	/*
> > +	 * Due to len, off1 and off2 will be changed later, so record the
> > +	 * original number at here
> > +	 */
> > +	length = len;
> > +	offset1 = off1;
> > +	offset2 = off2;
> > +
> > +	/* Pipe initialize */
> > +	if (pipe(filedes) < 0) {
> > +		if (v1 || v2) {
> > +			printf("%d/%d: splice - pipe failed %d\n",
> > +				procid, opno, errno);
> > +			goto out_fd2;
> > +		}
> > +	}
> > +
> > +	bytes = 0;
> > +	total = 0;
> > +	while (len > 0) {
> > +		/* move to pipe buffer */
> > +		ret1 = splice(fd1, &off1, filedes[1], NULL, len, 0);
> > +		if (ret1 < 0) {
> > +			break;
> > +		}
> > +		bytes = ret1;
> > +
> > +		/* move from pipe buffer to dst file */
> > +		while (bytes > 0) {
> > +			ret2 = splice(filedes[0], NULL, fd2, &off2, bytes, 0);
> > +			if (ret2 < 0) {
> > +				break;
> > +			}
> > +			bytes -= ret2;
> > +		}
> > +		if (ret2 < 0)
> > +			break;
> > +
> > +		len -= ret1;
> > +		total += ret1;
> > +	}
> > +
> > +	if (ret1 < 0 || ret2 < 0)
> > +		e = errno;
> > +	else
> > +		e = 0;
> > +	if (v1 || v2) {
> > +		printf("%d/%d: splice %s%s [%lld,%lld] -> %s%s [%lld,%lld] %d",
> > +			procid, opno,
> > +			fpath1.path, inoinfo1, (long long)offset1, (long long)length,
> > +			fpath2.path, inoinfo2, (long long)offset2, (long long)length, e);
> > +
> > +		if (length && length > total)
> > +			printf(" asked for %lld, spliced %lld??\n",
> > +				(long long)length, (long long)total);
> > +		printf("\n");
> > +	}
> > +
> > +	close(filedes[0]);
> > +	close(filedes[1]);
> > +out_fd2:
> > +	close(fd2);
> > +out_fd1:
> > +	close(fd1);
> > +out_fpath2:
> > +	free_pathname(&fpath2);
> > +out_fpath1:
> > +	free_pathname(&fpath1);
> > +}
> > +
> >  void
> >  creat_f(int opno, long r)
> >  {
> > -- 
> > 2.17.2
> >
diff mbox series

Patch

diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index 99a1d733..c04feb78 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -85,6 +85,7 @@  typedef enum {
 	OP_RMDIR,
 	OP_SETATTR,
 	OP_SETXATTR,
+	OP_SPLICE,
 	OP_STAT,
 	OP_SYMLINK,
 	OP_SYNC,
@@ -194,6 +195,7 @@  void	resvsp_f(int, long);
 void	rmdir_f(int, long);
 void	setattr_f(int, long);
 void	setxattr_f(int, long);
+void	splice_f(int, long);
 void	stat_f(int, long);
 void	symlink_f(int, long);
 void	sync_f(int, long);
@@ -244,6 +246,7 @@  opdesc_t	ops[] = {
 	{ OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
 	{ OP_SETATTR, "setattr", setattr_f, 0, 1 },
 	{ OP_SETXATTR, "setxattr", setxattr_f, 1, 1 },
+	{ OP_SPLICE, "splice", splice_f, 1, 1 },
 	{ OP_STAT, "stat", stat_f, 1, 0 },
 	{ OP_SYMLINK, "symlink", symlink_f, 2, 1 },
 	{ OP_SYNC, "sync", sync_f, 1, 1 },
@@ -2764,6 +2767,176 @@  setxattr_f(int opno, long r)
 #endif
 }
 
+/*
+ * Splice a random range of one FT_REGm file into another one through a
+ * pipe, looping until the request is done, splice hits EOF or it fails.
+ * opno only tags the log lines; r is handed to get_fname() to pick the
+ * source file, the destination is picked with a fresh random() value.
+ */
+void
+splice_f(int opno, long r)
+{
+	struct pathname		fpath1;
+	struct pathname		fpath2;
+	struct stat64		stat1;
+	struct stat64		stat2;
+	char			inoinfo1[1024];
+	char			inoinfo2[1024];
+	loff_t			lr;
+	loff_t			off1, off2;
+	size_t			len;
+	loff_t			offset1, offset2;
+	size_t			length;
+	size_t			total;
+	int			v1, v2;
+	int			fd1, fd2;
+	ssize_t			ret1 = 0, ret2 = 0;
+	size_t			bytes;
+	int			e;
+	int			filedes[2];
+
+	/* Load paths */
+	init_pathname(&fpath1);
+	if (!get_fname(FT_REGm, r, &fpath1, NULL, NULL, &v1)) {
+		if (v1)
+			printf("%d/%d: splice read - no filename\n",
+				procid, opno);
+		goto out_fpath1;
+	}
+
+	init_pathname(&fpath2);
+	if (!get_fname(FT_REGm, random(), &fpath2, NULL, NULL, &v2)) {
+		if (v2)
+			printf("%d/%d: splice write - no filename\n",
+				procid, opno);
+		goto out_fpath2;
+	}
+
+	/* Open files */
+	fd1 = open_path(&fpath1, O_RDONLY);
+	e = fd1 < 0 ? errno : 0;
+	check_cwd();
+	if (fd1 < 0) {
+		if (v1)
+			printf("%d/%d: splice read - open %s failed %d\n",
+				procid, opno, fpath1.path, e);
+		goto out_fpath2;
+	}
+
+	fd2 = open_path(&fpath2, O_WRONLY);
+	e = fd2 < 0 ? errno : 0;
+	check_cwd();
+	if (fd2 < 0) {
+		if (v2)
+			printf("%d/%d: splice write - open %s failed %d\n",
+				procid, opno, fpath2.path, e);
+		goto out_fd1;
+	}
+
+	/* Get file stats */
+	if (fstat64(fd1, &stat1) < 0) {
+		if (v1)
+			printf("%d/%d: splice read - fstat64 %s failed %d\n",
+				procid, opno, fpath1.path, errno);
+		goto out_fd2;
+	}
+	inode_info(inoinfo1, sizeof(inoinfo1), &stat1, v1);
+
+	if (fstat64(fd2, &stat2) < 0) {
+		if (v2)
+			printf("%d/%d: splice write - fstat64 %s failed %d\n",
+				procid, opno, fpath2.path, errno);
+		goto out_fd2;
+	}
+	inode_info(inoinfo2, sizeof(inoinfo2), &stat2, v2);
+
+	/* Calculate offsets; len is >= 1 here, then capped at the file size */
+	len = (random() % FILELEN_MAX) + 1;
+	if (len > stat1.st_size)
+		len = stat1.st_size;
+
+	lr = ((int64_t)random() << 32) + random();
+	if (stat1.st_size == len)
+		off1 = 0;
+	else
+		off1 = (off64_t)(lr % MIN(stat1.st_size - len, MAXFSIZE));
+	off1 %= maxfsize;
+
+	/*
+	 * splice can overlap write, so the offset of the target file can be
+	 * any number (< maxfsize). But clamp it to 1024 blocks past the
+	 * current EOF of the target file, like clonerange_f does, so that
+	 * we don't create huge sparse files.
+	 */
+	lr = ((int64_t)random() << 32) + random();
+	off2 = (off64_t)(lr % MIN(stat2.st_size + (1024ULL * stat2.st_blksize),
+				  MAXFSIZE));
+	off2 %= maxfsize;
+
+	/*
+	 * len, off1 and off2 are changed by the loop below, so record the
+	 * original values here for the log message
+	 */
+	length = len;
+	offset1 = off1;
+	offset2 = off2;
+
+	/* Pipe initialize; bail out even when not verbose */
+	if (pipe(filedes) < 0) {
+		if (v1 || v2)
+			printf("%d/%d: splice - pipe failed %d\n",
+				procid, opno, errno);
+		goto out_fd2;
+	}
+
+	total = 0;
+	while (len > 0) {
+		/* move to pipe buffer; a return of 0 means end of input */
+		ret1 = splice(fd1, &off1, filedes[1], NULL, len, 0);
+		if (ret1 <= 0)
+			break;
+		bytes = ret1;
+
+		/* move from pipe buffer to dst file */
+		while (bytes > 0) {
+			ret2 = splice(filedes[0], NULL, fd2, &off2, bytes, 0);
+			if (ret2 <= 0)
+				break;
+			bytes -= ret2;
+		}
+		/* error, or no progress draining the pipe */
+		if (bytes > 0)
+			break;
+
+		len -= ret1;
+		total += ret1;
+	}
+
+	e = (ret1 < 0 || ret2 < 0) ? errno : 0;
+	if (v1 || v2) {
+		printf("%d/%d: splice %s%s [%lld,%lld] -> %s%s [%lld,%lld] %d",
+			procid, opno,
+			fpath1.path, inoinfo1, (long long)offset1, (long long)length,
+			fpath2.path, inoinfo2, (long long)offset2, (long long)length, e);
+
+		if (length && length > total)
+			printf(" asked for %lld, spliced %lld??\n",
+				(long long)length, (long long)total);
+		printf("\n");
+	}
+
+	close(filedes[0]);
+	close(filedes[1]);
+out_fd2:
+	close(fd2);
+out_fd1:
+	close(fd1);
+out_fpath2:
+	free_pathname(&fpath2);
+out_fpath1:
+	free_pathname(&fpath1);
+}
+
 void
 creat_f(int opno, long r)
 {