diff mbox series

[v2] fuse: Allow fallocate(FALLOC_FL_ZERO_RANGE)

Message ID 20210512103704.3505086-2-rjones@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v2] fuse: Allow fallocate(FALLOC_FL_ZERO_RANGE) | expand

Commit Message

Richard W.M. Jones May 12, 2021, 10:37 a.m. UTC
libnbd's nbdfuse utility would like to translate fallocate zero
requests into NBD_CMD_WRITE_ZEROES.  Currently the fuse module filters
these out, returning -EOPNOTSUPP.  This commit treats these almost the
same way as FALLOC_FL_PUNCH_HOLE except not calling
truncate_pagecache_range.

A way to test this is with the following script:

--------------------
  #!/bin/bash
  # Requires fuse >= 3, nbdkit >= 1.8, and latest nbdfuse from
  # https://gitlab.com/nbdkit/libnbd/-/tree/master/fuse
  set -e
  set -x

  export output=$PWD/output
  rm -f test.img $output

  # Create an nbdkit instance that prints the NBD requests seen.
  nbdkit sh - <<'EOF'
  case "$1" in
    get_size) echo 1M ;;
    can_write|can_trim|can_zero|can_fast_zero) ;;
    pread) echo "$@" >>$output; dd if=/dev/zero count=$3 iflag=count_bytes ;;
    pwrite) echo "$@" >>$output; cat >/dev/null ;;
    trim|zero) echo "$@" >>$output ;;
    *) exit 2 ;;
  esac
  EOF

  # Fuse-mount NBD instance as a file.
  touch test.img
  nbdfuse test.img nbd://localhost & sleep 2
  ls -lh test.img

  # Run a read, write, trim and zero request.
  dd if=test.img of=/dev/null bs=512 skip=1024 count=1
  dd if=/dev/zero of=test.img bs=512 skip=2048 count=1
  fallocate -p -l 512 -o 4096 test.img
  fallocate -z -l 512 -o 8192 test.img

  # Print the output from the NBD server.
  cat $output

  # Clean up.
  fusermount3 -u test.img
  killall nbdkit
  rm test.img $output
  --------------------

which will print:

  pread  4096 524288    # number depends on readahead
  pwrite  512 0
  trim  512 4096
  zero  512 8192 may_trim

The last line indicates that the FALLOC_FL_ZERO_RANGE request was
successfully passed through by the kernel module to nbdfuse,
translated to NBD_CMD_WRITE_ZEROES and sent through to the server.

Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
---
 fs/fuse/file.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Shachar Sharon May 12, 2021, 2:27 p.m. UTC | #1
On Wed, May 12, 2021 at 11:37:04AM +0100, Richard W.M. Jones wrote:
>libnbd's nbdfuse utility would like to translate fallocate zero
>requests into NBD_CMD_WRITE_ZEROES.  Currently the fuse module filters
>these out, returning -EOPNOTSUPP.  This commit treats these almost the
>same way as FALLOC_FL_PUNCH_HOLE except not calling
>truncate_pagecache_range.
>
Why don't you call 'truncate_pagecache_range' ?

>A way to test this is with the following script:
>
>--------------------
>  #!/bin/bash
>  # Requires fuse >= 3, nbdkit >= 1.8, and latest nbdfuse from
>  # https://gitlab.com/nbdkit/libnbd/-/tree/master/fuse
>  set -e
>  set -x
>
>  export output=$PWD/output
>  rm -f test.img $output
>
>  # Create an nbdkit instance that prints the NBD requests seen.
>  nbdkit sh - <<'EOF'
>  case "$1" in
>    get_size) echo 1M ;;
>    can_write|can_trim|can_zero|can_fast_zero) ;;
>    pread) echo "$@" >>$output; dd if=/dev/zero count=$3 iflag=count_bytes ;;
>    pwrite) echo "$@" >>$output; cat >/dev/null ;;
>    trim|zero) echo "$@" >>$output ;;
>    *) exit 2 ;;
>  esac
>  EOF
>
>  # Fuse-mount NBD instance as a file.
>  touch test.img
>  nbdfuse test.img nbd://localhost & sleep 2
>  ls -lh test.img
>
>  # Run a read, write, trim and zero request.
>  dd if=test.img of=/dev/null bs=512 skip=1024 count=1
>  dd if=/dev/zero of=test.img bs=512 skip=2048 count=1
>  fallocate -p -l 512 -o 4096 test.img
>  fallocate -z -l 512 -o 8192 test.img
>
>  # Print the output from the NBD server.
>  cat $output
>
>  # Clean up.
>  fusermount3 -u test.img
>  killall nbdkit
>  rm test.img $output
>  --------------------
>
>which will print:
>
>  pread  4096 524288    # number depends on readahead
>  pwrite  512 0
>  trim  512 4096
>  zero  512 8192 may_trim
>
>The last line indicates that the FALLOC_FL_ZERO_RANGE request was
>successfully passed through by the kernel module to nbdfuse,
>translated to NBD_CMD_WRITE_ZEROES and sent through to the server.
>
>Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
>---
> fs/fuse/file.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
>diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>index 09ef2a4d25ed..22e8e88c78d4 100644
>--- a/fs/fuse/file.c
>+++ b/fs/fuse/file.c
>@@ -2907,11 +2907,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> 	};
> 	int err;
> 	bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
>-			   (mode & FALLOC_FL_PUNCH_HOLE);
>+			   (mode & FALLOC_FL_PUNCH_HOLE) ||
>+			   (mode & FALLOC_FL_ZERO_RANGE);
To stay aligned with existing code style, consider:
-			   (mode & FALLOC_FL_PUNCH_HOLE);
+			   (mode & (FALLOC_FL_PUNCH_HOLE |
+				    FALLOC_FL_ZERO_RANGE));

>
> 	bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
>
>-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
>+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
>+		     FALLOC_FL_ZERO_RANGE))
> 		return -EOPNOTSUPP;
>
> 	if (fm->fc->no_fallocate)
>@@ -2926,7 +2928,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> 				goto out;
> 		}
>
>-		if (mode & FALLOC_FL_PUNCH_HOLE) {
>+		if ((mode & FALLOC_FL_PUNCH_HOLE) ||
>+		    (mode & FALLOC_FL_ZERO_RANGE)) {
> 			loff_t endbyte = offset + length - 1;
>
> 			err = fuse_writeback_range(inode, offset, endbyte);
>-- 
>2.31.1
>
Richard W.M. Jones May 12, 2021, 2:49 p.m. UTC | #2
On Wed, May 12, 2021 at 05:27:22PM +0300, Shachar Sharon wrote:
> On Wed, May 12, 2021 at 11:37:04AM +0100, Richard W.M. Jones wrote:
> >libnbd's nbdfuse utility would like to translate fallocate zero
> >requests into NBD_CMD_WRITE_ZEROES.  Currently the fuse module filters
> >these out, returning -EOPNOTSUPP.  This commit treats these almost the
> >same way as FALLOC_FL_PUNCH_HOLE except not calling
> >truncate_pagecache_range.
> >
> Why don't you call 'truncate_pagecache_range' ?

Very good point.  I had just assumed that it would only be useful when
hole-punching, but now that I have actually read the description of the
function, I see that we need it.

Also looking at other filesystems that also support FALLOC_FL_ZERO_RANGE:

  ext4_zero_range -> calls truncate_pagecache_range
  f2fs_zero_range -> calls it
  xfs -> calls it indirectly
  btrfs_zero_range -> does not call it (?)

I'll add this, and retest everything.

> >A way to test this is with the following script:

In my next version I'll also address this script which is rather
long-winded.  I think there's an easier way for people to test this:

> >--------------------
> > #!/bin/bash
> > # Requires fuse >= 3, nbdkit >= 1.8, and latest nbdfuse from
> > # https://gitlab.com/nbdkit/libnbd/-/tree/master/fuse
> > set -e
> > set -x
> >
> > export output=$PWD/output
> > rm -f test.img $output
> >
> > # Create an nbdkit instance that prints the NBD requests seen.
> > nbdkit sh - <<'EOF'
> > case "$1" in
> >   get_size) echo 1M ;;
> >   can_write|can_trim|can_zero|can_fast_zero) ;;
> >   pread) echo "$@" >>$output; dd if=/dev/zero count=$3 iflag=count_bytes ;;
> >   pwrite) echo "$@" >>$output; cat >/dev/null ;;
> >   trim|zero) echo "$@" >>$output ;;
> >   *) exit 2 ;;
> > esac
[etc]
> >diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> >index 09ef2a4d25ed..22e8e88c78d4 100644
> >--- a/fs/fuse/file.c
> >+++ b/fs/fuse/file.c
> >@@ -2907,11 +2907,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> >	};
> >	int err;
> >	bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
> >-			   (mode & FALLOC_FL_PUNCH_HOLE);
> >+			   (mode & FALLOC_FL_PUNCH_HOLE) ||
> >+			   (mode & FALLOC_FL_ZERO_RANGE);
> To stay aligned with existing code style, consider:
> -			   (mode & FALLOC_FL_PUNCH_HOLE);
> +			   (mode & (FALLOC_FL_PUNCH_HOLE |
> +				    FALLOC_FL_ZERO_RANGE));

Good idea.

Thanks for the quick review.

Rich.
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 09ef2a4d25ed..22e8e88c78d4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2907,11 +2907,13 @@  static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 	};
 	int err;
 	bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
-			   (mode & FALLOC_FL_PUNCH_HOLE);
+			   (mode & FALLOC_FL_PUNCH_HOLE) ||
+			   (mode & FALLOC_FL_ZERO_RANGE);
 
 	bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
 
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+		     FALLOC_FL_ZERO_RANGE))
 		return -EOPNOTSUPP;
 
 	if (fm->fc->no_fallocate)
@@ -2926,7 +2928,8 @@  static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 				goto out;
 		}
 
-		if (mode & FALLOC_FL_PUNCH_HOLE) {
+		if ((mode & FALLOC_FL_PUNCH_HOLE) ||
+		    (mode & FALLOC_FL_ZERO_RANGE)) {
 			loff_t endbyte = offset + length - 1;
 
 			err = fuse_writeback_range(inode, offset, endbyte);