diff mbox series

[v20,06/12] fs, block: copy_file_range for def_blk_ops for direct block device

Message ID 20240520102033.9361-7-nj.shetty@samsung.com (mailing list archive)
State New
Headers show
Series [v20,01/12] block: Introduce queue limits and sysfs for copy-offload support | expand

Commit Message

Nitesh Shetty May 20, 2024, 10:20 a.m. UTC
For direct block device opened with O_DIRECT, use blkdev_copy_offload to
issue device copy offload, or use splice_copy_file_range in case
device copy offload capability is absent or the device files are not open
with O_DIRECT.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
 block/fops.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

Comments

Dave Chinner May 25, 2024, 11:09 p.m. UTC | #1
On Mon, May 20, 2024 at 03:50:19PM +0530, Nitesh Shetty wrote:
> For direct block device opened with O_DIRECT, use blkdev_copy_offload to
> issue device copy offload, or use splice_copy_file_range in case
> device copy offload capability is absent or the device files are not open
> with O_DIRECT.
> 
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
>  block/fops.c | 26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/block/fops.c b/block/fops.c
> index 376265935714..5a4bba4f43aa 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -17,6 +17,7 @@
>  #include <linux/fs.h>
>  #include <linux/iomap.h>
>  #include <linux/module.h>
> +#include <linux/splice.h>
>  #include "blk.h"
>  
>  static inline struct inode *bdev_file_inode(struct file *file)
> @@ -754,6 +755,30 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	return ret;
>  }
>  
> +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> +				      struct file *file_out, loff_t pos_out,
> +				      size_t len, unsigned int flags)
> +{
> +	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> +	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> +	ssize_t copied = 0;
> +
> +	if ((in_bdev == out_bdev) && bdev_max_copy_sectors(in_bdev) &&
> +	    (file_in->f_iocb_flags & IOCB_DIRECT) &&
> +	    (file_out->f_iocb_flags & IOCB_DIRECT)) {
> +		copied = blkdev_copy_offload(in_bdev, pos_in, pos_out, len,
> +					     NULL, NULL, GFP_KERNEL);
> +		if (copied < 0)
> +			copied = 0;
> +	} else {
> +		copied = splice_copy_file_range(file_in, pos_in + copied,
> +						 file_out, pos_out + copied,
> +						 len - copied);
> +	}

This should not fall back to a page cache copy.

We keep being told by application developers that if the fast
hardware/filesystem offload fails, then an error should be returned
so the application can determine what the fallback operation should
be.

It may well be that the application falls back to "copy through the
page cache", but that is an application policy choice, not a
something the kernel offload driver should be making mandatory.

Userspace has to handle copy offload failure anyway, so they a
fallback path regardless of whether copy_file_range() works on block
devices or not...

Cheers,

Dave.
Nitesh Shetty May 27, 2024, 8:43 a.m. UTC | #2
On 26/05/24 09:09AM, Dave Chinner wrote:
>On Mon, May 20, 2024 at 03:50:19PM +0530, Nitesh Shetty wrote:
>> For direct block device opened with O_DIRECT, use blkdev_copy_offload to
>> issue device copy offload, or use splice_copy_file_range in case
>> device copy offload capability is absent or the device files are not open
>> with O_DIRECT.
>>
>> Reviewed-by: Hannes Reinecke <hare@suse.de>
>> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>> ---
>>  block/fops.c | 26 ++++++++++++++++++++++++++
>>  1 file changed, 26 insertions(+)
>>
>> diff --git a/block/fops.c b/block/fops.c
>> index 376265935714..5a4bba4f43aa 100644
>> --- a/block/fops.c
>> +++ b/block/fops.c
>> @@ -17,6 +17,7 @@
>>  #include <linux/fs.h>
>>  #include <linux/iomap.h>
>>  #include <linux/module.h>
>> +#include <linux/splice.h>
>>  #include "blk.h"
>>
>>  static inline struct inode *bdev_file_inode(struct file *file)
>> @@ -754,6 +755,30 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>>  	return ret;
>>  }
>>
>> +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
>> +				      struct file *file_out, loff_t pos_out,
>> +				      size_t len, unsigned int flags)
>> +{
>> +	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
>> +	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
>> +	ssize_t copied = 0;
>> +
>> +	if ((in_bdev == out_bdev) && bdev_max_copy_sectors(in_bdev) &&
>> +	    (file_in->f_iocb_flags & IOCB_DIRECT) &&
>> +	    (file_out->f_iocb_flags & IOCB_DIRECT)) {
>> +		copied = blkdev_copy_offload(in_bdev, pos_in, pos_out, len,
>> +					     NULL, NULL, GFP_KERNEL);
>> +		if (copied < 0)
>> +			copied = 0;
>> +	} else {
>> +		copied = splice_copy_file_range(file_in, pos_in + copied,
>> +						 file_out, pos_out + copied,
>> +						 len - copied);
>> +	}
>
>This should not fall back to a page cache copy.
>
>We keep being told by application developers that if the fast
>hardware/filesystem offload fails, then an error should be returned
>so the application can determine what the fallback operation should
>be.
>
>It may well be that the application falls back to "copy through the
>page cache", but that is an application policy choice, not a
>something the kernel offload driver should be making mandatory.
>
>Userspace has to handle copy offload failure anyway, so they a
>fallback path regardless of whether copy_file_range() works on block
>devices or not...
>
Makes sense, We will remove fallback part in next version.

Thank you,
Nitesh Shetty
diff mbox series

Patch

diff --git a/block/fops.c b/block/fops.c
index 376265935714..5a4bba4f43aa 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -17,6 +17,7 @@ 
 #include <linux/fs.h>
 #include <linux/iomap.h>
 #include <linux/module.h>
+#include <linux/splice.h>
 #include "blk.h"
 
 static inline struct inode *bdev_file_inode(struct file *file)
@@ -754,6 +755,30 @@  static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return ret;
 }
 
+static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
+				      struct file *file_out, loff_t pos_out,
+				      size_t len, unsigned int flags)
+{
+	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
+	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
+	ssize_t copied = 0;
+
+	if ((in_bdev == out_bdev) && bdev_max_copy_sectors(in_bdev) &&
+	    (file_in->f_iocb_flags & IOCB_DIRECT) &&
+	    (file_out->f_iocb_flags & IOCB_DIRECT)) {
+		copied = blkdev_copy_offload(in_bdev, pos_in, pos_out, len,
+					     NULL, NULL, GFP_KERNEL);
+		if (copied < 0)
+			copied = 0;
+	} else {
+		copied = splice_copy_file_range(file_in, pos_in + copied,
+						 file_out, pos_out + copied,
+						 len - copied);
+	}
+
+	return copied;
+}
+
 #define	BLKDEV_FALLOC_FL_SUPPORTED					\
 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
 		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
@@ -859,6 +884,7 @@  const struct file_operations def_blk_fops = {
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= blkdev_fallocate,
 	.fop_flags	= FOP_BUFFER_RASYNC,
+	.copy_file_range = blkdev_copy_file_range,
 };
 
 static __init int blkdev_init(void)