diff mbox series

[v8,4/9] fs, block: copy_file_range for def_blk_ops for direct block device.

Message ID 20230327084103.21601-5-anuj20.g@samsung.com (mailing list archive)
State New, archived
Headers show
Series [v8,1/9] block: Introduce queue limits for copy-offload support | expand

Commit Message

Anuj Gupta March 27, 2023, 8:40 a.m. UTC
From: Nitesh Shetty <nj.shetty@samsung.com>

For direct block device opened with O_DIRECT, use copy_file_range to
issue device copy offload, and fallback to generic_copy_file_range incase
device copy offload capability is absent.
Modify checks to allow bdevs to use copy_file_range.

Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
 block/blk-lib.c        | 22 ++++++++++++++++++++++
 block/fops.c           | 20 ++++++++++++++++++++
 fs/read_write.c        | 11 +++++++++--
 include/linux/blkdev.h |  3 +++
 4 files changed, 54 insertions(+), 2 deletions(-)

Comments

Christian Brauner March 29, 2023, 12:14 p.m. UTC | #1
On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
> 
> For direct block device opened with O_DIRECT, use copy_file_range to
> issue device copy offload, and fallback to generic_copy_file_range incase
> device copy offload capability is absent.
> Modify checks to allow bdevs to use copy_file_range.
> 
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
>  block/blk-lib.c        | 22 ++++++++++++++++++++++
>  block/fops.c           | 20 ++++++++++++++++++++
>  fs/read_write.c        | 11 +++++++++--
>  include/linux/blkdev.h |  3 +++
>  4 files changed, 54 insertions(+), 2 deletions(-)
> 
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index a21819e59b29..c288573c7e77 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
>  	return blk_queue_copy(q_in) && blk_queue_copy(q_out);
>  }
>  
> +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> +		      struct block_device *bdev_out, loff_t pos_out, size_t len,
> +		      cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> +{
> +	struct request_queue *in_q = bdev_get_queue(bdev_in);
> +	struct request_queue *out_q = bdev_get_queue(bdev_out);
> +	int ret = -EINVAL;

Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
right away anyway?

> +	bool offload = false;

Same thing with initializing offload.

> +
> +	ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> +	if (ret)
> +		return ret;
> +
> +	offload = blk_check_copy_offload(in_q, out_q);
> +	if (offload)
> +		ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> +				len, end_io, private, gfp_mask);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> +
>  /*
>   * @bdev_in:	source block device
>   * @pos_in:	source offset
> diff --git a/block/fops.c b/block/fops.c
> index d2e6be4e3d1c..3b7c05831d5c 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	return ret;
>  }
>  
> +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> +				struct file *file_out, loff_t pos_out,
> +				size_t len, unsigned int flags)
> +{
> +	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> +	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> +	int comp_len = 0;
> +
> +	if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> +		(file_out->f_iocb_flags & IOCB_DIRECT))
> +		comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> +				 pos_out, len, NULL, NULL, GFP_KERNEL);
> +	if (comp_len != len)
> +		comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> +			file_out, pos_out + comp_len, len - comp_len, flags);

I'm not deeply familiar with this code but this looks odd. It at least
seems possible that comp_len could be -EINVAL and len 20 at which point
you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
Nitesh Shetty March 29, 2023, 12:42 p.m. UTC | #2
On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote:
> On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> > From: Nitesh Shetty <nj.shetty@samsung.com>
> > 
> > For direct block device opened with O_DIRECT, use copy_file_range to
> > issue device copy offload, and fallback to generic_copy_file_range incase
> > device copy offload capability is absent.
> > Modify checks to allow bdevs to use copy_file_range.
> > 
> > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > ---
> >  block/blk-lib.c        | 22 ++++++++++++++++++++++
> >  block/fops.c           | 20 ++++++++++++++++++++
> >  fs/read_write.c        | 11 +++++++++--
> >  include/linux/blkdev.h |  3 +++
> >  4 files changed, 54 insertions(+), 2 deletions(-)
> > 
> > diff --git a/block/blk-lib.c b/block/blk-lib.c
> > index a21819e59b29..c288573c7e77 100644
> > --- a/block/blk-lib.c
> > +++ b/block/blk-lib.c
> > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> >  	return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> >  }
> >  
> > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> > +		      struct block_device *bdev_out, loff_t pos_out, size_t len,
> > +		      cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> > +{
> > +	struct request_queue *in_q = bdev_get_queue(bdev_in);
> > +	struct request_queue *out_q = bdev_get_queue(bdev_out);
> > +	int ret = -EINVAL;
> 
> Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
> right away anyway?
> 

acked.

> > +	bool offload = false;
> 
> Same thing with initializing offload.
> 
acked

> > +
> > +	ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> > +	if (ret)
> > +		return ret;
> > +
> > +	offload = blk_check_copy_offload(in_q, out_q);
> > +	if (offload)
> > +		ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> > +				len, end_io, private, gfp_mask);
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> > +
> >  /*
> >   * @bdev_in:	source block device
> >   * @pos_in:	source offset
> > diff --git a/block/fops.c b/block/fops.c
> > index d2e6be4e3d1c..3b7c05831d5c 100644
> > --- a/block/fops.c
> > +++ b/block/fops.c
> > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> >  	return ret;
> >  }
> >  
> > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> > +				struct file *file_out, loff_t pos_out,
> > +				size_t len, unsigned int flags)
> > +{
> > +	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> > +	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> > +	int comp_len = 0;
> > +
> > +	if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> > +		(file_out->f_iocb_flags & IOCB_DIRECT))
> > +		comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> > +				 pos_out, len, NULL, NULL, GFP_KERNEL);
> > +	if (comp_len != len)
> > +		comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> > +			file_out, pos_out + comp_len, len - comp_len, flags);
> 
> I'm not deeply familiar with this code but this looks odd. It at least
> seems possible that comp_len could be -EINVAL and len 20 at which point
> you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().

comp_len should be 0 incase of error. We do agree, some function
description needs to be updated. We will recheck this completion path to
make sure not to return negative value, incase of failure.

Thank You,
Nitesh Shetty
kernel test robot March 29, 2023, 2:07 p.m. UTC | #3
Hi Anuj,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on device-mapper-dm/for-next linus/master v6.3-rc4 next-20230329]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20230327084103.21601-5-anuj20.g%40samsung.com
patch subject: [PATCH v8 4/9] fs, block: copy_file_range for def_blk_ops for direct block device.
config: loongarch-randconfig-r001-20230329 (https://download.01.org/0day-ci/archive/20230329/202303292151.7DDOUCIt-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/61819d260936954ddd6688548f074e7063dcf39e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
        git checkout 61819d260936954ddd6688548f074e7063dcf39e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303292151.7DDOUCIt-lkp@intel.com/

All errors (new ones prefixed by >>):

   loongarch64-linux-ld: fs/read_write.o: in function `.L633':
>> read_write.c:(.text+0x42e0): undefined reference to `I_BDEV'
kernel test robot March 29, 2023, 3:30 p.m. UTC | #4
Hi Anuj,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on device-mapper-dm/for-next linus/master v6.3-rc4 next-20230329]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20230327084103.21601-5-anuj20.g%40samsung.com
patch subject: [PATCH v8 4/9] fs, block: copy_file_range for def_blk_ops for direct block device.
config: x86_64-randconfig-a013 (https://download.01.org/0day-ci/archive/20230329/202303292349.ED70Fxdw-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/61819d260936954ddd6688548f074e7063dcf39e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
        git checkout 61819d260936954ddd6688548f074e7063dcf39e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 olddefconfig
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303292349.ED70Fxdw-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: vmlinux.o: in function `generic_copy_file_checks':
>> fs/read_write.c:1453: undefined reference to `I_BDEV'


vim +1453 fs/read_write.c

  1398	
  1399	/*
  1400	 * Performs necessary checks before doing a file copy
  1401	 *
  1402	 * Can adjust amount of bytes to copy via @req_count argument.
  1403	 * Returns appropriate error code that caller should return or
  1404	 * zero in case the copy should be allowed.
  1405	 */
  1406	static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
  1407					    struct file *file_out, loff_t pos_out,
  1408					    size_t *req_count, unsigned int flags)
  1409	{
  1410		struct inode *inode_in = file_inode(file_in);
  1411		struct inode *inode_out = file_inode(file_out);
  1412		uint64_t count = *req_count;
  1413		loff_t size_in;
  1414		int ret;
  1415	
  1416		ret = generic_file_rw_checks(file_in, file_out);
  1417		if (ret)
  1418			return ret;
  1419	
  1420		/*
  1421		 * We allow some filesystems to handle cross sb copy, but passing
  1422		 * a file of the wrong filesystem type to filesystem driver can result
  1423		 * in an attempt to dereference the wrong type of ->private_data, so
  1424		 * avoid doing that until we really have a good reason.
  1425		 *
  1426		 * nfs and cifs define several different file_system_type structures
  1427		 * and several different sets of file_operations, but they all end up
  1428		 * using the same ->copy_file_range() function pointer.
  1429		 */
  1430		if (flags & COPY_FILE_SPLICE) {
  1431			/* cross sb splice is allowed */
  1432		} else if (file_out->f_op->copy_file_range) {
  1433			if (file_in->f_op->copy_file_range !=
  1434			    file_out->f_op->copy_file_range)
  1435				return -EXDEV;
  1436		} else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
  1437			return -EXDEV;
  1438		}
  1439	
  1440		/* Don't touch certain kinds of inodes */
  1441		if (IS_IMMUTABLE(inode_out))
  1442			return -EPERM;
  1443	
  1444		if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
  1445			return -ETXTBSY;
  1446	
  1447		/* Ensure offsets don't wrap. */
  1448		if (pos_in + count < pos_in || pos_out + count < pos_out)
  1449			return -EOVERFLOW;
  1450	
  1451		/* Shorten the copy to EOF */
  1452		if (S_ISBLK(inode_in->i_mode))
> 1453			size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host));
  1454		else
  1455			size_in = i_size_read(inode_in);
  1456	
  1457		if (pos_in >= size_in)
  1458			count = 0;
  1459		else
  1460			count = min(count, size_in - (uint64_t)pos_in);
  1461	
  1462		ret = generic_write_check_limits(file_out, pos_out, &count);
  1463		if (ret)
  1464			return ret;
  1465	
  1466		/* Don't allow overlapped copying within the same file. */
  1467		if (inode_in == inode_out &&
  1468		    pos_out + count > pos_in &&
  1469		    pos_out < pos_in + count)
  1470			return -EINVAL;
  1471	
  1472		*req_count = count;
  1473		return 0;
  1474	}
  1475
Christian Brauner March 30, 2023, 5:48 a.m. UTC | #5
On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote:
> On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote:
> > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> > > From: Nitesh Shetty <nj.shetty@samsung.com>
> > > 
> > > For direct block device opened with O_DIRECT, use copy_file_range to
> > > issue device copy offload, and fallback to generic_copy_file_range incase
> > > device copy offload capability is absent.
> > > Modify checks to allow bdevs to use copy_file_range.
> > > 
> > > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > > ---
> > >  block/blk-lib.c        | 22 ++++++++++++++++++++++
> > >  block/fops.c           | 20 ++++++++++++++++++++
> > >  fs/read_write.c        | 11 +++++++++--
> > >  include/linux/blkdev.h |  3 +++
> > >  4 files changed, 54 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/block/blk-lib.c b/block/blk-lib.c
> > > index a21819e59b29..c288573c7e77 100644
> > > --- a/block/blk-lib.c
> > > +++ b/block/blk-lib.c
> > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> > >  	return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> > >  }
> > >  
> > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> > > +		      struct block_device *bdev_out, loff_t pos_out, size_t len,
> > > +		      cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> > > +{
> > > +	struct request_queue *in_q = bdev_get_queue(bdev_in);
> > > +	struct request_queue *out_q = bdev_get_queue(bdev_out);
> > > +	int ret = -EINVAL;
> > 
> > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
> > right away anyway?
> > 
> 
> acked.
> 
> > > +	bool offload = false;
> > 
> > Same thing with initializing offload.
> > 
> acked
> 
> > > +
> > > +	ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	offload = blk_check_copy_offload(in_q, out_q);
> > > +	if (offload)
> > > +		ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> > > +				len, end_io, private, gfp_mask);
> > > +
> > > +	return ret;
> > > +}
> > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> > > +
> > >  /*
> > >   * @bdev_in:	source block device
> > >   * @pos_in:	source offset
> > > diff --git a/block/fops.c b/block/fops.c
> > > index d2e6be4e3d1c..3b7c05831d5c 100644
> > > --- a/block/fops.c
> > > +++ b/block/fops.c
> > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > >  	return ret;
> > >  }
> > >  
> > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> > > +				struct file *file_out, loff_t pos_out,
> > > +				size_t len, unsigned int flags)
> > > +{
> > > +	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> > > +	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> > > +	int comp_len = 0;
> > > +
> > > +	if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> > > +		(file_out->f_iocb_flags & IOCB_DIRECT))
> > > +		comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> > > +				 pos_out, len, NULL, NULL, GFP_KERNEL);
> > > +	if (comp_len != len)
> > > +		comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> > > +			file_out, pos_out + comp_len, len - comp_len, flags);
> > 
> > I'm not deeply familiar with this code but this looks odd. It at least
> > seems possible that comp_len could be -EINVAL and len 20 at which point
> > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().

20 - -22 = 44 ofc

> 
> comp_len should be 0 incase of error. We do agree, some function

I mean, not to hammer on this point too much but just to be clear
blk_copy_sanity_check(), which is introduced in the second patch, can
return both -EPERM and -EINVAL and is first called in
blkdev_copy_offload() so it's definitely possible for comp_len to be
negative.
Nitesh Shetty March 30, 2023, 3:21 p.m. UTC | #6
On Thu, Mar 30, 2023 at 11:18 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote:
> > On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote:
> > > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote:
> > > > From: Nitesh Shetty <nj.shetty@samsung.com>
> > > >
> > > > For direct block device opened with O_DIRECT, use copy_file_range to
> > > > issue device copy offload, and fallback to generic_copy_file_range incase
> > > > device copy offload capability is absent.
> > > > Modify checks to allow bdevs to use copy_file_range.
> > > >
> > > > Suggested-by: Ming Lei <ming.lei@redhat.com>
> > > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > > > ---
> > > >  block/blk-lib.c        | 22 ++++++++++++++++++++++
> > > >  block/fops.c           | 20 ++++++++++++++++++++
> > > >  fs/read_write.c        | 11 +++++++++--
> > > >  include/linux/blkdev.h |  3 +++
> > > >  4 files changed, 54 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/block/blk-lib.c b/block/blk-lib.c
> > > > index a21819e59b29..c288573c7e77 100644
> > > > --- a/block/blk-lib.c
> > > > +++ b/block/blk-lib.c
> > > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in,
> > > >   return blk_queue_copy(q_in) && blk_queue_copy(q_out);
> > > >  }
> > > >
> > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
> > > > +               struct block_device *bdev_out, loff_t pos_out, size_t len,
> > > > +               cio_iodone_t end_io, void *private, gfp_t gfp_mask)
> > > > +{
> > > > + struct request_queue *in_q = bdev_get_queue(bdev_in);
> > > > + struct request_queue *out_q = bdev_get_queue(bdev_out);
> > > > + int ret = -EINVAL;
> > >
> > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it
> > > right away anyway?
> > >
> >
> > acked.
> >
> > > > + bool offload = false;
> > >
> > > Same thing with initializing offload.
> > >
> > acked
> >
> > > > +
> > > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
> > > > + if (ret)
> > > > +         return ret;
> > > > +
> > > > + offload = blk_check_copy_offload(in_q, out_q);
> > > > + if (offload)
> > > > +         ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> > > > +                         len, end_io, private, gfp_mask);
> > > > +
> > > > + return ret;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload);
> > > > +
> > > >  /*
> > > >   * @bdev_in:     source block device
> > > >   * @pos_in:      source offset
> > > > diff --git a/block/fops.c b/block/fops.c
> > > > index d2e6be4e3d1c..3b7c05831d5c 100644
> > > > --- a/block/fops.c
> > > > +++ b/block/fops.c
> > > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > > >   return ret;
> > > >  }
> > > >
> > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
> > > > +                         struct file *file_out, loff_t pos_out,
> > > > +                         size_t len, unsigned int flags)
> > > > +{
> > > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
> > > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
> > > > + int comp_len = 0;
> > > > +
> > > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
> > > > +         (file_out->f_iocb_flags & IOCB_DIRECT))
> > > > +         comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
> > > > +                          pos_out, len, NULL, NULL, GFP_KERNEL);
> > > > + if (comp_len != len)
> > > > +         comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
> > > > +                 file_out, pos_out + comp_len, len - comp_len, flags);
> > >
> > > I'm not deeply familiar with this code but this looks odd. It at least
> > > seems possible that comp_len could be -EINVAL and len 20 at which point
> > > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
>
> 20 - -22 = 44 ofc
>
> >
> > comp_len should be 0 incase of error. We do agree, some function
>
> I mean, not to hammer on this point too much but just to be clear
> blk_copy_sanity_check(), which is introduced in the second patch, can
> return both -EPERM and -EINVAL and is first called in
> blkdev_copy_offload() so it's definitely possible for comp_len to be
> negative.

Acked. Will be updated in the next version.

Thank you,
Nitesh Shetty
diff mbox series

Patch

diff --git a/block/blk-lib.c b/block/blk-lib.c
index a21819e59b29..c288573c7e77 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -475,6 +475,28 @@  static inline bool blk_check_copy_offload(struct request_queue *q_in,
 	return blk_queue_copy(q_in) && blk_queue_copy(q_out);
 }
 
+int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
+		      struct block_device *bdev_out, loff_t pos_out, size_t len,
+		      cio_iodone_t end_io, void *private, gfp_t gfp_mask)
+{
+	struct request_queue *in_q = bdev_get_queue(bdev_in);
+	struct request_queue *out_q = bdev_get_queue(bdev_out);
+	int ret = -EINVAL;
+	bool offload = false;
+
+	ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len);
+	if (ret)
+		return ret;
+
+	offload = blk_check_copy_offload(in_q, out_q);
+	if (offload)
+		ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
+				len, end_io, private, gfp_mask);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_copy_offload);
+
 /*
  * @bdev_in:	source block device
  * @pos_in:	source offset
diff --git a/block/fops.c b/block/fops.c
index d2e6be4e3d1c..3b7c05831d5c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -611,6 +611,25 @@  static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return ret;
 }
 
+static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				size_t len, unsigned int flags)
+{
+	struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in));
+	struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out));
+	int comp_len = 0;
+
+	if ((file_in->f_iocb_flags & IOCB_DIRECT) &&
+		(file_out->f_iocb_flags & IOCB_DIRECT))
+		comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev,
+				 pos_out, len, NULL, NULL, GFP_KERNEL);
+	if (comp_len != len)
+		comp_len = generic_copy_file_range(file_in, pos_in + comp_len,
+			file_out, pos_out + comp_len, len - comp_len, flags);
+
+	return comp_len;
+}
+
 #define	BLKDEV_FALLOC_FL_SUPPORTED					\
 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
 		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
@@ -694,6 +713,7 @@  const struct file_operations def_blk_fops = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= blkdev_fallocate,
+	.copy_file_range = blkdev_copy_file_range,
 };
 
 static __init int blkdev_init(void)
diff --git a/fs/read_write.c b/fs/read_write.c
index 7a2ff6157eda..62e925e9b2f0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -20,6 +20,7 @@ 
 #include <linux/compat.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
+#include <linux/blkdev.h>
 #include "internal.h"
 
 #include <linux/uaccess.h>
@@ -1448,7 +1449,11 @@  static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
 		return -EOVERFLOW;
 
 	/* Shorten the copy to EOF */
-	size_in = i_size_read(inode_in);
+	if (S_ISBLK(inode_in->i_mode))
+		size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host));
+	else
+		size_in = i_size_read(inode_in);
+
 	if (pos_in >= size_in)
 		count = 0;
 	else
@@ -1709,7 +1714,9 @@  int generic_file_rw_checks(struct file *file_in, struct file *file_out)
 	/* Don't copy dirs, pipes, sockets... */
 	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
 		return -EISDIR;
-	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+
+	if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) &&
+		(!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode)))
 		return -EINVAL;
 
 	if (!(file_in->f_mode & FMODE_READ) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a54153610800..468d5f3378e2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1057,6 +1057,9 @@  int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 int blkdev_issue_copy(struct block_device *bdev_in, loff_t pos_in,
 		      struct block_device *bdev_out, loff_t pos_out, size_t len,
 		      cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in,
+		      struct block_device *bdev_out, loff_t pos_out, size_t len,
+		      cio_iodone_t end_io, void *private, gfp_t gfp_mask);
 struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 		gfp_t gfp_mask);
 void bio_map_kern_endio(struct bio *bio);