Message ID | 20230327084103.21601-5-anuj20.g@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v8,1/9] block: Introduce queue limits for copy-offload support | expand |
On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > From: Nitesh Shetty <nj.shetty@samsung.com> > > For direct block device opened with O_DIRECT, use copy_file_range to > issue device copy offload, and fallback to generic_copy_file_range incase > device copy offload capability is absent. > Modify checks to allow bdevs to use copy_file_range. > > Suggested-by: Ming Lei <ming.lei@redhat.com> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > --- > block/blk-lib.c | 22 ++++++++++++++++++++++ > block/fops.c | 20 ++++++++++++++++++++ > fs/read_write.c | 11 +++++++++-- > include/linux/blkdev.h | 3 +++ > 4 files changed, 54 insertions(+), 2 deletions(-) > > diff --git a/block/blk-lib.c b/block/blk-lib.c > index a21819e59b29..c288573c7e77 100644 > --- a/block/blk-lib.c > +++ b/block/blk-lib.c > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > } > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > + struct block_device *bdev_out, loff_t pos_out, size_t len, > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > +{ > + struct request_queue *in_q = bdev_get_queue(bdev_in); > + struct request_queue *out_q = bdev_get_queue(bdev_out); > + int ret = -EINVAL; Why initialize to -EINVAL if blk_copy_sanity_check() initializes it right away anyway? > + bool offload = false; Same thing with initializing offload. 
> + > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > + if (ret) > + return ret; > + > + offload = blk_check_copy_offload(in_q, out_q); > + if (offload) > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > + len, end_io, private, gfp_mask); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > + > /* > * @bdev_in: source block device > * @pos_in: source offset > diff --git a/block/fops.c b/block/fops.c > index d2e6be4e3d1c..3b7c05831d5c 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > return ret; > } > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t len, unsigned int flags) > +{ > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > + int comp_len = 0; > + > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > + (file_out->f_iocb_flags & IOCB_DIRECT)) > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > + pos_out, len, NULL, NULL, GFP_KERNEL); > + if (comp_len != len) > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > + file_out, pos_out + comp_len, len - comp_len, flags); I'm not deeply familiar with this code but this looks odd. It at least seems possible that comp_len could be -EINVAL and len 20 at which point you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range().
On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote: > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > > From: Nitesh Shetty <nj.shetty@samsung.com> > > > > For direct block device opened with O_DIRECT, use copy_file_range to > > issue device copy offload, and fallback to generic_copy_file_range incase > > device copy offload capability is absent. > > Modify checks to allow bdevs to use copy_file_range. > > > > Suggested-by: Ming Lei <ming.lei@redhat.com> > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > --- > > block/blk-lib.c | 22 ++++++++++++++++++++++ > > block/fops.c | 20 ++++++++++++++++++++ > > fs/read_write.c | 11 +++++++++-- > > include/linux/blkdev.h | 3 +++ > > 4 files changed, 54 insertions(+), 2 deletions(-) > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > index a21819e59b29..c288573c7e77 100644 > > --- a/block/blk-lib.c > > +++ b/block/blk-lib.c > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > > } > > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > > + struct block_device *bdev_out, loff_t pos_out, size_t len, > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > > +{ > > + struct request_queue *in_q = bdev_get_queue(bdev_in); > > + struct request_queue *out_q = bdev_get_queue(bdev_out); > > + int ret = -EINVAL; > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it > right away anyway? > acked. > > + bool offload = false; > > Same thing with initializing offload. 
> acked > > + > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > > + if (ret) > > + return ret; > > + > > + offload = blk_check_copy_offload(in_q, out_q); > > + if (offload) > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > > + len, end_io, private, gfp_mask); > > + > > + return ret; > > +} > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > + > > /* > > * @bdev_in: source block device > > * @pos_in: source offset > > diff --git a/block/fops.c b/block/fops.c > > index d2e6be4e3d1c..3b7c05831d5c 100644 > > --- a/block/fops.c > > +++ b/block/fops.c > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > > return ret; > > } > > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > > + struct file *file_out, loff_t pos_out, > > + size_t len, unsigned int flags) > > +{ > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > > + int comp_len = 0; > > + > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > > + (file_out->f_iocb_flags & IOCB_DIRECT)) > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > > + pos_out, len, NULL, NULL, GFP_KERNEL); > > + if (comp_len != len) > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > > + file_out, pos_out + comp_len, len - comp_len, flags); > > I'm not deeply familiar with this code but this looks odd. It at least > seems possible that comp_len could be -EINVAL and len 20 at which point > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). comp_len should be 0 incase of error. We do agree, some function description needs to be updated. We will recheck this completion path to make sure not to return negative value, incase of failure. Thank You, Nitesh Shetty
Hi Anuj,
Thank you for the patch! However, there is something to improve:
[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on device-mapper-dm/for-next linus/master v6.3-rc4 next-20230329]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20230327084103.21601-5-anuj20.g%40samsung.com
patch subject: [PATCH v8 4/9] fs, block: copy_file_range for def_blk_ops for direct block device.
config: loongarch-randconfig-r001-20230329 (https://download.01.org/0day-ci/archive/20230329/202303292151.7DDOUCIt-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/61819d260936954ddd6688548f074e7063dcf39e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018
git checkout 61819d260936954ddd6688548f074e7063dcf39e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch SHELL=/bin/bash
If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303292151.7DDOUCIt-lkp@intel.com/
All errors (new ones prefixed by >>):
loongarch64-linux-ld: fs/read_write.o: in function `.L633':
>> read_write.c:(.text+0x42e0): undefined reference to `I_BDEV'
Hi Anuj, Thank you for the patch! Yet something to improve: [auto build test ERROR on axboe-block/for-next] [also build test ERROR on device-mapper-dm/for-next linus/master v6.3-rc4 next-20230329] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018 base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next patch link: https://lore.kernel.org/r/20230327084103.21601-5-anuj20.g%40samsung.com patch subject: [PATCH v8 4/9] fs, block: copy_file_range for def_blk_ops for direct block device. config: x86_64-randconfig-a013 (https://download.01.org/0day-ci/archive/20230329/202303292349.ED70Fxdw-lkp@intel.com/config) compiler: gcc-11 (Debian 11.3.0-8) 11.3.0 reproduce (this is a W=1 build): # https://github.com/intel-lab-lkp/linux/commit/61819d260936954ddd6688548f074e7063dcf39e git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Anuj-Gupta/block-Add-copy-offload-support-infrastructure/20230329-162018 git checkout 61819d260936954ddd6688548f074e7063dcf39e # save the config file mkdir build_dir && cp config build_dir/.config make W=1 O=build_dir ARCH=x86_64 olddefconfig make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> | Link: https://lore.kernel.org/oe-kbuild-all/202303292349.ED70Fxdw-lkp@intel.com/ All errors (new ones prefixed by >>): ld: vmlinux.o: in function `generic_copy_file_checks': >> fs/read_write.c:1453: undefined reference to `I_BDEV' vim +1453 fs/read_write.c 1398 1399 /* 1400 * Performs necessary checks before doing a file copy 1401 * 1402 * Can adjust amount of bytes to copy via @req_count argument. 
1403 * Returns appropriate error code that caller should return or 1404 * zero in case the copy should be allowed. 1405 */ 1406 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, 1407 struct file *file_out, loff_t pos_out, 1408 size_t *req_count, unsigned int flags) 1409 { 1410 struct inode *inode_in = file_inode(file_in); 1411 struct inode *inode_out = file_inode(file_out); 1412 uint64_t count = *req_count; 1413 loff_t size_in; 1414 int ret; 1415 1416 ret = generic_file_rw_checks(file_in, file_out); 1417 if (ret) 1418 return ret; 1419 1420 /* 1421 * We allow some filesystems to handle cross sb copy, but passing 1422 * a file of the wrong filesystem type to filesystem driver can result 1423 * in an attempt to dereference the wrong type of ->private_data, so 1424 * avoid doing that until we really have a good reason. 1425 * 1426 * nfs and cifs define several different file_system_type structures 1427 * and several different sets of file_operations, but they all end up 1428 * using the same ->copy_file_range() function pointer. 1429 */ 1430 if (flags & COPY_FILE_SPLICE) { 1431 /* cross sb splice is allowed */ 1432 } else if (file_out->f_op->copy_file_range) { 1433 if (file_in->f_op->copy_file_range != 1434 file_out->f_op->copy_file_range) 1435 return -EXDEV; 1436 } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { 1437 return -EXDEV; 1438 } 1439 1440 /* Don't touch certain kinds of inodes */ 1441 if (IS_IMMUTABLE(inode_out)) 1442 return -EPERM; 1443 1444 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 1445 return -ETXTBSY; 1446 1447 /* Ensure offsets don't wrap. 
*/ 1448 if (pos_in + count < pos_in || pos_out + count < pos_out) 1449 return -EOVERFLOW; 1450 1451 /* Shorten the copy to EOF */ 1452 if (S_ISBLK(inode_in->i_mode)) > 1453 size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host)); 1454 else 1455 size_in = i_size_read(inode_in); 1456 1457 if (pos_in >= size_in) 1458 count = 0; 1459 else 1460 count = min(count, size_in - (uint64_t)pos_in); 1461 1462 ret = generic_write_check_limits(file_out, pos_out, &count); 1463 if (ret) 1464 return ret; 1465 1466 /* Don't allow overlapped copying within the same file. */ 1467 if (inode_in == inode_out && 1468 pos_out + count > pos_in && 1469 pos_out < pos_in + count) 1470 return -EINVAL; 1471 1472 *req_count = count; 1473 return 0; 1474 } 1475
On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote: > On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote: > > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > > > From: Nitesh Shetty <nj.shetty@samsung.com> > > > > > > For direct block device opened with O_DIRECT, use copy_file_range to > > > issue device copy offload, and fallback to generic_copy_file_range incase > > > device copy offload capability is absent. > > > Modify checks to allow bdevs to use copy_file_range. > > > > > > Suggested-by: Ming Lei <ming.lei@redhat.com> > > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > > --- > > > block/blk-lib.c | 22 ++++++++++++++++++++++ > > > block/fops.c | 20 ++++++++++++++++++++ > > > fs/read_write.c | 11 +++++++++-- > > > include/linux/blkdev.h | 3 +++ > > > 4 files changed, 54 insertions(+), 2 deletions(-) > > > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > > index a21819e59b29..c288573c7e77 100644 > > > --- a/block/blk-lib.c > > > +++ b/block/blk-lib.c > > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > > > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > > > } > > > > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > > > + struct block_device *bdev_out, loff_t pos_out, size_t len, > > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > > > +{ > > > + struct request_queue *in_q = bdev_get_queue(bdev_in); > > > + struct request_queue *out_q = bdev_get_queue(bdev_out); > > > + int ret = -EINVAL; > > > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it > > right away anyway? > > > > acked. > > > > + bool offload = false; > > > > Same thing with initializing offload. 
> > > acked > > > > + > > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > > > + if (ret) > > > + return ret; > > > + > > > + offload = blk_check_copy_offload(in_q, out_q); > > > + if (offload) > > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > > > + len, end_io, private, gfp_mask); > > > + > > > + return ret; > > > +} > > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > > + > > > /* > > > * @bdev_in: source block device > > > * @pos_in: source offset > > > diff --git a/block/fops.c b/block/fops.c > > > index d2e6be4e3d1c..3b7c05831d5c 100644 > > > --- a/block/fops.c > > > +++ b/block/fops.c > > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > > > return ret; > > > } > > > > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > > > + struct file *file_out, loff_t pos_out, > > > + size_t len, unsigned int flags) > > > +{ > > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > > > + int comp_len = 0; > > > + > > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > > > + (file_out->f_iocb_flags & IOCB_DIRECT)) > > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > > > + pos_out, len, NULL, NULL, GFP_KERNEL); > > > + if (comp_len != len) > > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > > > + file_out, pos_out + comp_len, len - comp_len, flags); > > > > I'm not deeply familiar with this code but this looks odd. It at least > > seems possible that comp_len could be -EINVAL and len 20 at which point > > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). 20 - -22 = 44 ofc > > comp_len should be 0 incase of error. 
> We do agree, some function

I mean, not to hammer on this point too much, but just to be clear: blk_copy_sanity_check(), which is introduced in the second patch, can return both -EPERM and -EINVAL and is first called in blkdev_copy_offload(), so it's definitely possible for comp_len to be negative.
On Thu, Mar 30, 2023 at 11:18 AM Christian Brauner <brauner@kernel.org> wrote: > > On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote: > > On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote: > > > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > > > > From: Nitesh Shetty <nj.shetty@samsung.com> > > > > > > > > For direct block device opened with O_DIRECT, use copy_file_range to > > > > issue device copy offload, and fallback to generic_copy_file_range incase > > > > device copy offload capability is absent. > > > > Modify checks to allow bdevs to use copy_file_range. > > > > > > > > Suggested-by: Ming Lei <ming.lei@redhat.com> > > > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > > > --- > > > > block/blk-lib.c | 22 ++++++++++++++++++++++ > > > > block/fops.c | 20 ++++++++++++++++++++ > > > > fs/read_write.c | 11 +++++++++-- > > > > include/linux/blkdev.h | 3 +++ > > > > 4 files changed, 54 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > > > index a21819e59b29..c288573c7e77 100644 > > > > --- a/block/blk-lib.c > > > > +++ b/block/blk-lib.c > > > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > > > > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > > > > } > > > > > > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > > > > + struct block_device *bdev_out, loff_t pos_out, size_t len, > > > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > > > > +{ > > > > + struct request_queue *in_q = bdev_get_queue(bdev_in); > > > > + struct request_queue *out_q = bdev_get_queue(bdev_out); > > > > + int ret = -EINVAL; > > > > > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it > > > right away anyway? > > > > > > > acked. > > > > > > + bool offload = false; > > > > > > Same thing with initializing offload. 
> > > > > acked > > > > > > + > > > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + offload = blk_check_copy_offload(in_q, out_q); > > > > + if (offload) > > > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > > > > + len, end_io, private, gfp_mask); > > > > + > > > > + return ret; > > > > +} > > > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > > > + > > > > /* > > > > * @bdev_in: source block device > > > > * @pos_in: source offset > > > > diff --git a/block/fops.c b/block/fops.c > > > > index d2e6be4e3d1c..3b7c05831d5c 100644 > > > > --- a/block/fops.c > > > > +++ b/block/fops.c > > > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > > > > return ret; > > > > } > > > > > > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > > > > + struct file *file_out, loff_t pos_out, > > > > + size_t len, unsigned int flags) > > > > +{ > > > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > > > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > > > > + int comp_len = 0; > > > > + > > > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > > > > + (file_out->f_iocb_flags & IOCB_DIRECT)) > > > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > > > > + pos_out, len, NULL, NULL, GFP_KERNEL); > > > > + if (comp_len != len) > > > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > > > > + file_out, pos_out + comp_len, len - comp_len, flags); > > > > > > I'm not deeply familiar with this code but this looks odd. It at least > > > seems possible that comp_len could be -EINVAL and len 20 at which point > > > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). > > 20 - -22 = 44 ofc > > > > > comp_len should be 0 incase of error. 
We do agree, some function > > I mean, not to hammer on this point too much but just to be clear > blk_copy_sanity_check(), which is introduced in the second patch, can > return both -EPERM and -EINVAL and is first called in > blkdev_copy_offload() so it's definitely possible for comp_len to be > negative. Acked. Will be updated in the next version. Thank you, Nitesh Shetty
diff --git a/block/blk-lib.c b/block/blk-lib.c index a21819e59b29..c288573c7e77 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, return blk_queue_copy(q_in) && blk_queue_copy(q_out); } +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, size_t len, + cio_iodone_t end_io, void *private, gfp_t gfp_mask) +{ + struct request_queue *in_q = bdev_get_queue(bdev_in); + struct request_queue *out_q = bdev_get_queue(bdev_out); + int ret = -EINVAL; + bool offload = false; + + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); + if (ret) + return ret; + + offload = blk_check_copy_offload(in_q, out_q); + if (offload) + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, + len, end_io, private, gfp_mask); + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_copy_offload); + /* * @bdev_in: source block device * @pos_in: source offset diff --git a/block/fops.c b/block/fops.c index d2e6be4e3d1c..3b7c05831d5c 100644 --- a/block/fops.c +++ b/block/fops.c @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); + int comp_len = 0; + + if ((file_in->f_iocb_flags & IOCB_DIRECT) && + (file_out->f_iocb_flags & IOCB_DIRECT)) + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, + pos_out, len, NULL, NULL, GFP_KERNEL); + if (comp_len != len) + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, + file_out, pos_out + comp_len, len - comp_len, flags); + + return comp_len; +} + #define BLKDEV_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) @@ -694,6 +713,7 @@ const struct file_operations def_blk_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .copy_file_range = blkdev_copy_file_range, }; static __init int blkdev_init(void) diff --git a/fs/read_write.c b/fs/read_write.c index 7a2ff6157eda..62e925e9b2f0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -20,6 +20,7 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/blkdev.h> #include "internal.h" #include <linux/uaccess.h> @@ -1448,7 +1449,11 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, return -EOVERFLOW; /* Shorten the copy to EOF */ - size_in = i_size_read(inode_in); + if (S_ISBLK(inode_in->i_mode)) + size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host)); + else + size_in = i_size_read(inode_in); + if (pos_in >= size_in) count = 0; else @@ -1709,7 +1714,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) /* Don't copy dirs, pipes, sockets... 
*/ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) return -EINVAL; if (!(file_in->f_mode & FMODE_READ) || diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a54153610800..468d5f3378e2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1057,6 +1057,9 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, int blkdev_issue_copy(struct block_device *bdev_in, loff_t pos_in, struct block_device *bdev_out, loff_t pos_out, size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask); +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, size_t len, + cio_iodone_t end_io, void *private, gfp_t gfp_mask); struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask); void bio_map_kern_endio(struct bio *bio);