Message ID | 20230327084103.21601-5-anuj20.g@samsung.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Mike Snitzer |
Headers | show |
Series | [v8,1/9] block: Introduce queue limits for copy-offload support | expand |
On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > From: Nitesh Shetty <nj.shetty@samsung.com> > > For direct block device opened with O_DIRECT, use copy_file_range to > issue device copy offload, and fallback to generic_copy_file_range incase > device copy offload capability is absent. > Modify checks to allow bdevs to use copy_file_range. > > Suggested-by: Ming Lei <ming.lei@redhat.com> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > --- > block/blk-lib.c | 22 ++++++++++++++++++++++ > block/fops.c | 20 ++++++++++++++++++++ > fs/read_write.c | 11 +++++++++-- > include/linux/blkdev.h | 3 +++ > 4 files changed, 54 insertions(+), 2 deletions(-) > > diff --git a/block/blk-lib.c b/block/blk-lib.c > index a21819e59b29..c288573c7e77 100644 > --- a/block/blk-lib.c > +++ b/block/blk-lib.c > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > } > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > + struct block_device *bdev_out, loff_t pos_out, size_t len, > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > +{ > + struct request_queue *in_q = bdev_get_queue(bdev_in); > + struct request_queue *out_q = bdev_get_queue(bdev_out); > + int ret = -EINVAL; Why initialize to -EINVAL if blk_copy_sanity_check() initializes it right away anyway? > + bool offload = false; Same thing with initializing offload. 
> + > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > + if (ret) > + return ret; > + > + offload = blk_check_copy_offload(in_q, out_q); > + if (offload) > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > + len, end_io, private, gfp_mask); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > + > /* > * @bdev_in: source block device > * @pos_in: source offset > diff --git a/block/fops.c b/block/fops.c > index d2e6be4e3d1c..3b7c05831d5c 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > return ret; > } > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t len, unsigned int flags) > +{ > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > + int comp_len = 0; > + > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > + (file_out->f_iocb_flags & IOCB_DIRECT)) > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > + pos_out, len, NULL, NULL, GFP_KERNEL); > + if (comp_len != len) > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > + file_out, pos_out + comp_len, len - comp_len, flags); I'm not deeply familiar with this code but this looks odd. It at least seems possible that comp_len could be -EINVAL and len 20 at which point you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote: > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > > From: Nitesh Shetty <nj.shetty@samsung.com> > > > > For direct block device opened with O_DIRECT, use copy_file_range to > > issue device copy offload, and fallback to generic_copy_file_range incase > > device copy offload capability is absent. > > Modify checks to allow bdevs to use copy_file_range. > > > > Suggested-by: Ming Lei <ming.lei@redhat.com> > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > --- > > block/blk-lib.c | 22 ++++++++++++++++++++++ > > block/fops.c | 20 ++++++++++++++++++++ > > fs/read_write.c | 11 +++++++++-- > > include/linux/blkdev.h | 3 +++ > > 4 files changed, 54 insertions(+), 2 deletions(-) > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > index a21819e59b29..c288573c7e77 100644 > > --- a/block/blk-lib.c > > +++ b/block/blk-lib.c > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > > } > > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > > + struct block_device *bdev_out, loff_t pos_out, size_t len, > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > > +{ > > + struct request_queue *in_q = bdev_get_queue(bdev_in); > > + struct request_queue *out_q = bdev_get_queue(bdev_out); > > + int ret = -EINVAL; > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it > right away anyway? > acked. > > + bool offload = false; > > Same thing with initializing offload. 
> acked > > + > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > > + if (ret) > > + return ret; > > + > > + offload = blk_check_copy_offload(in_q, out_q); > > + if (offload) > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > > + len, end_io, private, gfp_mask); > > + > > + return ret; > > +} > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > + > > /* > > * @bdev_in: source block device > > * @pos_in: source offset > > diff --git a/block/fops.c b/block/fops.c > > index d2e6be4e3d1c..3b7c05831d5c 100644 > > --- a/block/fops.c > > +++ b/block/fops.c > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > > return ret; > > } > > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > > + struct file *file_out, loff_t pos_out, > > + size_t len, unsigned int flags) > > +{ > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > > + int comp_len = 0; > > + > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > > + (file_out->f_iocb_flags & IOCB_DIRECT)) > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > > + pos_out, len, NULL, NULL, GFP_KERNEL); > > + if (comp_len != len) > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > > + file_out, pos_out + comp_len, len - comp_len, flags); > > I'm not deeply familiar with this code but this looks odd. It at least > seems possible that comp_len could be -EINVAL and len 20 at which point > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). comp_len should be 0 incase of error. We do agree, some function description needs to be updated. We will recheck this completion path to make sure not to return negative value, incase of failure. Thank You, Nitesh Shetty -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote: > On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote: > > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > > > From: Nitesh Shetty <nj.shetty@samsung.com> > > > > > > For direct block device opened with O_DIRECT, use copy_file_range to > > > issue device copy offload, and fallback to generic_copy_file_range incase > > > device copy offload capability is absent. > > > Modify checks to allow bdevs to use copy_file_range. > > > > > > Suggested-by: Ming Lei <ming.lei@redhat.com> > > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > > --- > > > block/blk-lib.c | 22 ++++++++++++++++++++++ > > > block/fops.c | 20 ++++++++++++++++++++ > > > fs/read_write.c | 11 +++++++++-- > > > include/linux/blkdev.h | 3 +++ > > > 4 files changed, 54 insertions(+), 2 deletions(-) > > > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > > index a21819e59b29..c288573c7e77 100644 > > > --- a/block/blk-lib.c > > > +++ b/block/blk-lib.c > > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > > > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > > > } > > > > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > > > + struct block_device *bdev_out, loff_t pos_out, size_t len, > > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > > > +{ > > > + struct request_queue *in_q = bdev_get_queue(bdev_in); > > > + struct request_queue *out_q = bdev_get_queue(bdev_out); > > > + int ret = -EINVAL; > > > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it > > right away anyway? > > > > acked. > > > > + bool offload = false; > > > > Same thing with initializing offload. 
> > > acked > > > > + > > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > > > + if (ret) > > > + return ret; > > > + > > > + offload = blk_check_copy_offload(in_q, out_q); > > > + if (offload) > > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > > > + len, end_io, private, gfp_mask); > > > + > > > + return ret; > > > +} > > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > > + > > > /* > > > * @bdev_in: source block device > > > * @pos_in: source offset > > > diff --git a/block/fops.c b/block/fops.c > > > index d2e6be4e3d1c..3b7c05831d5c 100644 > > > --- a/block/fops.c > > > +++ b/block/fops.c > > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > > > return ret; > > > } > > > > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > > > + struct file *file_out, loff_t pos_out, > > > + size_t len, unsigned int flags) > > > +{ > > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > > > + int comp_len = 0; > > > + > > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > > > + (file_out->f_iocb_flags & IOCB_DIRECT)) > > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > > > + pos_out, len, NULL, NULL, GFP_KERNEL); > > > + if (comp_len != len) > > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > > > + file_out, pos_out + comp_len, len - comp_len, flags); > > > > I'm not deeply familiar with this code but this looks odd. It at least > > seems possible that comp_len could be -EINVAL and len 20 at which point > > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). 20 - -22 = 44 ofc > > comp_len should be 0 incase of error. 
> > We do agree, some function
I mean, not to hammer on this point too much but just to be clear blk_copy_sanity_check(), which is introduced in the second patch, can return both -EPERM and -EINVAL and is first called in blkdev_copy_offload() so it's definitely possible for comp_len to be negative. -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
On Thu, Mar 30, 2023 at 11:18 AM Christian Brauner <brauner@kernel.org> wrote: > > On Wed, Mar 29, 2023 at 06:12:36PM +0530, Nitesh Shetty wrote: > > On Wed, Mar 29, 2023 at 02:14:40PM +0200, Christian Brauner wrote: > > > On Mon, Mar 27, 2023 at 02:10:52PM +0530, Anuj Gupta wrote: > > > > From: Nitesh Shetty <nj.shetty@samsung.com> > > > > > > > > For direct block device opened with O_DIRECT, use copy_file_range to > > > > issue device copy offload, and fallback to generic_copy_file_range incase > > > > device copy offload capability is absent. > > > > Modify checks to allow bdevs to use copy_file_range. > > > > > > > > Suggested-by: Ming Lei <ming.lei@redhat.com> > > > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > > > --- > > > > block/blk-lib.c | 22 ++++++++++++++++++++++ > > > > block/fops.c | 20 ++++++++++++++++++++ > > > > fs/read_write.c | 11 +++++++++-- > > > > include/linux/blkdev.h | 3 +++ > > > > 4 files changed, 54 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > > > index a21819e59b29..c288573c7e77 100644 > > > > --- a/block/blk-lib.c > > > > +++ b/block/blk-lib.c > > > > @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, > > > > return blk_queue_copy(q_in) && blk_queue_copy(q_out); > > > > } > > > > > > > > +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, > > > > + struct block_device *bdev_out, loff_t pos_out, size_t len, > > > > + cio_iodone_t end_io, void *private, gfp_t gfp_mask) > > > > +{ > > > > + struct request_queue *in_q = bdev_get_queue(bdev_in); > > > > + struct request_queue *out_q = bdev_get_queue(bdev_out); > > > > + int ret = -EINVAL; > > > > > > Why initialize to -EINVAL if blk_copy_sanity_check() initializes it > > > right away anyway? > > > > > > > acked. > > > > > > + bool offload = false; > > > > > > Same thing with initializing offload. 
> > > > > acked > > > > > > + > > > > + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + offload = blk_check_copy_offload(in_q, out_q); > > > > + if (offload) > > > > + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > > > > + len, end_io, private, gfp_mask); > > > > + > > > > + return ret; > > > > +} > > > > +EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > > > + > > > > /* > > > > * @bdev_in: source block device > > > > * @pos_in: source offset > > > > diff --git a/block/fops.c b/block/fops.c > > > > index d2e6be4e3d1c..3b7c05831d5c 100644 > > > > --- a/block/fops.c > > > > +++ b/block/fops.c > > > > @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > > > > return ret; > > > > } > > > > > > > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > > > > + struct file *file_out, loff_t pos_out, > > > > + size_t len, unsigned int flags) > > > > +{ > > > > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > > > > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > > > > + int comp_len = 0; > > > > + > > > > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > > > > + (file_out->f_iocb_flags & IOCB_DIRECT)) > > > > + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, > > > > + pos_out, len, NULL, NULL, GFP_KERNEL); > > > > + if (comp_len != len) > > > > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > > > > + file_out, pos_out + comp_len, len - comp_len, flags); > > > > > > I'm not deeply familiar with this code but this looks odd. It at least > > > seems possible that comp_len could be -EINVAL and len 20 at which point > > > you'd be doing len - comp_len aka 20 - 22 = -2 in generic_copy_file_range(). > > 20 - -22 = 44 ofc > > > > > comp_len should be 0 incase of error. 
> > > We do agree, some function > > I mean, not to hammer on this point too much but just to be clear > blk_copy_sanity_check(), which is introduced in the second patch, can > return both -EPERM and -EINVAL and is first called in > blkdev_copy_offload() so it's definitely possible for comp_len to be > negative. Acked. Will be updated in the next version. Thank you, Nitesh Shetty -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
diff --git a/block/blk-lib.c b/block/blk-lib.c index a21819e59b29..c288573c7e77 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -475,6 +475,28 @@ static inline bool blk_check_copy_offload(struct request_queue *q_in, return blk_queue_copy(q_in) && blk_queue_copy(q_out); } +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, size_t len, + cio_iodone_t end_io, void *private, gfp_t gfp_mask) +{ + struct request_queue *in_q = bdev_get_queue(bdev_in); + struct request_queue *out_q = bdev_get_queue(bdev_out); + int ret = -EINVAL; + bool offload = false; + + ret = blk_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len); + if (ret) + return ret; + + offload = blk_check_copy_offload(in_q, out_q); + if (offload) + ret = __blk_copy_offload(bdev_in, pos_in, bdev_out, pos_out, + len, end_io, private, gfp_mask); + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_copy_offload); + /* * @bdev_in: source block device * @pos_in: source offset diff --git a/block/fops.c b/block/fops.c index d2e6be4e3d1c..3b7c05831d5c 100644 --- a/block/fops.c +++ b/block/fops.c @@ -611,6 +611,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); + int comp_len = 0; + + if ((file_in->f_iocb_flags & IOCB_DIRECT) && + (file_out->f_iocb_flags & IOCB_DIRECT)) + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, + pos_out, len, NULL, NULL, GFP_KERNEL); + if (comp_len != len) + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, + file_out, pos_out + comp_len, len - comp_len, flags); + + return comp_len; +} + #define BLKDEV_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) @@ -694,6 +713,7 @@ const struct file_operations def_blk_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .copy_file_range = blkdev_copy_file_range, }; static __init int blkdev_init(void) diff --git a/fs/read_write.c b/fs/read_write.c index 7a2ff6157eda..62e925e9b2f0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -20,6 +20,7 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/blkdev.h> #include "internal.h" #include <linux/uaccess.h> @@ -1448,7 +1449,11 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, return -EOVERFLOW; /* Shorten the copy to EOF */ - size_in = i_size_read(inode_in); + if (S_ISBLK(inode_in->i_mode)) + size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host)); + else + size_in = i_size_read(inode_in); + if (pos_in >= size_in) count = 0; else @@ -1709,7 +1714,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) /* Don't copy dirs, pipes, sockets... 
*/ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) return -EINVAL; if (!(file_in->f_mode & FMODE_READ) || diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a54153610800..468d5f3378e2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1057,6 +1057,9 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, int blkdev_issue_copy(struct block_device *bdev_in, loff_t pos_in, struct block_device *bdev_out, loff_t pos_out, size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask); +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, size_t len, + cio_iodone_t end_io, void *private, gfp_t gfp_mask); struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask); void bio_map_kern_endio(struct bio *bio);