mbox series

[v4,0/3] io_uring: add splice(2) support

Message ID cover.1582530525.git.asml.silence@gmail.com (mailing list archive)
Headers show
Series io_uring: add splice(2) support | expand

Message

Pavel Begunkov Feb. 24, 2020, 8:32 a.m. UTC
*on top of for-5.6 + async patches*

Not the fastest implementation, but I'd need to stir up/duplicate
splice.c bits to do it more efficiently.

note: rebase on top of the recent inflight patchset.

v2:
- u32 len and SQE layout changes (Jens)
- output file is in sqe->fd for automatic hash_reg_file support
- handle unbound_nonreg_file for the second fd
- file leaks fixed with REQ_F_NEED_CLEANUP
- place SPLICE_F_FD_IN_FIXED in splice flags (Jens)
- loff_t* -> loff_t, -1 means not specified offset

v3: [PATCH 3/3] changes
- fd u32 -> s32 (Stefan Metzmacher)
- add BUILD_BUG_SQE_ELEM() (Stefan Metzmacher)
- accept and ignore ioprio (Stefan Metzmacher)
- off_in -> splice_off_in

v4:
- rebase + a bit of function renaming
- make file_get/put accept req instead of ctx (Jens)
- fix lost REQ_F_FIXED_FILE

Pavel Begunkov (3):
  splice: make do_splice public
  io_uring: add interface for getting files
  io_uring: add splice(2) support

 fs/io_uring.c                 | 181 ++++++++++++++++++++++++++++------
 fs/splice.c                   |   6 +-
 include/linux/splice.h        |   3 +
 include/uapi/linux/io_uring.h |  14 ++-
 4 files changed, 171 insertions(+), 33 deletions(-)

Comments

Jens Axboe Feb. 24, 2020, 3:35 p.m. UTC | #1
On 2/24/20 1:32 AM, Pavel Begunkov wrote:
> *on top of for-5.6 + async patches*
> 
> Not the fastets implementation, but I'd need to stir up/duplicate
> splice.c bits to do it more efficiently.
> 
> note: rebase on top of the recent inflight patchset.

Let's get this queued up, looks good to go to me. Do you have a few
liburing test cases we can add for this?
Jens Axboe Feb. 24, 2020, 10:34 p.m. UTC | #2
On 2/24/20 8:35 AM, Jens Axboe wrote:
> On 2/24/20 1:32 AM, Pavel Begunkov wrote:
>> *on top of for-5.6 + async patches*
>>
>> Not the fastets implementation, but I'd need to stir up/duplicate
>> splice.c bits to do it more efficiently.
>>
>> note: rebase on top of the recent inflight patchset.
> 
> Let's get this queued up, looks good to go to me. Do you have a few
> liburing test cases we can add for this?

Seems to me like we have an address space issue for the off_in and
off_out parameters. Why aren't we passing in pointers to these
and making them work like regular splice?


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 792ef01a521c..b0cfd68be8c9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -448,8 +448,8 @@ struct io_epoll {
 struct io_splice {
 	struct file			*file_out;
 	struct file			*file_in;
-	loff_t				off_out;
-	loff_t				off_in;
+	loff_t __user			*off_out;
+	loff_t __user			*off_in;
 	u64				len;
 	unsigned int			flags;
 };
@@ -2578,8 +2578,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return 0;
 
 	sp->file_in = NULL;
-	sp->off_in = READ_ONCE(sqe->splice_off_in);
-	sp->off_out = READ_ONCE(sqe->off);
+	sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->splice_off_in));
+	sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off));
 	sp->len = READ_ONCE(sqe->len);
 	sp->flags = READ_ONCE(sqe->splice_flags);
 
@@ -2614,7 +2614,6 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
 	struct file *in = sp->file_in;
 	struct file *out = sp->file_out;
 	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
-	loff_t *poff_in, *poff_out;
 	long ret;
 
 	if (force_nonblock) {
@@ -2623,9 +2622,7 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
 		flags |= SPLICE_F_NONBLOCK;
 	}
 
-	poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
-	poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
-	ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
+	ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags);
 	if (force_nonblock && ret == -EAGAIN)
 		return -EAGAIN;
Pavel Begunkov Feb. 24, 2020, 10:51 p.m. UTC | #3
On 25/02/2020 01:34, Jens Axboe wrote:
> On 2/24/20 8:35 AM, Jens Axboe wrote:
>> On 2/24/20 1:32 AM, Pavel Begunkov wrote:
>>> *on top of for-5.6 + async patches*
>>>
>>> Not the fastets implementation, but I'd need to stir up/duplicate
>>> splice.c bits to do it more efficiently.
>>>
>>> note: rebase on top of the recent inflight patchset.
>>
>> Let's get this queued up, looks good to go to me. Do you have a few
>> liburing test cases we can add for this?
> 
> Seems to me like we have an address space issue for the off_in and

Is that a problem? From the old thread about fixing loop_rw_iter(), it appeared
to me that it's OK to pass a kernel address as a user one.
Some f_op->write implementations go through the same copy_to_user().


> off_out parameters. Why aren't we passing in pointers to these
> and making them work like regular splice?

That's one extra copy_to_user() + copy_from_user(), which I hope to remove
in the future. And I'm not really a fan of such an API, and would prefer to
leave such tracking to userspace.

> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 792ef01a521c..b0cfd68be8c9 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -448,8 +448,8 @@ struct io_epoll {
>  struct io_splice {
>  	struct file			*file_out;
>  	struct file			*file_in;
> -	loff_t				off_out;
> -	loff_t				off_in;
> +	loff_t __user			*off_out;
> +	loff_t __user			*off_in;
>  	u64				len;
>  	unsigned int			flags;
>  };
> @@ -2578,8 +2578,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  		return 0;
>  
>  	sp->file_in = NULL;
> -	sp->off_in = READ_ONCE(sqe->splice_off_in);
> -	sp->off_out = READ_ONCE(sqe->off);
> +	sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->splice_off_in));
> +	sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off));
>  	sp->len = READ_ONCE(sqe->len);
>  	sp->flags = READ_ONCE(sqe->splice_flags);
>  
> @@ -2614,7 +2614,6 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
>  	struct file *in = sp->file_in;
>  	struct file *out = sp->file_out;
>  	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
> -	loff_t *poff_in, *poff_out;
>  	long ret;
>  
>  	if (force_nonblock) {
> @@ -2623,9 +2622,7 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
>  		flags |= SPLICE_F_NONBLOCK;
>  	}
>  
> -	poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
> -	poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
> -	ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
> +	ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags);
>  	if (force_nonblock && ret == -EAGAIN)
>  		return -EAGAIN;
>  
>
Pavel Begunkov Feb. 24, 2020, 10:53 p.m. UTC | #4
On 25/02/2020 01:51, Pavel Begunkov wrote:
> On 25/02/2020 01:34, Jens Axboe wrote:
>> On 2/24/20 8:35 AM, Jens Axboe wrote:
>>> On 2/24/20 1:32 AM, Pavel Begunkov wrote:
>>>> *on top of for-5.6 + async patches*
>>>>
>>>> Not the fastets implementation, but I'd need to stir up/duplicate
>>>> splice.c bits to do it more efficiently.
>>>>
>>>> note: rebase on top of the recent inflight patchset.
>>>
>>> Let's get this queued up, looks good to go to me. Do you have a few
>>> liburing test cases we can add for this?
>>
>> Seems to me like we have an address space issue for the off_in and
> 
> Is that a problem? From the old fixing thread loop_rw_iter() it appeared
> to me, that it's ok to pass a kernel address as a user one.
> f_op->write of some implemented through the same copy_to_user().

Or I finally need to check for myself how the protection is implemented...

> 
>> off_out parameters. Why aren't we passing in pointers to these
>> and making them work like regular splice?
> 
> That's one extra copy_to_user() + copy_from_user(), which I hope to remove
> in the future. And I'm not really a fan of such API, and would prefer to give
> away such tracking to the userspace.
> 
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 792ef01a521c..b0cfd68be8c9 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -448,8 +448,8 @@ struct io_epoll {
>>  struct io_splice {
>>  	struct file			*file_out;
>>  	struct file			*file_in;
>> -	loff_t				off_out;
>> -	loff_t				off_in;
>> +	loff_t __user			*off_out;
>> +	loff_t __user			*off_in;
>>  	u64				len;
>>  	unsigned int			flags;
>>  };
>> @@ -2578,8 +2578,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>  		return 0;
>>  
>>  	sp->file_in = NULL;
>> -	sp->off_in = READ_ONCE(sqe->splice_off_in);
>> -	sp->off_out = READ_ONCE(sqe->off);
>> +	sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->splice_off_in));
>> +	sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off));
>>  	sp->len = READ_ONCE(sqe->len);
>>  	sp->flags = READ_ONCE(sqe->splice_flags);
>>  
>> @@ -2614,7 +2614,6 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
>>  	struct file *in = sp->file_in;
>>  	struct file *out = sp->file_out;
>>  	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
>> -	loff_t *poff_in, *poff_out;
>>  	long ret;
>>  
>>  	if (force_nonblock) {
>> @@ -2623,9 +2622,7 @@ static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
>>  		flags |= SPLICE_F_NONBLOCK;
>>  	}
>>  
>> -	poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
>> -	poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
>> -	ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
>> +	ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags);
>>  	if (force_nonblock && ret == -EAGAIN)
>>  		return -EAGAIN;
>>  
>>
>