diff mbox series

[2/3] pipe: enable handling of IOCB_NOWAIT

Message ID 20230308031033.155717-3-axboe@kernel.dk (mailing list archive)
State Mainlined, archived
Headers show
Series Add FMODE_NOWAIT support to pipes | expand

Commit Message

Jens Axboe March 8, 2023, 3:10 a.m. UTC
In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
and write path handle it correctly. This includes the pipe locking,
page allocation for writes, and confirming pipe buffers.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/pipe.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

Comments

Christian Brauner March 14, 2023, 9:26 a.m. UTC | #1
On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
> and write path handle it correctly. This includes the pipe locking,
> page allocation for writes, and confirming pipe buffers.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/pipe.c | 34 ++++++++++++++++++++++++++--------
>  1 file changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 340f253913a2..10366a6cb5b6 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -108,6 +108,16 @@ static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>  	mutex_unlock(&pipe->mutex);
>  }
>  
> +static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock)
> +{
> +	if (!nonblock) {
> +		__pipe_lock(pipe);
> +		return true;
> +	}
> +
> +	return mutex_trylock(&pipe->mutex);
> +}
> +
>  void pipe_double_lock(struct pipe_inode_info *pipe1,
>  		      struct pipe_inode_info *pipe2)
>  {
> @@ -234,6 +244,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  	struct file *filp = iocb->ki_filp;
>  	struct pipe_inode_info *pipe = filp->private_data;
>  	bool was_full, wake_next_reader = false;
> +	const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
>  	ssize_t ret;
>  
>  	/* Null read succeeds. */
> @@ -241,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		return 0;
>  
>  	ret = 0;
> -	__pipe_lock(pipe);
> +	if (!__pipe_trylock(pipe, nonblock))
> +		return -EAGAIN;
>  
>  	/*
>  	 * We only wake up writers if the pipe was full when we started
> @@ -297,7 +309,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  				chars = total_len;
>  			}
>  
> -			error = pipe_buf_confirm(pipe, buf, false);
> +			error = pipe_buf_confirm(pipe, buf, nonblock);
>  			if (error) {
>  				if (!ret)
>  					ret = error;
> @@ -342,7 +354,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  			break;
>  		if (ret)
>  			break;
> -		if (filp->f_flags & O_NONBLOCK) {
> +		if (filp->f_flags & O_NONBLOCK || nonblock) {
>  			ret = -EAGAIN;
>  			break;
>  		}
> @@ -423,12 +435,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  	ssize_t chars;
>  	bool was_empty = false;
>  	bool wake_next_writer = false;
> +	const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
>  
>  	/* Null write succeeds. */
>  	if (unlikely(total_len == 0))
>  		return 0;
>  
> -	__pipe_lock(pipe);
> +	if (!__pipe_trylock(pipe, nonblock))
> +		return -EAGAIN;
>  
>  	if (!pipe->readers) {
>  		send_sig(SIGPIPE, current, 0);
> @@ -461,7 +475,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  
>  		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
>  		    offset + chars <= PAGE_SIZE) {
> -			ret = pipe_buf_confirm(pipe, buf, false);
> +			ret = pipe_buf_confirm(pipe, buf, nonblock);
>  			if (ret)
>  				goto out;
>  
> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  			int copied;
>  
>  			if (!page) {
> -				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> +				gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
> +
> +				if (!nonblock)
> +					gfp |= GFP_USER;

Just for my education: Does this encode the assumpation that the
non-blocking code can only be reached from io_uring and thus GFP_USER
can be dropped for that case? IOW, if there's other code that could in
the future reach the non blocking condition would this still be correct?

> +				page = alloc_page(gfp);
>  				if (unlikely(!page)) {
> -					ret = ret ? : -ENOMEM;
> +					ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;

Hm, could we try and avoid the nested "?:?:" please. Imho, that's easy
to misparse. Idk, doesn't need to be exactly that code but sm like:

   				if (!nonblock) {
   					gfp |= GFP_USER;
					ret = -EAGAIN;
				} else {
					ret = -ENOMEM;
				}

   				page = alloc_page(gfp);
   				if (unlikely(!page))
					break;
				else
					ret = 0;
   				pipe->tmp_page = page;

or sm else.
Matthew Wilcox March 14, 2023, 9:38 a.m. UTC | #2
On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  			int copied;
>  
>  			if (!page) {
> -				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> +				gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
> +
> +				if (!nonblock)
> +					gfp |= GFP_USER;
> +				page = alloc_page(gfp);

Hmm, looks like you drop __GFP_HARDWALL in the nonblock case.  People who
use cpusets might be annoyed by that.

>  				if (unlikely(!page)) {
> -					ret = ret ? : -ENOMEM;
> +					ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;

double ternary operator?  really?

					if (!ret)
						ret = nonblock ? -EAGAIN : -ENOMEM;
Jens Axboe March 14, 2023, 12:03 p.m. UTC | #3
On 3/14/23 3:26?AM, Christian Brauner wrote:
> On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
>> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>>  			int copied;
>>  
>>  			if (!page) {
>> -				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
>> +				gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
>> +
>> +				if (!nonblock)
>> +					gfp |= GFP_USER;
> 
> Just for my education: Does this encode the assumpation that the
> non-blocking code can only be reached from io_uring and thus GFP_USER
> can be dropped for that case? IOW, if there's other code that could in
> the future reach the non blocking condition would this still be correct?

You can already reach that if you do preadv2(..., RWF_NOWAIT). There
should be no assumptions here on the user of it, semantics should be the
same. The gfp mask is just split so we avoid __GFP_WAIT for the
nonblocking case.

> 
>> +				page = alloc_page(gfp);
>>  				if (unlikely(!page)) {
>> -					ret = ret ? : -ENOMEM;
>> +					ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
> 
> Hm, could we try and avoid the nested "?:?:" please. Imho, that's easy
> to misparse. Idk, doesn't need to be exactly that code but sm like:
> 
>    				if (!nonblock) {
>    					gfp |= GFP_USER;
> 					ret = -EAGAIN;
> 				} else {
> 					ret = -ENOMEM;
> 				}
> 
>    				page = alloc_page(gfp);
>    				if (unlikely(!page))
> 					break;
> 				else
> 					ret = 0;
>    				pipe->tmp_page = page;
> 
> or sm else.

Yeah this is much better, I think I was a bit too lazy here, not really
a fan of ternaries myself... I'll fix that up. Thanks!
Jens Axboe March 14, 2023, 12:04 p.m. UTC | #4
On 3/14/23 3:38?AM, Matthew Wilcox wrote:
> On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
>> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>>  			int copied;
>>  
>>  			if (!page) {
>> -				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
>> +				gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
>> +
>> +				if (!nonblock)
>> +					gfp |= GFP_USER;
>> +				page = alloc_page(gfp);
> 
> Hmm, looks like you drop __GFP_HARDWALL in the nonblock case.  People who
> use cpusets might be annoyed by that.

Ah good catch! Yes, that's an oversight, I'll rectify that in v2.

>>  				if (unlikely(!page)) {
>> -					ret = ret ? : -ENOMEM;
>> +					ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
> 
> double ternary operator?  really?

yeah sorry... See reply to Christian, I'll make this cleaner.
diff mbox series

Patch

diff --git a/fs/pipe.c b/fs/pipe.c
index 340f253913a2..10366a6cb5b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -108,6 +108,16 @@  static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 	mutex_unlock(&pipe->mutex);
 }
 
+static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock)
+{
+	if (!nonblock) {
+		__pipe_lock(pipe);
+		return true;
+	}
+
+	return mutex_trylock(&pipe->mutex);
+}
+
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
@@ -234,6 +244,7 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
 	bool was_full, wake_next_reader = false;
+	const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
 	ssize_t ret;
 
 	/* Null read succeeds. */
@@ -241,7 +252,8 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		return 0;
 
 	ret = 0;
-	__pipe_lock(pipe);
+	if (!__pipe_trylock(pipe, nonblock))
+		return -EAGAIN;
 
 	/*
 	 * We only wake up writers if the pipe was full when we started
@@ -297,7 +309,7 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 				chars = total_len;
 			}
 
-			error = pipe_buf_confirm(pipe, buf, false);
+			error = pipe_buf_confirm(pipe, buf, nonblock);
 			if (error) {
 				if (!ret)
 					ret = error;
@@ -342,7 +354,7 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			break;
 		if (ret)
 			break;
-		if (filp->f_flags & O_NONBLOCK) {
+		if (filp->f_flags & O_NONBLOCK || nonblock) {
 			ret = -EAGAIN;
 			break;
 		}
@@ -423,12 +435,14 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	ssize_t chars;
 	bool was_empty = false;
 	bool wake_next_writer = false;
+	const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
 
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
 		return 0;
 
-	__pipe_lock(pipe);
+	if (!__pipe_trylock(pipe, nonblock))
+		return -EAGAIN;
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -461,7 +475,7 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 
 		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
 		    offset + chars <= PAGE_SIZE) {
-			ret = pipe_buf_confirm(pipe, buf, false);
+			ret = pipe_buf_confirm(pipe, buf, nonblock);
 			if (ret)
 				goto out;
 
@@ -493,9 +507,13 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			int copied;
 
 			if (!page) {
-				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
+				gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
+
+				if (!nonblock)
+					gfp |= GFP_USER;
+				page = alloc_page(gfp);
 				if (unlikely(!page)) {
-					ret = ret ? : -ENOMEM;
+					ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
 					break;
 				}
 				pipe->tmp_page = page;
@@ -547,7 +565,7 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			continue;
 
 		/* Wait for buffer space to become available. */
-		if (filp->f_flags & O_NONBLOCK) {
+		if (filp->f_flags & O_NONBLOCK || nonblock) {
 			if (!ret)
 				ret = -EAGAIN;
 			break;