Message ID | 20230308031033.155717-3-axboe@kernel.dk (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add FMODE_NOWAIT support to pipes | expand |
On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote: > In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read > and write path handle it correctly. This includes the pipe locking, > page allocation for writes, and confirming pipe buffers. > > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > fs/pipe.c | 34 ++++++++++++++++++++++++++-------- > 1 file changed, 26 insertions(+), 8 deletions(-) > > diff --git a/fs/pipe.c b/fs/pipe.c > index 340f253913a2..10366a6cb5b6 100644 > --- a/fs/pipe.c > +++ b/fs/pipe.c > @@ -108,6 +108,16 @@ static inline void __pipe_unlock(struct pipe_inode_info *pipe) > mutex_unlock(&pipe->mutex); > } > > +static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock) > +{ > + if (!nonblock) { > + __pipe_lock(pipe); > + return true; > + } > + > + return mutex_trylock(&pipe->mutex); > +} > + > void pipe_double_lock(struct pipe_inode_info *pipe1, > struct pipe_inode_info *pipe2) > { > @@ -234,6 +244,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > struct file *filp = iocb->ki_filp; > struct pipe_inode_info *pipe = filp->private_data; > bool was_full, wake_next_reader = false; > + const bool nonblock = iocb->ki_flags & IOCB_NOWAIT; > ssize_t ret; > > /* Null read succeeds. */ > @@ -241,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > return 0; > > ret = 0; > - __pipe_lock(pipe); > + if (!__pipe_trylock(pipe, nonblock)) > + return -EAGAIN; > > /* > * We only wake up writers if the pipe was full when we started > @@ -297,7 +309,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > chars = total_len; > } > > - error = pipe_buf_confirm(pipe, buf, false); > + error = pipe_buf_confirm(pipe, buf, nonblock); > if (error) { > if (!ret) > ret = error; > @@ -342,7 +354,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > break; > if (ret) > break; > - if (filp->f_flags & O_NONBLOCK) { > + if (filp->f_flags & O_NONBLOCK || nonblock) { > ret = -EAGAIN; > break; > } > @@ -423,12 +435,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > ssize_t chars; > bool was_empty = false; > bool wake_next_writer = false; > + const bool nonblock = iocb->ki_flags & IOCB_NOWAIT; > > /* Null write succeeds. */ > if (unlikely(total_len == 0)) > return 0; > > - __pipe_lock(pipe); > + if (!__pipe_trylock(pipe, nonblock)) > + return -EAGAIN; > > if (!pipe->readers) { > send_sig(SIGPIPE, current, 0); > @@ -461,7 +475,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > > if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) && > offset + chars <= PAGE_SIZE) { > - ret = pipe_buf_confirm(pipe, buf, false); > + ret = pipe_buf_confirm(pipe, buf, nonblock); > if (ret) > goto out; > > @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > int copied; > > if (!page) { > - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT; > + > + if (!nonblock) > + gfp |= GFP_USER; Just for my education: Does this encode the assumpation that the non-blocking code can only be reached from io_uring and thus GFP_USER can be dropped for that case? IOW, if there's other code that could in the future reach the non blocking condition would this still be correct? > + page = alloc_page(gfp); > if (unlikely(!page)) { > - ret = ret ? : -ENOMEM; > + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM; Hm, could we try and avoid the nested "?:?:" please. Imho, that's easy to misparse. Idk, doesn't need to be exactly that code but sm like: if (!nonblock) { gfp |= GFP_USER; ret = -EAGAIN; } else { ret = -ENOMEM; } page = alloc_page(gfp); if (unlikely(!page)) break; else ret = 0; pipe->tmp_page = page; or sm else.
On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote: > @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > int copied; > > if (!page) { > - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); > + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT; > + > + if (!nonblock) > + gfp |= GFP_USER; > + page = alloc_page(gfp); Hmm, looks like you drop __GFP_HARDWALL in the nonblock case. People who use cpusets might be annoyed by that. > if (unlikely(!page)) { > - ret = ret ? : -ENOMEM; > + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM; double ternary operator? really? if (!ret) ret = nonblock ? -EAGAIN : -ENOMEM;
On 3/14/23 3:26?AM, Christian Brauner wrote: > On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote: >> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) >> int copied; >> >> if (!page) { >> - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); >> + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT; >> + >> + if (!nonblock) >> + gfp |= GFP_USER; > > Just for my education: Does this encode the assumpation that the > non-blocking code can only be reached from io_uring and thus GFP_USER > can be dropped for that case? IOW, if there's other code that could in > the future reach the non blocking condition would this still be correct? You can already reach that if you do preadv2(..., RWF_NOWAIT). There should be no assumptions here on the user of it, semantics should be the same. The gfp mask is just split so we avoid __GFP_WAIT for the nonblocking case. > >> + page = alloc_page(gfp); >> if (unlikely(!page)) { >> - ret = ret ? : -ENOMEM; >> + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM; > > Hm, could we try and avoid the nested "?:?:" please. Imho, that's easy > to misparse. Idk, doesn't need to be exactly that code but sm like: > > if (!nonblock) { > gfp |= GFP_USER; > ret = -EAGAIN; > } else { > ret = -ENOMEM; > } > > page = alloc_page(gfp); > if (unlikely(!page)) > break; > else > ret = 0; > pipe->tmp_page = page; > > or sm else. Yeah this is much better, I think I was a bit too lazy here, not really a fan of ternaries myself... I'll fix that up. Thanks!
On 3/14/23 3:38?AM, Matthew Wilcox wrote: > On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote: >> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) >> int copied; >> >> if (!page) { >> - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); >> + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT; >> + >> + if (!nonblock) >> + gfp |= GFP_USER; >> + page = alloc_page(gfp); > > Hmm, looks like you drop __GFP_HARDWALL in the nonblock case. People who > use cpusets might be annoyed by that. Ah good catch! Yes, that's an oversight, I'll rectify that in v2. >> if (unlikely(!page)) { >> - ret = ret ? : -ENOMEM; >> + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM; > > double ternary operator? really? yeah sorry... See reply to Christian, I'll make this cleaner.
diff --git a/fs/pipe.c b/fs/pipe.c index 340f253913a2..10366a6cb5b6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -108,6 +108,16 @@ static inline void __pipe_unlock(struct pipe_inode_info *pipe) mutex_unlock(&pipe->mutex); } +static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock) +{ + if (!nonblock) { + __pipe_lock(pipe); + return true; + } + + return mutex_trylock(&pipe->mutex); +} + void pipe_double_lock(struct pipe_inode_info *pipe1, struct pipe_inode_info *pipe2) { @@ -234,6 +244,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; bool was_full, wake_next_reader = false; + const bool nonblock = iocb->ki_flags & IOCB_NOWAIT; ssize_t ret; /* Null read succeeds. */ @@ -241,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) return 0; ret = 0; - __pipe_lock(pipe); + if (!__pipe_trylock(pipe, nonblock)) + return -EAGAIN; /* * We only wake up writers if the pipe was full when we started @@ -297,7 +309,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) chars = total_len; } - error = pipe_buf_confirm(pipe, buf, false); + error = pipe_buf_confirm(pipe, buf, nonblock); if (error) { if (!ret) ret = error; @@ -342,7 +354,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) break; if (ret) break; - if (filp->f_flags & O_NONBLOCK) { + if (filp->f_flags & O_NONBLOCK || nonblock) { ret = -EAGAIN; break; } @@ -423,12 +435,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ssize_t chars; bool was_empty = false; bool wake_next_writer = false; + const bool nonblock = iocb->ki_flags & IOCB_NOWAIT; /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; - __pipe_lock(pipe); + if (!__pipe_trylock(pipe, nonblock)) + return -EAGAIN; if (!pipe->readers) { send_sig(SIGPIPE, current, 0); @@ -461,7 +475,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) && offset + chars <= PAGE_SIZE) { - ret = pipe_buf_confirm(pipe, buf, false); + ret = pipe_buf_confirm(pipe, buf, nonblock); if (ret) goto out; @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) int copied; if (!page) { - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT; + + if (!nonblock) + gfp |= GFP_USER; + page = alloc_page(gfp); if (unlikely(!page)) { - ret = ret ? : -ENOMEM; + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM; break; } pipe->tmp_page = page; @@ -547,7 +565,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) continue; /* Wait for buffer space to become available. */ - if (filp->f_flags & O_NONBLOCK) { + if (filp->f_flags & O_NONBLOCK || nonblock) { if (!ret) ret = -EAGAIN; break;
In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read and write path handle it correctly. This includes the pipe locking, page allocation for writes, and confirming pipe buffers. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- fs/pipe.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-)