[1/4] aio: all callers of aio_{read,write,fsync,poll} treat 0 and -EIOCBQUEUED the same way

Message ID 20180527222853.30715-1-viro@ZenIV.linux.org.uk (mailing list archive)
State New, archived

Commit Message

Al Viro May 27, 2018, 10:28 p.m. UTC
From: Al Viro <viro@zeniv.linux.org.uk>

... so just make them return 0 when caller does not need to destroy iocb

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

Comments

Christoph Hellwig May 28, 2018, 5:15 a.m. UTC | #1
On Sun, May 27, 2018 at 11:28:50PM +0100, Al Viro wrote:
> From: Al Viro <viro@zeniv.linux.org.uk>
> 
> ... so just make them return 0 when caller does not need to destroy iocb
> 
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

But I think we really need a better name for aio_rw_ret now.
Unfortunately I can't think of one.
Al Viro May 28, 2018, 2:04 p.m. UTC | #2
On Mon, May 28, 2018 at 07:15:29AM +0200, Christoph Hellwig wrote:
> On Sun, May 27, 2018 at 11:28:50PM +0100, Al Viro wrote:
> > From: Al Viro <viro@zeniv.linux.org.uk>
> > 
> > ... so just make them return 0 when caller does not need to destroy iocb
> > 
> > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> 
> Looks good,
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> 
> But I think we really need a better name for aio_rw_ret now.
> Unfortunately I can't think of one.

Hell knows...  aio_rw_done(), perhaps?  BTW, I would rather have fput()
in aio_complete_rw() done after ki_list removal - having ->ki_cancel()
callable after fput() is Not Nice(tm).  Consider e.g.
static int ffs_aio_cancel(struct kiocb *kiocb)
{
        struct ffs_io_data *io_data = kiocb->private;
        struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
        int value;

        ENTER();

        spin_lock_irq(&epfile->ffs->eps_lock);

What's to guarantee that kiocb->ki_filp is not already freed and reused by
the time we call that sucker, with its ->private_data pointing to something
completely unrelated?
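
Timeline-wise, the window is something like this (a sketch of the
interleaving, not code from any particular tree):

	aio_complete_rw()
		fput(kiocb->ki_filp);	/* may drop the last reference:
					 * struct file freed, ->private_data
					 * possibly reused */

	/* window: iocb still on ctx->active_reqs, file already gone */
		io_cancel(2)
			->ki_cancel(kiocb), e.g. ffs_aio_cancel()
				kiocb->ki_filp->private_data	/* UAF */

		aio_complete(...);	/* only now does the iocb come off
					 * ->active_reqs */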

How about lifting the list removal into aio_complete_rw() and aio_poll_work(),
with WARN_ON() left in its place in aio_complete() itself?  Look:
aio_complete() call chains are
	aio_complete_rw()
	aio_fsync_work()
	__aio_poll_complete()
		aio_poll_work()
		aio_poll_wake()
		aio_poll()

The call in aio_fsync_work() is guaranteed to have iocb not on cancel lists.
The call in aio_poll_wake() *relies* upon aio_complete() not going into
list removal.  The call in aio_poll() is also guaranteed to be not on cancel
list - we get there only if mask != 0 and we add to ->active_reqs only if
mask == 0.
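
To spell that out, the tail of aio_poll() is roughly the following
(a sketch; the locking and the preceding ->poll_mask() call are
simplified):

	spin_lock_irq(&ctx->ctx_lock);
	if (!mask) {
		/* not ready yet - the only place the iocb goes onto
		 * ->active_reqs, i.e. becomes reachable via ->ki_cancel() */
		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
	}
	spin_unlock_irq(&ctx->ctx_lock);
done:
	if (mask)	/* never on ->active_reqs when we get here */
		__aio_poll_complete(req, mask);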

So if we take the list removal into aio_complete_rw() and aio_poll_wake() we
should get the right ordering - iocb gets removed from the list before fput()
in all cases.  And aio_complete() locking footprint becomes simpler...  As
a fringe benefit, __aio_poll_complete() becomes simply
	fput(req->file);
	aio_complete(iocb, mangle_poll(mask), 0);
since we don't need to order fput() vs. aio_complete() anymore - the caller
of __aio_poll_complete() has already taken care of ->ki_cancel() possibility.
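
IOW, something like this on the rw side (sketch, untested; ki_ctx and
ki_list as in fs/aio.c):

static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
{
	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);

	...

	/* off ->active_reqs - and out of ->ki_cancel() reach -
	 * before the file reference can go away */
	if (!list_empty_careful(&iocb->ki_list)) {
		unsigned long flags;

		spin_lock_irqsave(&iocb->ki_ctx->ctx_lock, flags);
		list_del(&iocb->ki_list);
		spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
	}
	fput(kiocb->ki_filp);
	aio_complete(iocb, res, res2);
}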
Al Viro May 28, 2018, 5:54 p.m. UTC | #3
On Mon, May 28, 2018 at 03:04:34PM +0100, Al Viro wrote:

> How about lifting the list removal into aio_complete_rw() and aio_poll_work(),
> with WARN_ON() left in its place in aio_complete() itself?  Look:
> aio_complete() call chains are
> 	aio_complete_rw()
> 	aio_fsync_work()
> 	__aio_poll_complete()
> 		aio_poll_work()
> 		aio_poll_wake()
> 		aio_poll()
> 
> The call in aio_fsync_work() is guaranteed to have iocb not on cancel lists.
> The call in aio_poll_wake() *relies* upon aio_complete() not going into
> list removal.  The call in aio_poll() is also guaranteed to be not on cancel
> list - we get there only if mask != 0 and we add to ->active_reqs only if
> mask == 0.
> 
> So if we take the list removal into aio_complete_rw() and aio_poll_wake() we
> should get the right ordering - iocb gets removed from the list before fput()
> in all cases.  And aio_complete() locking footprint becomes simpler...  As
> a fringe benefit, __aio_poll_complete() becomes simply
> 	fput(req->file);
> 	aio_complete(iocb, mangle_poll(mask), 0);
> since we don't need to order fput() vs. aio_complete() anymore - the caller
> of __aio_poll_complete() has already taken care of ->ki_cancel() possibility.

Anyway, what I have in mind is in vfs.git#work.aio; on top of your fix for the
missing break, it's
      aio: take list removal to (some) callers of aio_complete()
      aio: all callers of aio_{read,write,fsync,poll} treat 0 and -EIOCBQUEUED the same way
      aio_read_events_ring(): make a bit more readable
      aio: shift copyin of iocb into io_submit_one()
      aio: fold do_io_submit() into callers
      aio: sanitize the limit checking in io_submit(2)
(in followups)

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 8274d09d44a2..c6f29d9d006c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1456,11 +1456,11 @@ static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
 	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
 }
 
-static inline ssize_t aio_rw_ret(struct kiocb *req, ssize_t ret)
+static inline void aio_rw_ret(struct kiocb *req, ssize_t ret)
 {
 	switch (ret) {
 	case -EIOCBQUEUED:
-		return ret;
+		break;
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
 	case -ERESTARTNOHAND:
@@ -1473,7 +1473,6 @@ static inline ssize_t aio_rw_ret(struct kiocb *req, ssize_t ret)
 		/*FALLTHRU*/
 	default:
 		aio_complete_rw(req, ret, 0);
-		return 0;
 	}
 }
 
@@ -1502,10 +1501,10 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 		goto out_fput;
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret)
-		ret = aio_rw_ret(req, call_read_iter(file, req, &iter));
+		aio_rw_ret(req, call_read_iter(file, req, &iter));
 	kfree(iovec);
 out_fput:
-	if (unlikely(ret && ret != -EIOCBQUEUED))
+	if (unlikely(ret))
 		fput(file);
 	return ret;
 }
@@ -1547,11 +1546,11 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 		}
 		req->ki_flags |= IOCB_WRITE;
-		ret = aio_rw_ret(req, call_write_iter(file, req, &iter));
+		aio_rw_ret(req, call_write_iter(file, req, &iter));
 	}
 	kfree(iovec);
 out_fput:
-	if (unlikely(ret && ret != -EIOCBQUEUED))
+	if (unlikely(ret))
 		fput(file);
 	return ret;
 }
@@ -1582,7 +1581,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
 	req->datasync = datasync;
 	INIT_WORK(&req->work, aio_fsync_work);
 	schedule_work(&req->work);
-	return -EIOCBQUEUED;
+	return 0;
 }
 
 /* need to use list_del_init so we can check if item was present */
@@ -1711,7 +1710,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
 done:
 	if (mask)
 		__aio_poll_complete(req, mask);
-	return -EIOCBQUEUED;
+	return 0;
 out_fail:
 	fput(req->file);
 	return -EINVAL; /* same as no support for IOCB_CMD_POLL */
@@ -1795,12 +1794,11 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	}
 
 	/*
-	 * If ret is -EIOCBQUEUED, ownership of the file reference acquired
-	 * above passed to the file system, which at this point might have
-	 * dropped the reference, so we must be careful to not reference it
-	 * once we have called into the file system.
+	 * If ret is 0, we'd either done aio_complete() ourselves or have
+	 * arranged for that to be done asynchronously.  Anything non-zero
+	 * means that we need to destroy req ourselves.
 	 */
-	if (ret && ret != -EIOCBQUEUED)
+	if (ret)
 		goto out_put_req;
 	return 0;
 out_put_req:
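
For context, "destroy req ourselves" at out_put_req amounts to roughly the
following in the fs/aio.c of the time (reconstructed surrounding code, not
part of this diff):

out_put_req:
	put_reqs_available(ctx, 1);
	percpu_ref_put(&ctx->reqs);
	if (req->ki_eventfd)
		eventfd_ctx_put(req->ki_eventfd);
	kmem_cache_free(kiocb_cachep, req);
	return ret;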