diff mbox

[30/32] aio: add delayed cancel support

Message ID 20180110155853.32348-31-hch@lst.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christoph Hellwig Jan. 10, 2018, 3:58 p.m. UTC
The upcoming aio poll support would like to be able to complete the
iocb inline from the cancellation context, but that would cause
a lock order reversal.  Add support for optionally moving the cancelation
outside the context lock to avoid this reversal.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/aio.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)

Comments

Jeff Moyer Jan. 10, 2018, 10:59 p.m. UTC | #1
Christoph Hellwig <hch@lst.de> writes:

> The upcoming aio poll support would like to be able to complete the
> iocb inline from the cancellation context, but that would cause
> a lock order reversal.  Add support for optionally moving the cancelation
> outside the context lock to avoid this reversal.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Acked-by: Jeff Moyer <jmoyer@redhat.com>
Jeff Moyer Jan. 10, 2018, 11:26 p.m. UTC | #2
Jeff Moyer <jmoyer@redhat.com> writes:

> Christoph Hellwig <hch@lst.de> writes:
>
>> The upcoming aio poll support would like to be able to complete the
>> iocb inline from the cancellation context, but that would cause
>> a lock order reversal.  Add support for optionally moving the cancelation
>> outside the context lock to avoid this reversal.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Acked-by: Jeff Moyer <jmoyer@redhat.com>

Actually, let's move these two defines:

#define AIO_IOCB_DELAYED_CANCEL (1 << 0)
#define AIO_IOCB_CANCELLED      (1 << 1)

to include/linux/aio.h so that drivers outside of fs/aio.c can make use
of them.

-Jeff
Christoph Hellwig Jan. 11, 2018, 1:43 p.m. UTC | #3
On Wed, Jan 10, 2018 at 06:26:39PM -0500, Jeff Moyer wrote:
> >> The upcoming aio poll support would like to be able to complete the
> >> iocb inline from the cancellation context, but that would cause
> >> a lock order reversal.  Add support for optionally moving the cancelation
> >> outside the context lock to avoid this reversal.
> >>
> >> Signed-off-by: Christoph Hellwig <hch@lst.de>
> >
> > Acked-by: Jeff Moyer <jmoyer@redhat.com>
> 
> Actually, let's move these two defines:
> 
> #define AIO_IOCB_DELAYED_CANCEL (1 << 0)
> #define AIO_IOCB_CANCELLED      (1 << 1)
> 
> to include/linux/aio.h so that drivers outside of fs/aio.c can make use
> of them.

struct aio_kiocb is private to aio.c, so just exposing them won't
do anything useful.  If we really need these elsewhere we'll need
to come up with a proper interface.
Jeff Moyer Jan. 11, 2018, 3:27 p.m. UTC | #4
Christoph Hellwig <hch@lst.de> writes:

> On Wed, Jan 10, 2018 at 06:26:39PM -0500, Jeff Moyer wrote:
>> >> The upcoming aio poll support would like to be able to complete the
>> >> iocb inline from the cancellation context, but that would cause
>> >> a lock order reversal.  Add support for optionally moving the cancelation
>> >> outside the context lock to avoid this reversal.
>> >>
>> >> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> >
>> > Acked-by: Jeff Moyer <jmoyer@redhat.com>
>> 
>> Actually, let's move these two defines:
>> 
>> #define AIO_IOCB_DELAYED_CANCEL (1 << 0)
>> #define AIO_IOCB_CANCELLED      (1 << 1)
>> 
>> to include/linux/aio.h so that drivers outside of fs/aio.c can make use
>> of them.
>
> struct aio_kiocb is private to aio.c, so just exposing them won't
> do anything useful.  If we really need these elsewhere we'll need
> to come up with a proper interface.

Duh, good point.  My main concern is that things like usb gadget will
have to deal with races between cancellation and completion on their
own.  It would be nice if we had infrastructure for them to use.  I'll
have a look through that code to see if there's something we could or
should be doing.

Cheers,
Jeff
Christoph Hellwig Jan. 15, 2018, 8:54 a.m. UTC | #5
On Thu, Jan 11, 2018 at 10:27:05AM -0500, Jeff Moyer wrote:
> Duh, good point.  My main concern is that things like usb gadget will
> have to deal with races between cancellation and completion on their
> own.  It would be nice if we had infrastructure for them to use.  I'll
> have a look through that code to see if there's something we could or
> should be doing.

The current cancel interface is a bit of a mess, as it assumes the
kiocb is embedded into an aio_kiocb.  I think it would need a major
rework in general, but I'll need to fix a couple other aio issues
first.
diff mbox

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 99c1d2ad67e9..6fca1408a8a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -170,6 +170,10 @@  struct aio_kiocb {
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
 
+	unsigned int		flags;		/* protected by ctx->ctx_lock */
+#define AIO_IOCB_DELAYED_CANCEL	(1 << 0)
+#define AIO_IOCB_CANCELLED	(1 << 1)
+
 	/*
 	 * If the aio_resfd field of the userspace iocb is not zero,
 	 * this is the underlying eventfd context to deliver events to.
@@ -536,9 +540,9 @@  static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
+static void __kiocb_set_cancel_fn(struct aio_kiocb *req,
+		kiocb_cancel_fn *cancel, unsigned int iocb_flags)
 {
-	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
@@ -548,8 +552,15 @@  void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	list_add_tail(&req->ki_list, &ctx->active_reqs);
 	req->ki_cancel = cancel;
+	req->flags |= iocb_flags;
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
+
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
+{
+	return __kiocb_set_cancel_fn(container_of(iocb, struct aio_kiocb, rw),
+			cancel, 0);
+}
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
 /*
@@ -603,17 +614,27 @@  static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
 	struct aio_kiocb *req;
+	LIST_HEAD(list);
 
 	spin_lock_irq(&ctx->ctx_lock);
-
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
 				       struct aio_kiocb, ki_list);
-		kiocb_cancel(req);
-	}
 
+		if (req->flags & AIO_IOCB_DELAYED_CANCEL) {
+			req->flags |= AIO_IOCB_CANCELLED;
+			list_move_tail(&req->ki_list, &list);
+		} else {
+			kiocb_cancel(req);
+		}
+	}
 	spin_unlock_irq(&ctx->ctx_lock);
 
+	while (!list_empty(&list)) {
+		req = list_first_entry(&list, struct aio_kiocb, ki_list);
+		kiocb_cancel(req);
+	}
+
 	percpu_ref_kill(&ctx->reqs);
 	percpu_ref_put(&ctx->reqs);
 }
@@ -1786,15 +1807,22 @@  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	if (unlikely(!ctx))
 		return -EINVAL;
 
-	spin_lock_irq(&ctx->ctx_lock);
+	ret = -EINVAL;
 
+	spin_lock_irq(&ctx->ctx_lock);
 	kiocb = lookup_kiocb(ctx, iocb, key);
+	if (kiocb) {
+		if (kiocb->flags & AIO_IOCB_DELAYED_CANCEL) {
+			kiocb->flags |= AIO_IOCB_CANCELLED;
+		} else {
+			ret = kiocb_cancel(kiocb);
+			kiocb = NULL;
+		}
+	}
+	spin_unlock_irq(&ctx->ctx_lock);
+
 	if (kiocb)
 		ret = kiocb_cancel(kiocb);
-	else
-		ret = -EINVAL;
-
-	spin_unlock_irq(&ctx->ctx_lock);
 
 	if (!ret) {
 		/*
@@ -1806,7 +1834,6 @@  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	}
 
 	percpu_ref_put(&ctx->users);
-
 	return ret;
 }