fs, USB gadget: Rework kiocb cancellation

Message ID: 20240206234718.1437772-1-bvanassche@acm.org
State: New
Series: fs, USB gadget: Rework kiocb cancellation

Commit Message

Bart Van Assche Feb. 6, 2024, 11:47 p.m. UTC
Calling kiocb_set_cancel_fn() without knowing whether the caller
submitted a struct kiocb or a struct aio_kiocb is unsafe. Fix this by
introducing the cancel_kiocb() method in struct file_operations. The
following call trace illustrates that without this patch an
out-of-bounds write happens if I/O is submitted by io_uring (from a
phone with an ARM CPU and kernel 6.1):

WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8
Call trace:
 kiocb_set_cancel_fn+0x9c/0xa8
 ffs_epfile_read_iter+0x144/0x1d0
 io_read+0x19c/0x498
 io_issue_sqe+0x118/0x27c
 io_submit_sqes+0x25c/0x5fc
 __arm64_sys_io_uring_enter+0x104/0xab0
 invoke_syscall+0x58/0x11c
 el0_svc_common+0xb4/0xf4
 do_el0_svc+0x2c/0xb0
 el0_svc+0x2c/0xa4
 el0t_64_sync_handler+0x68/0xb4
 el0t_64_sync+0x1a4/0x1a8

The following patch has been used as the basis of this patch: Christoph
Hellwig, "[PATCH 08/32] aio: replace kiocb_set_cancel_fn with a
cancel_kiocb file operation", May 2018
(https://lore.kernel.org/all/20180515194833.6906-9-hch@lst.de/).
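
An editor's aside, not part of the patch: the unsafety stems from
fs/aio.c recovering its per-request state with container_of(), which is
valid only when the kiocb really is embedded in a struct aio_kiocb. A
minimal sketch of the broken pattern (layout and locking simplified):

	/* fs/aio.c embeds the kiocb it submits in a larger request: */
	struct aio_kiocb {
		struct kiocb		rw;	/* what the driver sees */
		struct kioctx		*ki_ctx;
		struct list_head	ki_list; /* on ctx->active_reqs */
	};

	void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
	{
		struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);

		/* Valid only if iocb came from fs/aio.c. For an io_uring
		 * kiocb, req->ki_list lies beyond the end of the real
		 * allocation, so this write goes out of bounds: */
		list_add_tail(&req->ki_list, &req->ki_ctx->active_reqs);
	}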

Cc: Christoph Hellwig <hch@lst.de>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Sandeep Dhavale <dhavale@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/usb/gadget/function/f_fs.c | 10 +---
 drivers/usb/gadget/legacy/inode.c  |  5 +-
 fs/aio.c                           | 93 +++++++++++++++++-------------
 include/linux/aio.h                |  7 ---
 include/linux/fs.h                 |  1 +
 include/linux/mm.h                 |  5 ++
 include/linux/mm_inline.h          |  5 ++
 7 files changed, 68 insertions(+), 58 deletions(-)

Comments

Christian Brauner Feb. 7, 2024, 8:56 a.m. UTC | #1
On Tue, Feb 06, 2024 at 03:47:18PM -0800, Bart Van Assche wrote:
> Calling kiocb_set_cancel_fn() without knowing whether the caller
> submitted a struct kiocb or a struct aio_kiocb is unsafe. Fix this by
> introducing the cancel_kiocb() method in struct file_operations. The
> following call trace illustrates that without this patch an
> out-of-bounds write happens if I/O is submitted by io_uring (from a
> phone with an ARM CPU and kernel 6.1):
> 
> WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8
> Call trace:
>  kiocb_set_cancel_fn+0x9c/0xa8
>  ffs_epfile_read_iter+0x144/0x1d0
>  io_read+0x19c/0x498
>  io_issue_sqe+0x118/0x27c
>  io_submit_sqes+0x25c/0x5fc
>  __arm64_sys_io_uring_enter+0x104/0xab0
>  invoke_syscall+0x58/0x11c
>  el0_svc_common+0xb4/0xf4
>  do_el0_svc+0x2c/0xb0
>  el0_svc+0x2c/0xa4
>  el0t_64_sync_handler+0x68/0xb4
>  el0t_64_sync+0x1a4/0x1a8
> 
> The following patch has been used as the basis of this patch: Christoph
> Hellwig, "[PATCH 08/32] aio: replace kiocb_set_cancel_fn with a
> cancel_kiocb file operation", May 2018
> (https://lore.kernel.org/all/20180515194833.6906-9-hch@lst.de/).

What's changed that voids Al's objections on that patch from 2018?
Specifically
https://lore.kernel.org/all/20180406021553.GS30522@ZenIV.linux.org.uk
https://lore.kernel.org/all/20180520052720.GY30522@ZenIV.linux.org.uk

> [...]
Jens Axboe Feb. 7, 2024, 3:53 p.m. UTC | #2
On 2/7/24 1:56 AM, Christian Brauner wrote:
> On Tue, Feb 06, 2024 at 03:47:18PM -0800, Bart Van Assche wrote:
>> [...]
>>
>> The following patch has been used as the basis of this patch: Christoph
>> Hellwig, "[PATCH 08/32] aio: replace kiocb_set_cancel_fn with a
>> cancel_kiocb file operation", May 2018
>> (https://lore.kernel.org/all/20180515194833.6906-9-hch@lst.de/).
> 
> What's changed that voids Al's objections on that patch from 2018?
> Specifically
> https://lore.kernel.org/all/20180406021553.GS30522@ZenIV.linux.org.uk
> https://lore.kernel.org/all/20180520052720.GY30522@ZenIV.linux.org.uk

Nothing, it's definitely still broken in the same UAF way.

But so are the cancelation bits; the assumption that the kiocb is
contained within some aio-specific struct is just awful.

Does the cancelation support even work, or is it needed? It would seem a
lot more prudent to get rid of it. Barring that, we should probably do
something like the below up front until the whole thing is sorted out.


diff --git a/fs/aio.c b/fs/aio.c
index bb2ff48991f3..159648f43238 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -593,6 +593,10 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
+	/* kiocb didn't come from aio, so just ignore it */
+	if (!(iocb->ki_flags & IOCB_FROM_AIO))
+		return;
+
 	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
 		return;
 
@@ -1509,7 +1513,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 	req->ki_complete = aio_complete_rw;
 	req->private = NULL;
 	req->ki_pos = iocb->aio_offset;
-	req->ki_flags = req->ki_filp->f_iocb_flags;
+	req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_FROM_AIO;
 	if (iocb->aio_flags & IOCB_FLAG_RESFD)
 		req->ki_flags |= IOCB_EVENTFD;
 	if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..2870b3d61866 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -352,6 +352,10 @@ enum rw_hint {
  * unrelated IO (like cache flushing, new IO generation, etc).
  */
 #define IOCB_DIO_CALLER_COMP	(1 << 22)
+ /*
+  * kiocb came from fs/aio.c
+  */
+#define IOCB_FROM_AIO		(1 << 23)
 
 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
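
[Editor's note: a sketch of what the guard above buys, assuming Jens's
diff is applied. Only aio_prep_rw() sets IOCB_FROM_AIO, so
kiocb_set_cancel_fn() becomes a no-op for every other submitter:]

	/* fs/aio.c (aio_prep_rw) is the only path that marks its kiocbs: */
	req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_FROM_AIO;

	/* io_uring leaves the flag clear. f_fs.c sets p->aio for any
	 * non-sync kiocb, which includes io_uring submissions, so its
	 * existing call
	 *
	 *	if (p->aio)
	 *		kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
	 *
	 * becomes harmless: the early return fires before the bogus
	 * container_of() for anything that did not come from fs/aio.c. */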
Bart Van Assche Feb. 7, 2024, 6:04 p.m. UTC | #3
On 2/7/24 00:56, Christian Brauner wrote:
> What's changed that voids Al's objections on that patch from 2018?
> Specifically
> https://lore.kernel.org/all/20180406021553.GS30522@ZenIV.linux.org.uk
> https://lore.kernel.org/all/20180520052720.GY30522@ZenIV.linux.org.uk

Thanks for drawing my attention to Al's feedback. I had not yet noticed
it, but I plan to take a close look at it.

Thanks,

Bart.
Bart Van Assche Feb. 8, 2024, 12:26 a.m. UTC | #4
On 2/7/24 00:56, Christian Brauner wrote:
> On Tue, Feb 06, 2024 at 03:47:18PM -0800, Bart Van Assche wrote:
>> [...]
>>
>> The following patch has been used as the basis of this patch: Christoph
>> Hellwig, "[PATCH 08/32] aio: replace kiocb_set_cancel_fn with a
>> cancel_kiocb file operation", May 2018
>> (https://lore.kernel.org/all/20180515194833.6906-9-hch@lst.de/).
> 
> What's changed that voids Al's objections on that patch from 2018?
> Specifically
> https://lore.kernel.org/all/20180406021553.GS30522@ZenIV.linux.org.uk
> https://lore.kernel.org/all/20180520052720.GY30522@ZenIV.linux.org.uk

How about adding requests to the pending request list before calling
call_*_iter() instead of after it has returned?

Thanks,

Bart.

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 6bff6cb93789..4837e3071263 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -31,7 +31,6 @@
  #include <linux/usb/composite.h>
  #include <linux/usb/functionfs.h>

-#include <linux/aio.h>
  #include <linux/kthread.h>
  #include <linux/poll.h>
  #include <linux/eventfd.h>
@@ -1157,23 +1156,16 @@ ffs_epfile_open(struct inode *inode, struct file *file)
  	return stream_open(inode, file);
  }

-static int ffs_aio_cancel(struct kiocb *kiocb)
+static void ffs_epfile_cancel_kiocb(struct kiocb *kiocb)
  {
  	struct ffs_io_data *io_data = kiocb->private;
  	struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
  	unsigned long flags;
-	int value;

  	spin_lock_irqsave(&epfile->ffs->eps_lock, flags);
-
  	if (io_data && io_data->ep && io_data->req)
-		value = usb_ep_dequeue(io_data->ep, io_data->req);
-	else
-		value = -EINVAL;
-
+		usb_ep_dequeue(io_data->ep, io_data->req);
  	spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags);
-
-	return value;
  }

  static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)
@@ -1198,9 +1190,6 @@ static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)

  	kiocb->private = p;

-	if (p->aio)
-		kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
-
  	res = ffs_epfile_io(kiocb->ki_filp, p);
  	if (res == -EIOCBQUEUED)
  		return res;
@@ -1242,9 +1231,6 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)

  	kiocb->private = p;

-	if (p->aio)
-		kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
-
  	res = ffs_epfile_io(kiocb->ki_filp, p);
  	if (res == -EIOCBQUEUED)
  		return res;
@@ -1356,6 +1342,7 @@ static const struct file_operations ffs_epfile_operations = {
  	.release =	ffs_epfile_release,
  	.unlocked_ioctl =	ffs_epfile_ioctl,
  	.compat_ioctl = compat_ptr_ioctl,
+	.cancel_kiocb = ffs_epfile_cancel_kiocb,
  };


diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 03179b1880fd..b79cf0db8243 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -22,7 +22,6 @@
  #include <linux/slab.h>
  #include <linux/poll.h>
  #include <linux/kthread.h>
-#include <linux/aio.h>
  #include <linux/uio.h>
  #include <linux/refcount.h>
  #include <linux/delay.h>
@@ -446,23 +445,18 @@ struct kiocb_priv {
  	unsigned		actual;
  };

-static int ep_aio_cancel(struct kiocb *iocb)
+static void ep_cancel_kiocb(struct kiocb *iocb)
  {
  	struct kiocb_priv	*priv = iocb->private;
  	struct ep_data		*epdata;
-	int			value;

  	local_irq_disable();
  	epdata = priv->epdata;
  	// spin_lock(&epdata->dev->lock);
  	if (likely(epdata && epdata->ep && priv->req))
-		value = usb_ep_dequeue (epdata->ep, priv->req);
-	else
-		value = -EINVAL;
+		usb_ep_dequeue (epdata->ep, priv->req);
  	// spin_unlock(&epdata->dev->lock);
  	local_irq_enable();
-
-	return value;
  }

  static void ep_user_copy_worker(struct work_struct *work)
@@ -537,7 +531,6 @@ static ssize_t ep_aio(struct kiocb *iocb,
  	iocb->private = priv;
  	priv->iocb = iocb;

-	kiocb_set_cancel_fn(iocb, ep_aio_cancel);
  	get_ep(epdata);
  	priv->epdata = epdata;
  	priv->actual = 0;
@@ -709,6 +702,7 @@ static const struct file_operations ep_io_operations = {
  	.unlocked_ioctl = ep_ioctl,
  	.read_iter =	ep_read_iter,
  	.write_iter =	ep_write_iter,
+	.cancel_kiocb = ep_cancel_kiocb,
  };

  /* ENDPOINT INITIALIZATION
diff --git a/fs/aio.c b/fs/aio.c
index bb2ff48991f3..c10c2cfc979e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -203,7 +203,6 @@ struct aio_kiocb {
  	};

  	struct kioctx		*ki_ctx;
-	kiocb_cancel_fn		*ki_cancel;

  	struct io_event		ki_res;

@@ -587,22 +586,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)

-void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
-{
-	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
-	struct kioctx *ctx = req->ki_ctx;
-	unsigned long flags;
-
-	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
-		return;
-
-	spin_lock_irqsave(&ctx->ctx_lock, flags);
-	list_add_tail(&req->ki_list, &ctx->active_reqs);
-	req->ki_cancel = cancel;
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-}
-EXPORT_SYMBOL(kiocb_set_cancel_fn);
-
  /*
   * free_ioctx() should be RCU delayed to synchronize against the RCU
   * protected lookup_ioctx() and also needs process context to call
@@ -634,6 +617,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  	queue_rcu_work(system_wq, &ctx->free_rwork);
  }

+static void aio_cancel_and_del(struct aio_kiocb *req);
+
  /*
   * When this function runs, the kioctx has been removed from the "hash table"
   * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
@@ -649,8 +634,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
  	while (!list_empty(&ctx->active_reqs)) {
  		req = list_first_entry(&ctx->active_reqs,
  				       struct aio_kiocb, ki_list);
-		req->ki_cancel(&req->rw);
-		list_del_init(&req->ki_list);
+		aio_cancel_and_del(req);
  	}

  	spin_unlock_irq(&ctx->ctx_lock);
@@ -1593,8 +1577,26 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
  	if (ret < 0)
  		return ret;
  	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
-	if (!ret)
-		aio_rw_done(req, call_read_iter(file, req, &iter));
+	if (ret)
+		goto free_iovec;
+
+	/*
+	 * If the .cancel_kiocb() callback has been set, add the request
+	 * to the list of active requests.
+	 */
+	if (req->ki_filp->f_op->cancel_kiocb) {
+		struct aio_kiocb *aio = container_of(req, struct aio_kiocb, rw);
+		struct kioctx *ctx = aio->ki_ctx;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->ctx_lock, flags);
+		list_add_tail(&aio->ki_list, &ctx->active_reqs);
+		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+	}
+
+	aio_rw_done(req, call_read_iter(file, req, &iter));
+
+free_iovec:
  	kfree(iovec);
  	return ret;
  }
@@ -1715,6 +1717,47 @@ static void poll_iocb_unlock_wq(struct poll_iocb *req)
  	rcu_read_unlock();
  }

+static bool aio_poll_cancel(struct aio_kiocb *aiocb)
+{
+	struct poll_iocb *req = &aiocb->poll;
+	struct kioctx *ctx = aiocb->ki_ctx;
+
+	lockdep_assert_held(&ctx->ctx_lock);
+
+	if (!poll_iocb_lock_wq(req)) {
+		/*
+		 * Not a polled request or polling has already been cancelled.
+		 */
+		return false;
+	}
+
+	WRITE_ONCE(req->cancelled, true);
+	if (!req->work_scheduled) {
+		schedule_work(&aiocb->poll.work);
+		req->work_scheduled = true;
+	}
+	poll_iocb_unlock_wq(req);
+
+	return true;
+}
+
+static void aio_cancel_and_del(struct aio_kiocb *req)
+{
+	struct kioctx *ctx = req->ki_ctx;
+
+	lockdep_assert_held(&ctx->ctx_lock);
+
+	if (!aio_poll_cancel(req)) {
+		void (*cancel_kiocb)(struct kiocb *) =
+			req->rw.ki_filp->f_op->cancel_kiocb;
+
+		WARN_ON_ONCE(!cancel_kiocb);
+		if (cancel_kiocb)
+			cancel_kiocb(&req->rw);
+	}
+	list_del_init(&req->ki_list);
+}
+
  static void aio_poll_complete_work(struct work_struct *work)
  {
  	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1760,24 +1803,6 @@ static void aio_poll_complete_work(struct work_struct *work)
  	iocb_put(iocb);
  }

-/* assumes we are called with irqs disabled */
-static int aio_poll_cancel(struct kiocb *iocb)
-{
-	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
-	struct poll_iocb *req = &aiocb->poll;
-
-	if (poll_iocb_lock_wq(req)) {
-		WRITE_ONCE(req->cancelled, true);
-		if (!req->work_scheduled) {
-			schedule_work(&aiocb->poll.work);
-			req->work_scheduled = true;
-		}
-		poll_iocb_unlock_wq(req);
-	} /* else, the request was force-cancelled by POLLFREE already */
-
-	return 0;
-}
-
  static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
  		void *key)
  {
@@ -1945,7 +1970,6 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
  			 * active_reqs so that it can be cancelled if needed.
  			 */
  			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
-			aiocb->ki_cancel = aio_poll_cancel;
  		}
  		if (on_queue)
  			poll_iocb_unlock_wq(req);
@@ -2189,8 +2213,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
  	/* TODO: use a hash or array, this sucks. */
  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
  		if (kiocb->ki_res.obj == obj) {
-			ret = kiocb->ki_cancel(&kiocb->rw);
-			list_del_init(&kiocb->ki_list);
+			aio_cancel_and_del(kiocb);
  			break;
  		}
  	}
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 86892a4fe7c8..2aa6d0be3171 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -4,20 +4,13 @@

  #include <linux/aio_abi.h>

-struct kioctx;
-struct kiocb;
  struct mm_struct;

-typedef int (kiocb_cancel_fn)(struct kiocb *);
-
  /* prototypes */
  #ifdef CONFIG_AIO
  extern void exit_aio(struct mm_struct *mm);
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
  #else
  static inline void exit_aio(struct mm_struct *mm) { }
-static inline void kiocb_set_cancel_fn(struct kiocb *req,
-				       kiocb_cancel_fn *cancel) { }
  #endif /* CONFIG_AIO */

  #endif /* __LINUX__AIO_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..7ec714878637 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2021,6 +2021,7 @@ struct file_operations {
  	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
  	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
  				unsigned int poll_flags);
+	void (*cancel_kiocb)(struct kiocb *);
  } __randomize_layout;

  /* Wrap a directory iterator that needs exclusive inode access */
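
[Editor's note: a sketch of the ordering change above, factored into a
hypothetical helper whose name does not appear in the diff. Linking the
request into ctx->active_reqs under ctx_lock before call_read_iter()
hands the kiocb to the driver means io_cancel() can never observe a
queued-but-unlisted request; aio_complete() unlinks it under the same
lock once the I/O finishes:]

	static void aio_add_active_req(struct aio_kiocb *iocb)
	{
		struct kioctx *ctx = iocb->ki_ctx;
		unsigned long flags;

		spin_lock_irqsave(&ctx->ctx_lock, flags);
		list_add_tail(&iocb->ki_list, &ctx->active_reqs);
		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
	}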

Patch

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 6bff6cb93789..adc00689fe3b 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -31,7 +31,6 @@ 
 #include <linux/usb/composite.h>
 #include <linux/usb/functionfs.h>
 
-#include <linux/aio.h>
 #include <linux/kthread.h>
 #include <linux/poll.h>
 #include <linux/eventfd.h>
@@ -1157,7 +1156,7 @@  ffs_epfile_open(struct inode *inode, struct file *file)
 	return stream_open(inode, file);
 }
 
-static int ffs_aio_cancel(struct kiocb *kiocb)
+static int ffs_epfile_cancel_kiocb(struct kiocb *kiocb)
 {
 	struct ffs_io_data *io_data = kiocb->private;
 	struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
@@ -1198,9 +1197,6 @@  static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 
 	kiocb->private = p;
 
-	if (p->aio)
-		kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
-
 	res = ffs_epfile_io(kiocb->ki_filp, p);
 	if (res == -EIOCBQUEUED)
 		return res;
@@ -1242,9 +1238,6 @@  static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 
 	kiocb->private = p;
 
-	if (p->aio)
-		kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
-
 	res = ffs_epfile_io(kiocb->ki_filp, p);
 	if (res == -EIOCBQUEUED)
 		return res;
@@ -1356,6 +1349,7 @@  static const struct file_operations ffs_epfile_operations = {
 	.release =	ffs_epfile_release,
 	.unlocked_ioctl =	ffs_epfile_ioctl,
 	.compat_ioctl = compat_ptr_ioctl,
+	.cancel_kiocb = ffs_epfile_cancel_kiocb,
 };
 
 
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 03179b1880fd..a4c03cfb3baa 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -22,7 +22,6 @@ 
 #include <linux/slab.h>
 #include <linux/poll.h>
 #include <linux/kthread.h>
-#include <linux/aio.h>
 #include <linux/uio.h>
 #include <linux/refcount.h>
 #include <linux/delay.h>
@@ -446,7 +445,7 @@  struct kiocb_priv {
 	unsigned		actual;
 };
 
-static int ep_aio_cancel(struct kiocb *iocb)
+static int ep_cancel_kiocb(struct kiocb *iocb)
 {
 	struct kiocb_priv	*priv = iocb->private;
 	struct ep_data		*epdata;
@@ -537,7 +536,6 @@  static ssize_t ep_aio(struct kiocb *iocb,
 	iocb->private = priv;
 	priv->iocb = iocb;
 
-	kiocb_set_cancel_fn(iocb, ep_aio_cancel);
 	get_ep(epdata);
 	priv->epdata = epdata;
 	priv->actual = 0;
@@ -709,6 +707,7 @@  static const struct file_operations ep_io_operations = {
 	.unlocked_ioctl = ep_ioctl,
 	.read_iter =	ep_read_iter,
 	.write_iter =	ep_write_iter,
+	.cancel_kiocb = ep_cancel_kiocb,
 };
 
 /* ENDPOINT INITIALIZATION
diff --git a/fs/aio.c b/fs/aio.c
index bb2ff48991f3..0b13d6be773d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -203,7 +203,6 @@  struct aio_kiocb {
 	};
 
 	struct kioctx		*ki_ctx;
-	kiocb_cancel_fn		*ki_cancel;
 
 	struct io_event		ki_res;
 
@@ -587,22 +586,6 @@  static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
-{
-	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
-	struct kioctx *ctx = req->ki_ctx;
-	unsigned long flags;
-
-	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
-		return;
-
-	spin_lock_irqsave(&ctx->ctx_lock, flags);
-	list_add_tail(&req->ki_list, &ctx->active_reqs);
-	req->ki_cancel = cancel;
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-}
-EXPORT_SYMBOL(kiocb_set_cancel_fn);
-
 /*
  * free_ioctx() should be RCU delayed to synchronize against the RCU
  * protected lookup_ioctx() and also needs process context to call
@@ -634,6 +617,8 @@  static void free_ioctx_reqs(struct percpu_ref *ref)
 	queue_rcu_work(system_wq, &ctx->free_rwork);
 }
 
+static void aio_cancel_and_del(struct aio_kiocb *req);
+
 /*
  * When this function runs, the kioctx has been removed from the "hash table"
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
@@ -649,8 +634,7 @@  static void free_ioctx_users(struct percpu_ref *ref)
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
 				       struct aio_kiocb, ki_list);
-		req->ki_cancel(&req->rw);
-		list_del_init(&req->ki_list);
+		aio_cancel_and_del(req);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -1556,6 +1540,20 @@  static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
 {
 	switch (ret) {
 	case -EIOCBQUEUED:
+		/*
+		 * If the .cancel_kiocb() callback has been set, add the request
+		 * to the list of active requests.
+		 */
+		if (req->ki_filp->f_op->cancel_kiocb) {
+			struct aio_kiocb *iocb =
+				container_of(req, struct aio_kiocb, rw);
+			struct kioctx *ctx = iocb->ki_ctx;
+			unsigned long flags;
+
+			spin_lock_irqsave(&ctx->ctx_lock, flags);
+			list_add_tail(&iocb->ki_list, &ctx->active_reqs);
+			spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+		}
 		break;
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
@@ -1715,6 +1713,41 @@  static void poll_iocb_unlock_wq(struct poll_iocb *req)
 	rcu_read_unlock();
 }
 
+/* assumes we are called with irqs disabled */
+static int aio_poll_cancel(struct aio_kiocb *aiocb)
+{
+	struct poll_iocb *req = &aiocb->poll;
+	struct kioctx *ctx = aiocb->ki_ctx;
+
+	lockdep_assert_held(&ctx->ctx_lock);
+
+	if (!poll_iocb_lock_wq(req)) {
+		/* Not a polled request or already cancelled. */
+		return 0;
+	}
+
+	WRITE_ONCE(req->cancelled, true);
+	if (!req->work_scheduled) {
+		schedule_work(&aiocb->poll.work);
+		req->work_scheduled = true;
+	}
+	poll_iocb_unlock_wq(req);
+
+	return 0;
+}
+
+static void aio_cancel_and_del(struct aio_kiocb *req)
+{
+	struct kioctx *ctx = req->ki_ctx;
+
+	lockdep_assert_held(&ctx->ctx_lock);
+
+	if (req->rw.ki_filp->f_op->cancel_kiocb)
+		req->rw.ki_filp->f_op->cancel_kiocb(&req->rw);
+	aio_poll_cancel(req);
+	list_del_init(&req->ki_list);
+}
+
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1760,24 +1793,6 @@  static void aio_poll_complete_work(struct work_struct *work)
 	iocb_put(iocb);
 }
 
-/* assumes we are called with irqs disabled */
-static int aio_poll_cancel(struct kiocb *iocb)
-{
-	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
-	struct poll_iocb *req = &aiocb->poll;
-
-	if (poll_iocb_lock_wq(req)) {
-		WRITE_ONCE(req->cancelled, true);
-		if (!req->work_scheduled) {
-			schedule_work(&aiocb->poll.work);
-			req->work_scheduled = true;
-		}
-		poll_iocb_unlock_wq(req);
-	} /* else, the request was force-cancelled by POLLFREE already */
-
-	return 0;
-}
-
 static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		void *key)
 {
@@ -1945,7 +1960,6 @@  static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 			 * active_reqs so that it can be cancelled if needed.
 			 */
 			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
-			aiocb->ki_cancel = aio_poll_cancel;
 		}
 		if (on_queue)
 			poll_iocb_unlock_wq(req);
@@ -2189,8 +2203,7 @@  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	/* TODO: use a hash or array, this sucks. */
 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
 		if (kiocb->ki_res.obj == obj) {
-			ret = kiocb->ki_cancel(&kiocb->rw);
-			list_del_init(&kiocb->ki_list);
+			aio_cancel_and_del(kiocb);
 			break;
 		}
 	}
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 86892a4fe7c8..2aa6d0be3171 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -4,20 +4,13 @@ 
 
 #include <linux/aio_abi.h>
 
-struct kioctx;
-struct kiocb;
 struct mm_struct;
 
-typedef int (kiocb_cancel_fn)(struct kiocb *);
-
 /* prototypes */
 #ifdef CONFIG_AIO
 extern void exit_aio(struct mm_struct *mm);
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
 static inline void exit_aio(struct mm_struct *mm) { }
-static inline void kiocb_set_cancel_fn(struct kiocb *req,
-				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */
 
 #endif /* __LINUX__AIO_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..36cd982c167d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2021,6 +2021,7 @@  struct file_operations {
 	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
 				unsigned int poll_flags);
+	int (*cancel_kiocb)(struct kiocb *);
 } __randomize_layout;
 
 /* Wrap a directory iterator that needs exclusive inode access */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f5a97dec5169..7c05464c0ad0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1092,6 +1092,11 @@  static inline unsigned int folio_order(struct folio *folio)
 	return folio->_flags_1 & 0xff;
 }
 
+/*
+ * Include <linux/fs.h> to work around a circular dependency between
+ * <linux/fs.h> and <linux/huge_mm.h>.
+ */
+#include <linux/fs.h>
 #include <linux/huge_mm.h>
 
 /*
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f4fe593c1400..4eb96f08ec4f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -3,6 +3,11 @@ 
 #define LINUX_MM_INLINE_H
 
 #include <linux/atomic.h>
+/*
+ * Include <linux/fs.h> to work around a circular dependency between
+ * <linux/fs.h> and <linux/huge_mm.h>.
+ */
+#include <linux/fs.h>
 #include <linux/huge_mm.h>
 #include <linux/mm_types.h>
 #include <linux/swap.h>