@@ -295,7 +295,7 @@ struct io_ring_ctx {
spinlock_t completion_lock;
bool poll_multi_queue;
- bool cq_waiting;
+ atomic_t cq_wait_nr;
/*
* ->iopoll_list is protected by the ctx->uring_lock for
@@ -1279,31 +1279,38 @@ static __cold void io_fallback_tw(struct io_uring_task *tctx)
}
}
-static void io_req_local_work_add(struct io_kiocb *req)
+static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
{
struct io_ring_ctx *ctx = req->ctx;
+ bool first;
percpu_ref_get(&ctx->refs);
- if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
- goto put_ref;
-
+ first = llist_add(&req->io_task_work.node, &ctx->work_llist);
/* needed for the following wake up */
smp_mb__after_atomic();
- if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
- io_move_task_work_from_local(ctx);
- goto put_ref;
+ if (first) {
+ if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
+ io_move_task_work_from_local(ctx);
+ goto put_ref;
+ }
+
+ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+ atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+ if (ctx->has_evfd)
+ io_eventfd_signal(ctx);
}
- if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
- atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
- if (ctx->has_evfd)
- io_eventfd_signal(ctx);
+ if (atomic_read(&ctx->cq_wait_nr) <= 0)
+ goto put_ref;
- if (READ_ONCE(ctx->cq_waiting))
- wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
+ if (!(flags & IOU_F_TWQ_FACILE))
+ atomic_set(&ctx->cq_wait_nr, 0);
+ else if (atomic_dec_return(&ctx->cq_wait_nr) > 0)
+ goto put_ref;
+ wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
put_ref:
percpu_ref_put(&ctx->refs);
}
@@ -1315,7 +1322,7 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
if (!(flags & IOU_F_TWQ_FORCE_NORMAL) &&
(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) {
- io_req_local_work_add(req);
+ io_req_local_work_add(req, flags);
return;
}
@@ -2601,7 +2608,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
unsigned long check_cq;
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
- WRITE_ONCE(ctx->cq_waiting, 1);
+ int to_wait = (int) iowq.cq_tail - READ_ONCE(ctx->rings->cq.tail);
+
+ atomic_set(&ctx->cq_wait_nr, to_wait);
set_current_state(TASK_INTERRUPTIBLE);
} else {
prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
@@ -2610,7 +2619,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
ret = io_cqring_wait_schedule(ctx, &iowq);
__set_current_state(TASK_RUNNING);
- WRITE_ONCE(ctx->cq_waiting, 0);
+ atomic_set(&ctx->cq_wait_nr, 0);
if (ret < 0)
break;
@@ -18,6 +18,7 @@
enum {
/* don't use deferred task_work */
IOU_F_TWQ_FORCE_NORMAL = 1,
+ IOU_F_TWQ_FACILE = 2,
};
enum {
@@ -33,7 +33,7 @@ static inline void io_notif_flush(struct io_kiocb *notif)
/* drop slot's master ref */
if (refcount_dec_and_test(&nd->uarg.refcnt))
- io_req_task_work_add(notif);
+ __io_req_task_work_add(notif, IOU_F_TWQ_FACILE);
}
static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len)
@@ -304,7 +304,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
return;
io_req_set_res(req, io_fixup_rw_res(req, res), 0);
req->io_task_work.func = io_req_rw_complete;
- io_req_task_work_add(req);
+ __io_req_task_work_add(req, IOU_F_TWQ_FACILE);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
Every task_work item tries to wake the task so that it gets executed, which causes excessive scheduling with the corresponding overhead. For some tw items that is justified, but others do little more than post a single CQE. When a task waits for multiple CQEs, every such task_work wakes it up. Instead, the task may give a hint about how many CQEs it is waiting for; io_req_local_work_add() compares against it and skips wake-ups if #cqes + #tw items is not enough to satisfy the task. The optimisation is used only for simple enough tw items; more complex and/or urgent items force a wake-up. It is also limited to DEFER_TASKRUN. The trade-off is extra atomics in io_req_local_work_add() in exchange for saving more on rescheduling the task. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> --- include/linux/io_uring_types.h | 2 +- io_uring/io_uring.c | 41 +++++++++++++++++++++------------- io_uring/io_uring.h | 1 + io_uring/notif.h | 2 +- io_uring/rw.c | 2 +- 5 files changed, 29 insertions(+), 19 deletions(-)