@@ -1095,18 +1095,33 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
goto out;
}
-int fuse_uring_queue_fuse_req(struct fuse_conn *fc, struct fuse_req *req)
+static int fuse_uring_get_req_qid(struct fuse_req *req, struct fuse_ring *ring,
+ bool async)
{
- struct fuse_ring *ring = fc->ring;
- struct fuse_ring_queue *queue;
- int qid = 0;
- struct fuse_ring_ent *ring_ent = NULL;
- int res;
- bool async = test_bit(FR_BACKGROUND, &req->flags);
- struct list_head *req_queue, *ent_queue;
+ int cpu_off = 0;
+ size_t req_size = 0;
+ int qid;
- if (ring->per_core_queue) {
- int cpu_off;
+ if (!ring->per_core_queue)
+ return 0;
+
+ /*
+ * async handled on a different core (see below) introduces context
+ * switching - this should be avoided for small requests
+ */
+ if (async) {
+ switch (req->args->opcode) {
+ case FUSE_READ:
+ req_size = req->args->out_args[0].size;
+ break;
+ case FUSE_WRITE:
+ req_size = req->args->in_args[1].size;
+ break;
+ default:
+ /* anything else, <= 4K */
+ req_size = 0;
+ break;
+ }
/*
* async requests are best handled on another core, the current
@@ -1120,17 +1135,33 @@ int fuse_uring_queue_fuse_req(struct fuse_conn *fc, struct fuse_req *req)
* It should also not persistently switch between cores - makes
* it hard for the scheduler.
*/
- cpu_off = async ? 1 : 0;
- qid = (task_cpu(current) + cpu_off) % ring->nr_queues;
-
- if (unlikely(qid >= ring->nr_queues)) {
- WARN_ONCE(1,
- "Core number (%u) exceeds nr ueues (%zu)\n",
- qid, ring->nr_queues);
- qid = 0;
- }
+ if (req_size > FUSE_URING_MIN_ASYNC_SIZE)
+ cpu_off = 1;
}
+ qid = (task_cpu(current) + cpu_off) % ring->nr_queues;
+
+ if (unlikely(qid >= ring->nr_queues)) {
+ WARN_ONCE(1, "Core number (%u) exceeds nr queues (%zu)\n",
+ qid, ring->nr_queues);
+ qid = 0;
+ }
+
+ return qid;
+}
+
+int fuse_uring_queue_fuse_req(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct fuse_ring *ring = fc->ring;
+ struct fuse_ring_queue *queue;
+ struct fuse_ring_ent *ring_ent = NULL;
+ int res;
+ bool async = test_bit(FR_BACKGROUND, &req->flags) &&
+ !req->args->async_blocking;
+ struct list_head *ent_queue, *req_queue;
+ int qid;
+
+ qid = fuse_uring_get_req_qid(req, ring, async);
queue = fuse_uring_get_queue(ring, qid);
req_queue = async ? &queue->async_fuse_req_queue :
&queue->sync_fuse_req_queue;
@@ -11,6 +11,13 @@
#include "linux/compiler_types.h"
#include "linux/rbtree_types.h"
+/*
+ * Minimal async request size with uring communication. Async runs on
+ * another core, which has overhead, so the async queue is only used for
+ * requests larger than this size - XXX should this be a tunable parameter?
+ */
+#define FUSE_URING_MIN_ASYNC_SIZE (16384)
+
#if IS_ENABLED(CONFIG_FUSE_IO_URING)
/* IORING_MAX_ENTRIES */
@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
+#include "dev_uring_i.h"
#include <linux/pagemap.h>
#include <linux/slab.h>
@@ -955,11 +956,22 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
+ struct fuse_conn *fc = fm->fc;
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = page_offset(ap->pages[0]);
size_t count = ap->num_pages << PAGE_SHIFT;
ssize_t res;
int err;
+ unsigned int async = fc->async_read;
+
+ /*
+ * Sync requests stay longer on the same core - important with uring.
+ * Check here and not only in dev_uring.c, as fuse_simple_request
+ * then controls waking up on the same core, which avoids application
+ * core switching.
+ */
+ if (async && fuse_uring_ready(fc) && count <= FUSE_URING_MIN_ASYNC_SIZE)
+ async = 0;
ap->args.out_pages = true;
ap->args.page_zeroing = true;
@@ -974,7 +986,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
ia->read.attr_ver = fuse_get_attr_version(fm->fc);
- if (fm->fc->async_read) {
+ if (async) {
ia->ff = fuse_file_get(ff);
ap->args.end = fuse_readpages_end;
err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
This is another optimization - async switches to a different core
(as of now core + 1) to send IO, but using another core also means
overhead - so set a minimal IO size for the core switch.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>

---
I didn't annotate exact benchmark data, but can extract it (needs
verification):

jobs  /dev/fuse    uring          uring         uring
                   (same core)    (core + 1)    (conditional core + 1)
  1     127598      313944         261641         330445
  2     254806      593925         576516         551392
  4     626144     1074837        1022533        1065389
  8    1535953     1892787        2038420        2087627
 16    2259253     2959607        3521665        3602580
 24    2606776     2769790        4636297        4670717
 32    2287126     2636150        5389404        5763385

I.e. this is mostly to compensate for the slight degradation with
core + 1 for small requests with few cores.
---
 fs/fuse/dev_uring.c   | 69 +++++++++++++++++++++++++++++++++++++--------------
 fs/fuse/dev_uring_i.h |  7 ++++++
 fs/fuse/file.c        | 14 ++++++++++-
 3 files changed, 70 insertions(+), 20 deletions(-)
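For illustration only, below is a minimal userspace sketch of the
conditional core + 1 heuristic. pick_qid(), MIN_ASYNC_SIZE and the cpu
parameter are hypothetical stand-ins for the kernel-side
fuse_uring_get_req_qid(), FUSE_URING_MIN_ASYNC_SIZE and
task_cpu(current); this is a model of the idea, not code from this
series.

/*
 * Userspace model (hypothetical, not part of the patch) of the
 * conditional core + 1 queue selection.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MIN_ASYNC_SIZE 16384	/* stand-in for FUSE_URING_MIN_ASYNC_SIZE */

/*
 * Pick the ring queue for a request submitted on 'cpu': large async
 * requests move to the neighbour core's queue (cpu + 1), everything
 * else stays on the submitting core to avoid context switches.
 */
static unsigned int pick_qid(unsigned int cpu, unsigned int nr_queues,
			     bool async, size_t req_size)
{
	unsigned int cpu_off = 0;

	if (async && req_size > MIN_ASYNC_SIZE)
		cpu_off = 1;

	return (cpu + cpu_off) % nr_queues;
}

int main(void)
{
	/* 8 per-core queues: a 4K async request stays on core 3,
	 * a 64K async request moves to core 4, core 7 wraps to 0 */
	printf("%u\n", pick_qid(3, 8, true, 4096));	/* 3 */
	printf("%u\n", pick_qid(3, 8, true, 65536));	/* 4 */
	printf("%u\n", pick_qid(7, 8, true, 65536));	/* 0 */
	return 0;
}

This matches the benchmark note above: for small requests the core
switch does not amortize its cost, so they stay on the submitting core,
while large requests still benefit from being handled on core + 1.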