diff mbox series

[RFC] fuse: optimize writepages search

Message ID b762fcc4-1ddf-70c2-3189-544779186d3d@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series [RFC] fuse: optimize writepages search | expand

Commit Message

Vasily Averin Sept. 19, 2019, 2:11 p.m. UTC
Dear Miklos,

This patch was originally developed for RHEL7-based 
Virtuozzo kernels and widely used few years
but seems it was not submitted upstream.
I've rebased it to v5.3, compiled but was not tested,
just to get feedback and then update it.

Author: Maxim Patlasov <mpatlasov@virtuozzo.com>

The patch re-works fi->writepages replacing list with rb-tree.
This improves performance because kernel fuse iterates through
fi->writepages for each writeback page and typical number of
entries is about 800 (for 100MB of fuse writeback).

Before patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 fs/fuse/file.c   | 60 ++++++++++++++++++++++++++++++++++++++----------
 fs/fuse/fuse_i.h |  4 ++--
 2 files changed, 50 insertions(+), 14 deletions(-)

Comments

Miklos Szeredi June 11, 2020, 8:53 a.m. UTC | #1
On Thu, Sep 19, 2019 at 4:11 PM Vasily Averin <vvs@virtuozzo.com> wrote:
>
> Dear Miklos,
>
> This patch was originally developed for RHEL7-based
> Virtuozzo kernels and widely used few years
> but seems it was not submitted upstream.
> I've rebased it to v5.3, compiled but was not tested,
> just to get feedback and then update it.

Applied and  pulled by Linus.

Thanks,
Miklos
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 91c99724dee0..ef7feb897ada 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -338,17 +338,23 @@  u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi,
 					    pgoff_t idx_from, pgoff_t idx_to)
 {
-	struct fuse_req *req;
+	struct rb_node *n;
+
+	n = fi->writepages.rb_node;
 
-	list_for_each_entry(req, &fi->writepages, writepages_entry) {
+	while (n) {
+		struct fuse_req *req;
 		pgoff_t curr_index;
 
+		req = rb_entry(n, struct fuse_req, writepages_entry);
 		WARN_ON(get_fuse_inode(req->inode) != fi);
 		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
-		if (idx_from < curr_index + req->num_pages &&
-		    curr_index <= idx_to) {
+		if (idx_from >= curr_index + req->num_pages)
+			n = n->rb_right;
+		else if (idx_to < curr_index)
+			n = n->rb_left;
+		else
 			return req;
-		}
 	}
 	return NULL;
 }
@@ -1527,7 +1533,7 @@  static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	int i;
 
-	list_del(&req->writepages_entry);
+	rb_erase(&req->writepages_entry, &fi->writepages);
 	for (i = 0; i < req->num_pages; i++) {
 		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
 		dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
@@ -1605,6 +1611,36 @@  __acquires(fi->lock)
 	}
 }
 
+static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
+{
+	pgoff_t idx_from = ins_req->misc.write.in.offset >> PAGE_SHIFT;
+	pgoff_t idx_to   = idx_from + (ins_req->num_pages ?
+				ins_req->num_pages - 1 : 0);
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	while (*p) {
+		struct fuse_req *req;
+		pgoff_t curr_index;
+
+		parent = *p;
+		req = rb_entry(parent, struct fuse_req, writepages_entry);
+		BUG_ON(req->inode != ins_req->inode);
+		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
+
+		if (idx_from >= curr_index + req->num_pages)
+			p = &(*p)->rb_right;
+		else if (idx_to < curr_index)
+			p = &(*p)->rb_left;
+		else
+			BUG();
+	}
+
+	rb_link_node(&ins_req->writepages_entry, parent, p);
+	rb_insert_color(&ins_req->writepages_entry, root);
+	return 0;
+}
+
 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 {
 	struct inode *inode = req->inode;
@@ -1619,7 +1655,7 @@  static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 		req->misc.write.next = next->misc.write.next;
 		next->misc.write.next = NULL;
 		next->ff = fuse_file_get(req->ff);
-		list_add(&next->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, next);
 
 		/*
 		 * Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1735,7 +1771,7 @@  static int fuse_writepage_locked(struct page *page)
 	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
 	spin_lock(&fi->lock);
-	list_add(&req->writepages_entry, &fi->writepages);
+	tree_insert(&fi->writepages, req);
 	list_add_tail(&req->list, &fi->queued_writes);
 	fuse_flush_writepages(inode);
 	spin_unlock(&fi->lock);
@@ -1820,10 +1856,10 @@  static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 	WARN_ON(new_req->num_pages != 0);
 
 	spin_lock(&fi->lock);
-	list_del(&new_req->writepages_entry);
+	rb_erase(&new_req->writepages_entry, &fi->writepages);
 	old_req = fuse_find_writeback(fi, page->index, page->index);
 	if (!old_req) {
-		list_add(&new_req->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, new_req);
 		spin_unlock(&fi->lock);
 		return false;
 	}
@@ -1940,7 +1976,7 @@  static int fuse_writepages_fill(struct page *page,
 		req->inode = inode;
 
 		spin_lock(&fi->lock);
-		list_add(&req->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, req);
 		spin_unlock(&fi->lock);
 
 		data->req = req;
@@ -3262,5 +3298,5 @@  void fuse_init_file_inode(struct inode *inode)
 	INIT_LIST_HEAD(&fi->queued_writes);
 	fi->writectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
-	INIT_LIST_HEAD(&fi->writepages);
+	fi->writepages = RB_ROOT;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 24dbca777775..bbb3ca395892 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -114,7 +114,7 @@  struct fuse_inode {
 			wait_queue_head_t page_waitq;
 
 			/* List of writepage requestst (pending or sent) */
-			struct list_head writepages;
+			struct rb_root writepages;
 		};
 
 		/* readdir cache (directory only) */
@@ -437,7 +437,7 @@  struct fuse_req {
 	struct fuse_io_priv *io;
 
 	/** Link on fi->writepages */
-	struct list_head writepages_entry;
+	struct rb_node writepages_entry;
 
 	/** Request completion callback */
 	void (*end)(struct fuse_conn *, struct fuse_req *);