diff mbox series

[RFC] fuse: enable writeback cgroup to limit dirty page cache

Message ID 20240830120540.2446680-1-yangerkun@huaweicloud.com (mailing list archive)
State New
Headers show
Series [RFC] fuse: enable writeback cgroup to limit dirty page cache | expand

Commit Message

yangerkun Aug. 30, 2024, 12:05 p.m. UTC
From: Yang Erkun <yangerkun@huawei.com>

Commit 3be5a52b30aa("fuse: support writable mmap") give a strict limit
for about 1% max dirty ratio for fuse to protect that fuse won't slow
down the hole system by hogging lots of dirty memory. It works well for
system without memcg. But now for system with memcg, since fuse does
not support writeback cgroup, this max dirty ratio won't work for the
memcg's max bytes.

So it seems reasonable to enable writeback cgroup for fuse. Same commit
as above shows why we manage wb's count within fuse itself. In order to
support writeback cgroup, we need inode_to_wb to find the right wb,
and this needs some locks to pretect it. We now choose
inode->i_mapping->i_pages.xa_lock to do this.

Signed-off-by: Yang Erkun <yangerkun@huawei.com>
---
 fs/fuse/file.c  | 33 ++++++++++++++++++++++++---------
 fs/fuse/inode.c |  3 ++-
 2 files changed, 26 insertions(+), 10 deletions(-)

Comments

yangerkun Sept. 7, 2024, 3:22 a.m. UTC | #1
Hi,

Gently ping for this.

Only btrfs/ext2/ext4/f2fs/xfs/blkdev support SB_I_CGROUPWB which can
limit dirty pagecache with memcg's max bytes, and for fs like nfs/cifs
/fuse(and so on) still can not do that. Does this seems a big issue we 
should try to fix...

在 2024/8/30 20:05, Yang Erkun 写道:
> From: Yang Erkun <yangerkun@huawei.com>
> 
> Commit 3be5a52b30aa("fuse: support writable mmap") give a strict limit
> for about 1% max dirty ratio for fuse to protect that fuse won't slow
> down the hole system by hogging lots of dirty memory. It works well for
> system without memcg. But now for system with memcg, since fuse does
> not support writeback cgroup, this max dirty ratio won't work for the
> memcg's max bytes.
> 
> So it seems reasonable to enable writeback cgroup for fuse. Same commit
> as above shows why we manage wb's count within fuse itself. In order to
> support writeback cgroup, we need inode_to_wb to find the right wb,
> and this needs some locks to pretect it. We now choose
> inode->i_mapping->i_pages.xa_lock to do this.
> 
> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
> ---
>   fs/fuse/file.c  | 33 ++++++++++++++++++++++++---------
>   fs/fuse/inode.c |  3 ++-
>   2 files changed, 26 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index f39456c65ed7..4248eaf46c39 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1774,15 +1774,20 @@ static void fuse_writepage_finish(struct fuse_mount *fm,
>   {
>   	struct fuse_args_pages *ap = &wpa->ia.ap;
>   	struct inode *inode = wpa->inode;
> +	struct address_space *mapping = inode->i_mapping;
>   	struct fuse_inode *fi = get_fuse_inode(inode);
> -	struct backing_dev_info *bdi = inode_to_bdi(inode);
> +	struct bdi_writeback *wb;
> +	unsigned long flags;
>   	int i;
>   
> +	xa_lock_irqsave(&mapping->i_pages, flags);
> +	wb = inode_to_wb(inode);
>   	for (i = 0; i < ap->num_pages; i++) {
> -		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
> +		dec_wb_stat(wb, WB_WRITEBACK);
>   		dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
> -		wb_writeout_inc(&bdi->wb);
> +		wb_writeout_inc(wb);
>   	}
> +	xa_unlock_irqrestore(&mapping->i_pages, flags);
>   	wake_up(&fi->page_waitq);
>   }
>   
> @@ -2084,8 +2089,10 @@ static int fuse_writepage_locked(struct folio *folio)
>   	ap->args.end = fuse_writepage_end;
>   	wpa->inode = inode;
>   
> -	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
> +	xa_lock(&mapping->i_pages);
> +	inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
>   	node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
> +	xa_unlock(&mapping->i_pages);
>   
>   	spin_lock(&fi->lock);
>   	tree_insert(&fi->writepages, wpa);
> @@ -2169,7 +2176,8 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
>   static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
>   			       struct page *page)
>   {
> -	struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
> +	struct inode *inode = new_wpa->inode;
> +	struct fuse_inode *fi = get_fuse_inode(inode);
>   	struct fuse_writepage_args *tmp;
>   	struct fuse_writepage_args *old_wpa;
>   	struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
> @@ -2204,11 +2212,15 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
>   	spin_unlock(&fi->lock);
>   
>   	if (tmp) {
> -		struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
> +		struct address_space *mapping = inode->i_mapping;
> +		struct bdi_writeback *wb;
>   
> -		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
> +		xa_lock(&mapping->i_pages);
> +		wb = inode_to_wb(new_wpa->inode);
> +		dec_wb_stat(wb, WB_WRITEBACK);
>   		dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
> -		wb_writeout_inc(&bdi->wb);
> +		wb_writeout_inc(wb);
> +		xa_unlock(&mapping->i_pages);
>   		fuse_writepage_free(new_wpa);
>   	}
>   
> @@ -2256,6 +2268,7 @@ static int fuse_writepages_fill(struct folio *folio,
>   	struct fuse_writepage_args *wpa = data->wpa;
>   	struct fuse_args_pages *ap = &wpa->ia.ap;
>   	struct inode *inode = data->inode;
> +	struct address_space *mapping = inode->i_mapping;
>   	struct fuse_inode *fi = get_fuse_inode(inode);
>   	struct fuse_conn *fc = get_fuse_conn(inode);
>   	struct page *tmp_page;
> @@ -2319,8 +2332,10 @@ static int fuse_writepages_fill(struct folio *folio,
>   	ap->descs[ap->num_pages].length = PAGE_SIZE;
>   	data->orig_pages[ap->num_pages] = &folio->page;
>   
> -	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
> +	xa_lock(&mapping->i_pages);
> +	inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
>   	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
> +	xa_unlock(&mapping->i_pages);
>   
>   	err = 0;
>   	if (data->wpa) {
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index d8ab4e93916f..71d08f0a8514 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -1566,7 +1566,8 @@ static void fuse_sb_defaults(struct super_block *sb)
>   	sb->s_maxbytes = MAX_LFS_FILESIZE;
>   	sb->s_time_gran = 1;
>   	sb->s_export_op = &fuse_export_operations;
> -	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
> +	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE |
> +			SB_I_CGROUPWB;
>   	if (sb->s_user_ns != &init_user_ns)
>   		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
>   	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f39456c65ed7..4248eaf46c39 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1774,15 +1774,20 @@  static void fuse_writepage_finish(struct fuse_mount *fm,
 {
 	struct fuse_args_pages *ap = &wpa->ia.ap;
 	struct inode *inode = wpa->inode;
+	struct address_space *mapping = inode->i_mapping;
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	struct backing_dev_info *bdi = inode_to_bdi(inode);
+	struct bdi_writeback *wb;
+	unsigned long flags;
 	int i;
 
+	xa_lock_irqsave(&mapping->i_pages, flags);
+	wb = inode_to_wb(inode);
 	for (i = 0; i < ap->num_pages; i++) {
-		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+		dec_wb_stat(wb, WB_WRITEBACK);
 		dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
-		wb_writeout_inc(&bdi->wb);
+		wb_writeout_inc(wb);
 	}
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	wake_up(&fi->page_waitq);
 }
 
@@ -2084,8 +2089,10 @@  static int fuse_writepage_locked(struct folio *folio)
 	ap->args.end = fuse_writepage_end;
 	wpa->inode = inode;
 
-	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+	xa_lock(&mapping->i_pages);
+	inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 	node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
+	xa_unlock(&mapping->i_pages);
 
 	spin_lock(&fi->lock);
 	tree_insert(&fi->writepages, wpa);
@@ -2169,7 +2176,8 @@  static void fuse_writepages_send(struct fuse_fill_wb_data *data)
 static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
 			       struct page *page)
 {
-	struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
+	struct inode *inode = new_wpa->inode;
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_writepage_args *tmp;
 	struct fuse_writepage_args *old_wpa;
 	struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
@@ -2204,11 +2212,15 @@  static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
 	spin_unlock(&fi->lock);
 
 	if (tmp) {
-		struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
+		struct address_space *mapping = inode->i_mapping;
+		struct bdi_writeback *wb;
 
-		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+		xa_lock(&mapping->i_pages);
+		wb = inode_to_wb(new_wpa->inode);
+		dec_wb_stat(wb, WB_WRITEBACK);
 		dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
-		wb_writeout_inc(&bdi->wb);
+		wb_writeout_inc(wb);
+		xa_unlock(&mapping->i_pages);
 		fuse_writepage_free(new_wpa);
 	}
 
@@ -2256,6 +2268,7 @@  static int fuse_writepages_fill(struct folio *folio,
 	struct fuse_writepage_args *wpa = data->wpa;
 	struct fuse_args_pages *ap = &wpa->ia.ap;
 	struct inode *inode = data->inode;
+	struct address_space *mapping = inode->i_mapping;
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct page *tmp_page;
@@ -2319,8 +2332,10 @@  static int fuse_writepages_fill(struct folio *folio,
 	ap->descs[ap->num_pages].length = PAGE_SIZE;
 	data->orig_pages[ap->num_pages] = &folio->page;
 
-	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+	xa_lock(&mapping->i_pages);
+	inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	xa_unlock(&mapping->i_pages);
 
 	err = 0;
 	if (data->wpa) {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8ab4e93916f..71d08f0a8514 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1566,7 +1566,8 @@  static void fuse_sb_defaults(struct super_block *sb)
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_time_gran = 1;
 	sb->s_export_op = &fuse_export_operations;
-	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE |
+			SB_I_CGROUPWB;
 	if (sb->s_user_ns != &init_user_ns)
 		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
 	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);