diff mbox series

[RFC,v2,2/8] ovl: implement ->writepages operation

Message ID 20201025034117.4918-3-cgxu519@mykernel.net (mailing list archive)
State New, archived
Headers show
Series implement containerized syncfs for overlayfs | expand

Commit Message

Chengguang Xu Oct. 25, 2020, 3:41 a.m. UTC
Implement overlayfs' ->writepages operation so that
we can sync dirty data/metadata to the upper filesystem.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 fs/overlayfs/inode.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

Comments

Jan Kara Nov. 2, 2020, 5:17 p.m. UTC | #1
On Sun 25-10-20 11:41:11, Chengguang Xu wrote:
> Implement overlayfs' ->writepages operation so that
> we can sync dirty data/metadata to upper filesystem.
> 
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> ---
>  fs/overlayfs/inode.c | 26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
> index b584dca845ba..f27fc5be34df 100644
> --- a/fs/overlayfs/inode.c
> +++ b/fs/overlayfs/inode.c
> @@ -11,6 +11,7 @@
>  #include <linux/posix_acl.h>
>  #include <linux/ratelimit.h>
>  #include <linux/fiemap.h>
> +#include <linux/writeback.h>
>  #include "overlayfs.h"
>  
>  
> @@ -516,7 +517,32 @@ static const struct inode_operations ovl_special_inode_operations = {
>  	.update_time	= ovl_update_time,
>  };
>  
> +static int ovl_writepages(struct address_space *mapping,
> +			  struct writeback_control *wbc)
> +{
> +	struct inode *upper_inode = ovl_inode_upper(mapping->host);
> +	struct ovl_fs *ofs =  mapping->host->i_sb->s_fs_info;
> +	struct writeback_control tmp_wbc = *wbc;
> +
> +	if (!ovl_should_sync(ofs))
> +		return 0;
> +
> +	/*
> +	 * for sync(2) writeback, it has a separate external IO
> +	 * completion path by checking PAGECACHE_TAG_WRITEBACK
> +	 * in pagecache, we have to set for_sync to 0 in thie case,
> +	 * let writeback waits completion after syncing individual
> +	 * dirty inode, because we haven't implemented overlayfs'
> +	 * own pagecache yet.
> +	 */
> +	if (wbc->for_sync && (wbc->sync_mode == WB_SYNC_ALL))
> +		tmp_wbc.for_sync = 0;

This looks really hacky as it closely depends on the internal details of
writeback implementation. I'd be more open to say export wait_sb_inodes()
for overlayfs use... Because that's what I gather you need in your
overlayfs ->syncfs() implementation.

								Honza

> +
> +	return sync_inode(upper_inode, &tmp_wbc);
> +}
> +
>  static const struct address_space_operations ovl_aops = {
> +	.writepages		= ovl_writepages,
>  	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
>  	.direct_IO		= noop_direct_IO,
>  };
> -- 
> 2.26.2
> 
>
Chengguang Xu Nov. 4, 2020, 12:18 p.m. UTC | #2
---- 在 星期二, 2020-11-03 01:17:41 Jan Kara <jack@suse.cz> 撰写 ----
 > On Sun 25-10-20 11:41:11, Chengguang Xu wrote:
 > > Implement overlayfs' ->writepages operation so that
 > > we can sync dirty data/metadata to upper filesystem.
 > > 
 > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > > ---
 > >  fs/overlayfs/inode.c | 26 ++++++++++++++++++++++++++
 > >  1 file changed, 26 insertions(+)
 > > 
 > > diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
 > > index b584dca845ba..f27fc5be34df 100644
 > > --- a/fs/overlayfs/inode.c
 > > +++ b/fs/overlayfs/inode.c
 > > @@ -11,6 +11,7 @@
 > >  #include <linux/posix_acl.h>
 > >  #include <linux/ratelimit.h>
 > >  #include <linux/fiemap.h>
 > > +#include <linux/writeback.h>
 > >  #include "overlayfs.h"
 > >  
 > >  
 > > @@ -516,7 +517,32 @@ static const struct inode_operations ovl_special_inode_operations = {
 > >      .update_time    = ovl_update_time,
 > >  };
 > >  
 > > +static int ovl_writepages(struct address_space *mapping,
 > > +              struct writeback_control *wbc)
 > > +{
 > > +    struct inode *upper_inode = ovl_inode_upper(mapping->host);
 > > +    struct ovl_fs *ofs =  mapping->host->i_sb->s_fs_info;
 > > +    struct writeback_control tmp_wbc = *wbc;
 > > +
 > > +    if (!ovl_should_sync(ofs))
 > > +        return 0;
 > > +
 > > +    /*
 > > +     * for sync(2) writeback, it has a separate external IO
 > > +     * completion path by checking PAGECACHE_TAG_WRITEBACK
 > > +     * in pagecache, we have to set for_sync to 0 in thie case,
 > > +     * let writeback waits completion after syncing individual
 > > +     * dirty inode, because we haven't implemented overlayfs'
 > > +     * own pagecache yet.
 > > +     */
 > > +    if (wbc->for_sync && (wbc->sync_mode == WB_SYNC_ALL))
 > > +        tmp_wbc.for_sync = 0;
 > 
 > This looks really hacky as it closely depends on the internal details of
 > writeback implementation. I'd be more open to say export wait_sb_inodes()
 > for overlayfs use... Because that's what I gather you need in your
 > overlayfs ->syncfs() implementation.
 > 

Does that mean we gather overlayfs' synced inodes into a new (overlayfs-owned) waiting list and
do the waiting in overlayfs' ->syncfs()?


Thanks,
Chengguang
Jan Kara Nov. 5, 2020, 1:55 p.m. UTC | #3
On Wed 04-11-20 20:18:16, Chengguang Xu wrote:
>  ---- 在 星期二, 2020-11-03 01:17:41 Jan Kara <jack@suse.cz> 撰写 ----
>  > On Sun 25-10-20 11:41:11, Chengguang Xu wrote:
>  > > Implement overlayfs' ->writepages operation so that
>  > > we can sync dirty data/metadata to upper filesystem.
>  > > 
>  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>  > > ---
>  > >  fs/overlayfs/inode.c | 26 ++++++++++++++++++++++++++
>  > >  1 file changed, 26 insertions(+)
>  > > 
>  > > diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
>  > > index b584dca845ba..f27fc5be34df 100644
>  > > --- a/fs/overlayfs/inode.c
>  > > +++ b/fs/overlayfs/inode.c
>  > > @@ -11,6 +11,7 @@
>  > >  #include <linux/posix_acl.h>
>  > >  #include <linux/ratelimit.h>
>  > >  #include <linux/fiemap.h>
>  > > +#include <linux/writeback.h>
>  > >  #include "overlayfs.h"
>  > >  
>  > >  
>  > > @@ -516,7 +517,32 @@ static const struct inode_operations ovl_special_inode_operations = {
>  > >      .update_time    = ovl_update_time,
>  > >  };
>  > >  
>  > > +static int ovl_writepages(struct address_space *mapping,
>  > > +              struct writeback_control *wbc)
>  > > +{
>  > > +    struct inode *upper_inode = ovl_inode_upper(mapping->host);
>  > > +    struct ovl_fs *ofs =  mapping->host->i_sb->s_fs_info;
>  > > +    struct writeback_control tmp_wbc = *wbc;
>  > > +
>  > > +    if (!ovl_should_sync(ofs))
>  > > +        return 0;
>  > > +
>  > > +    /*
>  > > +     * for sync(2) writeback, it has a separate external IO
>  > > +     * completion path by checking PAGECACHE_TAG_WRITEBACK
>  > > +     * in pagecache, we have to set for_sync to 0 in thie case,
>  > > +     * let writeback waits completion after syncing individual
>  > > +     * dirty inode, because we haven't implemented overlayfs'
>  > > +     * own pagecache yet.
>  > > +     */
>  > > +    if (wbc->for_sync && (wbc->sync_mode == WB_SYNC_ALL))
>  > > +        tmp_wbc.for_sync = 0;
>  > 
>  > This looks really hacky as it closely depends on the internal details of
>  > writeback implementation. I'd be more open to say export wait_sb_inodes()
>  > for overlayfs use... Because that's what I gather you need in your
>  > overlayfs ->syncfs() implementation.
>  > 
> 
> Does  that mean we gather synced overlay's inode into a new waiting list(overlay's) and
> do the waiting behavior in overlay's ->syncfs() ?

My idea was that you'd just use the standard writeback logic which ends up
gathering upper_sb inodes in the upper_sb->s_inodes_wb and then wait for
them in overlay's ->syncfs(). Maybe we'll end up waiting for more inodes
than strictly necessary but it shouldn't be too bad I'd say...

								Honza
Chengguang Xu Nov. 6, 2020, 5:57 a.m. UTC | #4
---- 在 星期四, 2020-11-05 21:55:06 Jan Kara <jack@suse.cz> 撰写 ----
 > On Wed 04-11-20 20:18:16, Chengguang Xu wrote:
 > >  ---- 在 星期二, 2020-11-03 01:17:41 Jan Kara <jack@suse.cz> 撰写 ----
 > >  > On Sun 25-10-20 11:41:11, Chengguang Xu wrote:
 > >  > > Implement overlayfs' ->writepages operation so that
 > >  > > we can sync dirty data/metadata to upper filesystem.
 > >  > > 
 > >  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > >  > > ---
 > >  > >  fs/overlayfs/inode.c | 26 ++++++++++++++++++++++++++
 > >  > >  1 file changed, 26 insertions(+)
 > >  > > 
 > >  > > diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
 > >  > > index b584dca845ba..f27fc5be34df 100644
 > >  > > --- a/fs/overlayfs/inode.c
 > >  > > +++ b/fs/overlayfs/inode.c
 > >  > > @@ -11,6 +11,7 @@
 > >  > >  #include <linux/posix_acl.h>
 > >  > >  #include <linux/ratelimit.h>
 > >  > >  #include <linux/fiemap.h>
 > >  > > +#include <linux/writeback.h>
 > >  > >  #include "overlayfs.h"
 > >  > >  
 > >  > >  
 > >  > > @@ -516,7 +517,32 @@ static const struct inode_operations ovl_special_inode_operations = {
 > >  > >      .update_time    = ovl_update_time,
 > >  > >  };
 > >  > >  
 > >  > > +static int ovl_writepages(struct address_space *mapping,
 > >  > > +              struct writeback_control *wbc)
 > >  > > +{
 > >  > > +    struct inode *upper_inode = ovl_inode_upper(mapping->host);
 > >  > > +    struct ovl_fs *ofs =  mapping->host->i_sb->s_fs_info;
 > >  > > +    struct writeback_control tmp_wbc = *wbc;
 > >  > > +
 > >  > > +    if (!ovl_should_sync(ofs))
 > >  > > +        return 0;
 > >  > > +
 > >  > > +    /*
 > >  > > +     * for sync(2) writeback, it has a separate external IO
 > >  > > +     * completion path by checking PAGECACHE_TAG_WRITEBACK
 > >  > > +     * in pagecache, we have to set for_sync to 0 in thie case,
 > >  > > +     * let writeback waits completion after syncing individual
 > >  > > +     * dirty inode, because we haven't implemented overlayfs'
 > >  > > +     * own pagecache yet.
 > >  > > +     */
 > >  > > +    if (wbc->for_sync && (wbc->sync_mode == WB_SYNC_ALL))
 > >  > > +        tmp_wbc.for_sync = 0;
 > >  > 
 > >  > This looks really hacky as it closely depends on the internal details of
 > >  > writeback implementation. I'd be more open to say export wait_sb_inodes()
 > >  > for overlayfs use... Because that's what I gather you need in your
 > >  > overlayfs ->syncfs() implementation.
 > >  > 
 > > 
 > > Does  that mean we gather synced overlay's inode into a new waiting list(overlay's) and
 > > do the waiting behavior in overlay's ->syncfs() ?
 > 
 > My idea was that you'd just use the standard writeback logic which ends up
 > gathering upper_sb inodes in the upper_sb->s_inodes_wb and then wait for
 > them in overlay's ->syncfs(). Maybe we'll end up waiting for more inodes
 > than strictly necessary but it shouldn't be too bad I'd say...
 > 

Yeah, I agree with you, I'll modify it in the next version.


Thanks,
Chengguang
diff mbox series

Patch

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b584dca845ba..f27fc5be34df 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -11,6 +11,7 @@ 
 #include <linux/posix_acl.h>
 #include <linux/ratelimit.h>
 #include <linux/fiemap.h>
+#include <linux/writeback.h>
 #include "overlayfs.h"
 
 
@@ -516,7 +517,32 @@  static const struct inode_operations ovl_special_inode_operations = {
 	.update_time	= ovl_update_time,
 };
 
+static int ovl_writepages(struct address_space *mapping,
+			  struct writeback_control *wbc)
+{
+	struct inode *upper_inode = ovl_inode_upper(mapping->host);
+	struct ovl_fs *ofs =  mapping->host->i_sb->s_fs_info;
+	struct writeback_control tmp_wbc = *wbc;
+
+	if (!ovl_should_sync(ofs))
+		return 0;
+
+	/*
+	 * sync(2) writeback has a separate, external IO completion
+	 * path that checks PAGECACHE_TAG_WRITEBACK in the pagecache.
+	 * Overlayfs has no pagecache of its own yet, so clear
+	 * for_sync in this case to make writeback wait for
+	 * completion right after syncing each individual dirty
+	 * inode.
+	 */
+	if (wbc->for_sync && (wbc->sync_mode == WB_SYNC_ALL))
+		tmp_wbc.for_sync = 0;
+
+	return sync_inode(upper_inode, &tmp_wbc);
+}
+
 static const struct address_space_operations ovl_aops = {
+	.writepages		= ovl_writepages,
 	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
 	.direct_IO		= noop_direct_IO,
 };