[RFC,v4,4/9] namespace: Add umount_end superblock operation
diff mbox

Message ID 20171117174552.18722-5-JPEWhacker@gmail.com
State New
Headers show

Commit Message

Joshua Watt Nov. 17, 2017, 5:45 p.m. UTC
The umount_end operation allows cleaning of state set by umount_begin in
the event the filesystem doesn't actually get unmounted.

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 fs/namespace.c     | 22 ++++++++++++++++++++--
 include/linux/fs.h |  1 +
 2 files changed, 21 insertions(+), 2 deletions(-)

Comments

Jeff Layton Dec. 6, 2017, 11:54 a.m. UTC | #1
On Fri, 2017-11-17 at 11:45 -0600, Joshua Watt wrote:
> The umount_end operation allows cleaning of state set by umount_begin in
> the event the filesystem doesn't actually get unmounted.
> 
> Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
> ---
>  fs/namespace.c     | 22 ++++++++++++++++++++--
>  include/linux/fs.h |  1 +
>  2 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index d18deb4c410b..d2587be4d08b 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1524,6 +1524,12 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
>  	}
>  }
>  
> +static void umount_end(struct super_block *sb, int flags)
> +{
> +	if (flags & MNT_FORCE && sb->s_op->umount_end)

nit: might get some complaints from compiler about order of operations
there. I'd put parenthesis around the flags & MNT_FORCE.

> +		sb->s_op->umount_end(sb);
> +}
> +
>  static void shrink_submounts(struct mount *mnt);
>  
>  static int do_umount(struct mount *mnt, int flags)
> @@ -1589,12 +1595,16 @@ static int do_umount(struct mount *mnt, int flags)
>  		 * Special case for "unmounting" root ...
>  		 * we just try to remount it readonly.
>  		 */
> -		if (!capable(CAP_SYS_ADMIN))
> -			return -EPERM;
> +		if (!capable(CAP_SYS_ADMIN)) {
> +			retval = -EPERM;
> +			goto out_umount_end;
> +		}
>  		down_write(&sb->s_umount);
>  		if (!sb_rdonly(sb))
>  			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
>  		up_write(&sb->s_umount);
> +		/* Still mounted. Always invoke the cleanup */
> +		umount_end(sb, flags);
>  		return retval;
>  	}
>  
> @@ -1617,6 +1627,14 @@ static int do_umount(struct mount *mnt, int flags)
>  	}
>  	unlock_mount_hash();
>  	namespace_unlock();
> +
> +out_umount_end:
> +	/* If the umount failed and the file system is still mounted, allow the
> +	 * driver to cleanup any state it may have setup in umount_begin(). Note
> +	 * that this is purposely *not* called when MNT_DETACH is specified.
> +	 */
> +	if (retval)
> +		umount_end(sb, flags);
>  	return retval;
>  }
>  
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 885266aae2d7..5443c22da18f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1816,6 +1816,7 @@ struct super_operations {
>  	int (*statfs) (struct dentry *, struct kstatfs *);
>  	int (*remount_fs) (struct super_block *, int *, char *);
>  	void (*umount_begin) (struct super_block *);
> +	void (*umount_end)(struct super_block *);
>  
>  	int (*show_options)(struct seq_file *, struct dentry *);
>  	int (*show_devname)(struct seq_file *, struct dentry *);

Since this involves vfs-level changes, please cc linux-fsdevel@vger.kern
el.org on further postings. Other filesystem devs may be interested in
what you're doing here.
Al Viro Dec. 6, 2017, 12:14 p.m. UTC | #2
On Fri, Nov 17, 2017 at 11:45:47AM -0600, Joshua Watt wrote:
> The umount_end operation allows cleaning of state set by umount_begin in
> the event the filesystem doesn't actually get unmounted.

The locking doesn't make any sense.  This thing can come at *any* moment -
one process does this force-unmount of yours, another comes and accesses
the damn thing just as you've decided that umount has failed and go
to call that method.

In other words, filesystem can be accessed before and during that call.
And that is not to mention the fact that in another namespace the same
super_block might remain mounted through all of that, giving overlapping
accesses during and after ->umount_begin(), so the latter can't do
anything other than "try and kill those waiting on that fs" anyway.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Al Viro Dec. 6, 2017, 12:33 p.m. UTC | #3
On Wed, Dec 06, 2017 at 12:14:41PM +0000, Al Viro wrote:
> On Fri, Nov 17, 2017 at 11:45:47AM -0600, Joshua Watt wrote:
> > The umount_end operation allows cleaning of state set by umount_begin in
> > the event the filesystem doesn't actually get unmounted.
> 
> The locking doesn't make any sense.  This thing can come at *any* moment -
> one process does this force-unmount of yours, another comes and accesses
> the damn thing just as you've decided that umount has failed and go
> to call that method.

Consider, BTW, the situation when another umount -f comes just as you've
dropped ->s_umount.  Now your ->umount_end() comes *after* ->umount_begin()
from the second call.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joshua Watt Dec. 6, 2017, 3:41 p.m. UTC | #4
On Wed, 2017-12-06 at 12:33 +0000, Al Viro wrote:
> On Wed, Dec 06, 2017 at 12:14:41PM +0000, Al Viro wrote:
> > On Fri, Nov 17, 2017 at 11:45:47AM -0600, Joshua Watt wrote:
> > > The umount_end operation allows cleaning of state set by
> > > umount_begin in
> > > the event the filesystem doesn't actually get unmounted.
> > 
> > The locking doesn't make any sense.  This thing can come at *any*
> > moment -
> > one process does this force-unmount of yours, another comes and
> > accesses
> > the damn thing just as you've decided that umount has failed and go
> > to call that method.
> 
> Consider, BTW, the situation when another umount -f comes just as
> you've
> dropped ->s_umount.  Now your ->umount_end() comes *after*
> ->umount_begin()
> from the second call.

Yes I realised that was a posibility, which is why the NFS
implementation of this uses an atomic counter in ->umount_begin() and 
->umount_end(). My line of thinking was that most fs drivers are
probably going to ignore ->umount_end(), so it is only those that
implement it that need to actually account for that problem.

Or rather put, we can punt that problem to the FS driver writers if
they choose to implement ->umount_end(). Maybe that isn't fair to the
fs driver writers? Or maybe it just needs better documentation of that
expectation?

cc'ing linux-fsdevel@vger.kernel.org, original message chain can be
found here:
http://www.spinics.net/lists/linux-nfs/msg66483.html

Thanks,
Joshua Watt

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/namespace.c b/fs/namespace.c
index d18deb4c410b..d2587be4d08b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1524,6 +1524,12 @@  static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 	}
 }
 
+static void umount_end(struct super_block *sb, int flags)
+{
+	if (flags & MNT_FORCE && sb->s_op->umount_end)
+		sb->s_op->umount_end(sb);
+}
+
 static void shrink_submounts(struct mount *mnt);
 
 static int do_umount(struct mount *mnt, int flags)
@@ -1589,12 +1595,16 @@  static int do_umount(struct mount *mnt, int flags)
 		 * Special case for "unmounting" root ...
 		 * we just try to remount it readonly.
 		 */
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
+		if (!capable(CAP_SYS_ADMIN)) {
+			retval = -EPERM;
+			goto out_umount_end;
+		}
 		down_write(&sb->s_umount);
 		if (!sb_rdonly(sb))
 			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
 		up_write(&sb->s_umount);
+		/* Still mounted. Always invoke the cleanup */
+		umount_end(sb, flags);
 		return retval;
 	}
 
@@ -1617,6 +1627,14 @@  static int do_umount(struct mount *mnt, int flags)
 	}
 	unlock_mount_hash();
 	namespace_unlock();
+
+out_umount_end:
+	/* If the umount failed and the file system is still mounted, allow the
+	 * driver to cleanup any state it may have setup in umount_begin(). Note
+	 * that this is purposely *not* called when MNT_DETACH is specified.
+	 */
+	if (retval)
+		umount_end(sb, flags);
 	return retval;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 885266aae2d7..5443c22da18f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1816,6 +1816,7 @@  struct super_operations {
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*umount_begin) (struct super_block *);
+	void (*umount_end)(struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct dentry *);
 	int (*show_devname)(struct seq_file *, struct dentry *);