diff mbox series

[f2fs-dev,02/12] nilfs2: use setup_bdev_super to de-duplicate the mount code

Message ID 20230802154131.2221419-3-hch@lst.de (mailing list archive)
State Accepted
Commit c1e012ea9e835f81ac6062e8aa9e72940a671b95
Headers show
Series [f2fs-dev,01/12] fs: export setup_bdev_super | expand

Commit Message

Christoph Hellwig Aug. 2, 2023, 3:41 p.m. UTC
Use the generic setup_bdev_super helper to open the main block device
and do various bits of superblock setup instead of duplicating the
logic.  This includes moving to the new scheme implemented in common
code that only opens the block device after the superblock has allocated.

It does not yet convert nilfs2 to the new mount API, but doing so will
become a bit simpler after this first step.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nilfs2/super.c | 81 ++++++++++++++++++-----------------------------
 1 file changed, 30 insertions(+), 51 deletions(-)

Comments

Jan Kara Aug. 3, 2023, 11:46 a.m. UTC | #1
On Wed 02-08-23 17:41:21, Christoph Hellwig wrote:
> Use the generic setup_bdev_super helper to open the main block device
> and do various bits of superblock setup instead of duplicating the
> logic.  This includes moving to the new scheme implemented in common
> code that only opens the block device after the superblock has allocated.
> 
> It does not yet convert nilfs2 to the new mount API, but doing so will
> become a bit simpler after this first step.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

AFAICS nilfs2 could *almost* use mount_bdev() directly and then just do its
snapshot thing after mount_bdev() returns. But it has this weird logic
that: "if the superblock is already mounted but we can shrink the whole
dcache, then do remount instead of ignoring mount options". Firstly, this
looks racy - what prevents someone from say opening a file on the sb just
after nilfs_tree_is_busy() shrinks dcache? Secondly, it is inconsistent
with any other filesystem so it's going to surprise sysadmins not
intimately knowing nilfs2. Thirdly, from userspace you cannot tell what
your mount call is going to do. Last but not least, what is it really good
for? Ryusuke, can you explain please?

								Honza

> ---
>  fs/nilfs2/super.c | 81 ++++++++++++++++++-----------------------------
>  1 file changed, 30 insertions(+), 51 deletions(-)
> 
> diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
> index 0ef8c71bde8e5f..a5d1fa4e7552f6 100644
> --- a/fs/nilfs2/super.c
> +++ b/fs/nilfs2/super.c
> @@ -35,6 +35,7 @@
>  #include <linux/writeback.h>
>  #include <linux/seq_file.h>
>  #include <linux/mount.h>
> +#include <linux/fs_context.h>
>  #include "nilfs.h"
>  #include "export.h"
>  #include "mdt.h"
> @@ -1216,7 +1217,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
>  }
>  
>  struct nilfs_super_data {
> -	struct block_device *bdev;
>  	__u64 cno;
>  	int flags;
>  };
> @@ -1283,64 +1283,49 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd)
>  
>  static int nilfs_set_bdev_super(struct super_block *s, void *data)
>  {
> -	s->s_bdev = data;
> -	s->s_dev = s->s_bdev->bd_dev;
> +	s->s_dev = *(dev_t *)data;
>  	return 0;
>  }
>  
>  static int nilfs_test_bdev_super(struct super_block *s, void *data)
>  {
> -	return (void *)s->s_bdev == data;
> +	return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
>  }
>  
>  static struct dentry *
>  nilfs_mount(struct file_system_type *fs_type, int flags,
>  	     const char *dev_name, void *data)
>  {
> -	struct nilfs_super_data sd;
> +	struct nilfs_super_data sd = { .flags = flags };
>  	struct super_block *s;
> -	struct dentry *root_dentry;
> -	int err, s_new = false;
> +	dev_t dev;
> +	int err;
>  
> -	sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
> -				     NULL);
> -	if (IS_ERR(sd.bdev))
> -		return ERR_CAST(sd.bdev);
> +	if (nilfs_identify(data, &sd))
> +		return ERR_PTR(-EINVAL);
>  
> -	sd.cno = 0;
> -	sd.flags = flags;
> -	if (nilfs_identify((char *)data, &sd)) {
> -		err = -EINVAL;
> -		goto failed;
> -	}
> +	err = lookup_bdev(dev_name, &dev);
> +	if (err)
> +		return ERR_PTR(err);
>  
> -	/*
> -	 * once the super is inserted into the list by sget, s_umount
> -	 * will protect the lockfs code from trying to start a snapshot
> -	 * while we are mounting
> -	 */
> -	mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
> -	if (sd.bdev->bd_fsfreeze_count > 0) {
> -		mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
> -		err = -EBUSY;
> -		goto failed;
> -	}
>  	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
> -		 sd.bdev);
> -	mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
> -	if (IS_ERR(s)) {
> -		err = PTR_ERR(s);
> -		goto failed;
> -	}
> +		 &dev);
> +	if (IS_ERR(s))
> +		return ERR_CAST(s);
>  
>  	if (!s->s_root) {
> -		s_new = true;
> -
> -		/* New superblock instance created */
> -		snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
> -		sb_set_blocksize(s, block_size(sd.bdev));
> -
> -		err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
> +		/*
> +		 * We drop s_umount here because we need to open the bdev and
> +		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
> +		 * __invalidate_device()). It is safe because we have active sb
> +		 * reference and SB_BORN is not set yet.
> +		 */
> +		up_write(&s->s_umount);
> +		err = setup_bdev_super(s, flags, NULL);
> +		down_write(&s->s_umount);
> +		if (!err)
> +			err = nilfs_fill_super(s, data,
> +					       flags & SB_SILENT ? 1 : 0);
>  		if (err)
>  			goto failed_super;
>  
> @@ -1366,24 +1351,18 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
>  	}
>  
>  	if (sd.cno) {
> +		struct dentry *root_dentry;
> +
>  		err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
>  		if (err)
>  			goto failed_super;
> -	} else {
> -		root_dentry = dget(s->s_root);
> +		return root_dentry;
>  	}
>  
> -	if (!s_new)
> -		blkdev_put(sd.bdev, fs_type);
> -
> -	return root_dentry;
> +	return dget(s->s_root);
>  
>   failed_super:
>  	deactivate_locked_super(s);
> -
> - failed:
> -	if (!s_new)
> -		blkdev_put(sd.bdev, fs_type);
>  	return ERR_PTR(err);
>  }
>  
> -- 
> 2.39.2
>
Ryusuke Konishi Aug. 4, 2023, 2:01 a.m. UTC | #2
On Thu, Aug 3, 2023 at 8:46 PM Jan Kara wrote:
>
> On Wed 02-08-23 17:41:21, Christoph Hellwig wrote:
> > Use the generic setup_bdev_super helper to open the main block device
> > and do various bits of superblock setup instead of duplicating the
> > logic.  This includes moving to the new scheme implemented in common
> > code that only opens the block device after the superblock has allocated.
> >
> > It does not yet convert nilfs2 to the new mount API, but doing so will
> > become a bit simpler after this first step.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> AFAICS nilfs2 could *almost* use mount_bdev() directly and then just do its

> snapshot thing after mount_bdev() returns. But it has this weird logic
> that: "if the superblock is already mounted but we can shrink the whole
> dcache, then do remount instead of ignoring mount options". Firstly, this
> looks racy - what prevents someone from say opening a file on the sb just
> after nilfs_tree_is_busy() shrinks dcache? Secondly, it is inconsistent
> with any other filesystem so it's going to surprise sysadmins not
> intimately knowing nilfs2. Thirdly, from userspace you cannot tell what
> your mount call is going to do. Last but not least, what is it really good
> for? Ryusuke, can you explain please?
>
>                                                                 Honza

I think you are referring to the following part:

>        if (!s->s_root) {
...
>        } else if (!sd.cno) {
>                if (nilfs_tree_is_busy(s->s_root)) {
>                        if ((flags ^ s->s_flags) & SB_RDONLY) {
>                                nilfs_err(s,
>                                          "the device already has a %s mount.",
>                                          sb_rdonly(s) ? "read-only" : "read/write");
>                                err = -EBUSY;
>                                goto failed_super;
>                        }
>                } else {
>                        /*
>                         * Try remount to setup mount states if the current
>                         * tree is not mounted and only snapshots use this sb.
>                         */
>                        err = nilfs_remount(s, &flags, data);
>                        if (err)
>                                goto failed_super;
>                }
>        }

What this logic is trying to do is, if there is already a nilfs2 mount
instance for the device, and are trying to mounting the current tree
(sd.cno is 0, so this is not a snapshot mount), then will switch
depending on whether the current tree has a mount:

- If the current tree is mounted, it's just like a normal filesystem.
(A read-only mount and a read/write mount can't coexist, so check
that, and reuse the instance if possible)
- Otherwise, i.e. for snapshot mounts only, do whatever is necessary
to add a new current mount, such as starting a log writer.
   Since it does the same thing that nilfs_remount does, so
nilfs_remount() is used there.

Whether or not there is a current tree mount can be determined by
d_count(s->s_root) > 1 as nilfs_tree_is_busy() does.
Where s->s_root is always the root dentry of the current tree, not
that of the mounted snapshot.

I remember that calling shrink_dcache_parent() before this test was to
do the test correctly if there was garbage left in the dcache from the
past current mount.

If the current tree isn't mounted, it just cleans up the garbage, and
the reference count wouldn't have incremented in parallel.

If the current tree is mounted, d_count(s->s_root) will not decrease
to 1, so it's not a problem.
However, this will cause unexpected dcache shrinkage for the in-use
tree, so it's not a good idea, as you pointed out.  If there is
another way of judging without this side effect, it should be
replaced.

I will reply here once.

Regards,
Ryusuke Konishi
Ryusuke Konishi Aug. 4, 2023, 5:04 a.m. UTC | #3
On Thu, Aug 3, 2023 at 12:41 AM Christoph Hellwig wrote:
>
> Use the generic setup_bdev_super helper to open the main block device
> and do various bits of superblock setup instead of duplicating the
> logic.  This includes moving to the new scheme implemented in common
> code that only opens the block device after the superblock has allocated.
>
> It does not yet convert nilfs2 to the new mount API, but doing so will
> become a bit simpler after this first step.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>

This patch itself looks to properly convert nilfs_mount etc.  Thank you so much.

Regards,
Ryusuke Konishi
Jan Kara Aug. 10, 2023, 11:05 a.m. UTC | #4
On Fri 04-08-23 11:01:39, Ryusuke Konishi wrote:
> On Thu, Aug 3, 2023 at 8:46 PM Jan Kara wrote:
> >
> > On Wed 02-08-23 17:41:21, Christoph Hellwig wrote:
> > > Use the generic setup_bdev_super helper to open the main block device
> > > and do various bits of superblock setup instead of duplicating the
> > > logic.  This includes moving to the new scheme implemented in common
> > > code that only opens the block device after the superblock has allocated.
> > >
> > > It does not yet convert nilfs2 to the new mount API, but doing so will
> > > become a bit simpler after this first step.
> > >
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> >
> > AFAICS nilfs2 could *almost* use mount_bdev() directly and then just do its
> 
> > snapshot thing after mount_bdev() returns. But it has this weird logic
> > that: "if the superblock is already mounted but we can shrink the whole
> > dcache, then do remount instead of ignoring mount options". Firstly, this
> > looks racy - what prevents someone from say opening a file on the sb just
> > after nilfs_tree_is_busy() shrinks dcache? Secondly, it is inconsistent
> > with any other filesystem so it's going to surprise sysadmins not
> > intimately knowing nilfs2. Thirdly, from userspace you cannot tell what
> > your mount call is going to do. Last but not least, what is it really good
> > for? Ryusuke, can you explain please?
> >
> >                                                                 Honza
> 
> I think you are referring to the following part:
> 
> >        if (!s->s_root) {
> ...
> >        } else if (!sd.cno) {
> >                if (nilfs_tree_is_busy(s->s_root)) {
> >                        if ((flags ^ s->s_flags) & SB_RDONLY) {
> >                                nilfs_err(s,
> >                                          "the device already has a %s mount.",
> >                                          sb_rdonly(s) ? "read-only" : "read/write");
> >                                err = -EBUSY;
> >                                goto failed_super;
> >                        }
> >                } else {
> >                        /*
> >                         * Try remount to setup mount states if the current
> >                         * tree is not mounted and only snapshots use this sb.
> >                         */
> >                        err = nilfs_remount(s, &flags, data);
> >                        if (err)
> >                                goto failed_super;
> >                }
> >        }
> 
> What this logic is trying to do is, if there is already a nilfs2 mount
> instance for the device, and are trying to mounting the current tree
> (sd.cno is 0, so this is not a snapshot mount), then will switch
> depending on whether the current tree has a mount:
> 
> - If the current tree is mounted, it's just like a normal filesystem.
> (A read-only mount and a read/write mount can't coexist, so check
> that, and reuse the instance if possible)
> - Otherwise, i.e. for snapshot mounts only, do whatever is necessary
> to add a new current mount, such as starting a log writer.
>    Since it does the same thing that nilfs_remount does, so
> nilfs_remount() is used there.
> 
> Whether or not there is a current tree mount can be determined by
> d_count(s->s_root) > 1 as nilfs_tree_is_busy() does.
> Where s->s_root is always the root dentry of the current tree, not
> that of the mounted snapshot.

I see now, thanks for explanation! But one thing still is not clear to me.
If you say have a snapshot mounted read-write and then you mount the
current snapshot (cno == 0) read-only, you'll switch the whole superblock
to read-only state. So also the mounted snapshot is suddently read-only
which is unexpected and actually supposedly breaks things because you can
still have file handles open for writing on the snapshot etc.. So how do
you solve that?

								Honza
Ryusuke Konishi Aug. 10, 2023, 4:39 p.m. UTC | #5
On Thu, Aug 10, 2023 at 8:05 PM Jan Kara wrote:
>
> On Fri 04-08-23 11:01:39, Ryusuke Konishi wrote:
> > On Thu, Aug 3, 2023 at 8:46 PM Jan Kara wrote:
> > >
> > > On Wed 02-08-23 17:41:21, Christoph Hellwig wrote:
> > > > Use the generic setup_bdev_super helper to open the main block device
> > > > and do various bits of superblock setup instead of duplicating the
> > > > logic.  This includes moving to the new scheme implemented in common
> > > > code that only opens the block device after the superblock has allocated.
> > > >
> > > > It does not yet convert nilfs2 to the new mount API, but doing so will
> > > > become a bit simpler after this first step.
> > > >
> > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > >
> > > AFAICS nilfs2 could *almost* use mount_bdev() directly and then just do its
> >
> > > snapshot thing after mount_bdev() returns. But it has this weird logic
> > > that: "if the superblock is already mounted but we can shrink the whole
> > > dcache, then do remount instead of ignoring mount options". Firstly, this
> > > looks racy - what prevents someone from say opening a file on the sb just
> > > after nilfs_tree_is_busy() shrinks dcache? Secondly, it is inconsistent
> > > with any other filesystem so it's going to surprise sysadmins not
> > > intimately knowing nilfs2. Thirdly, from userspace you cannot tell what
> > > your mount call is going to do. Last but not least, what is it really good
> > > for? Ryusuke, can you explain please?
> > >
> > >                                                                 Honza
> >
> > I think you are referring to the following part:
> >
> > >        if (!s->s_root) {
> > ...
> > >        } else if (!sd.cno) {
> > >                if (nilfs_tree_is_busy(s->s_root)) {
> > >                        if ((flags ^ s->s_flags) & SB_RDONLY) {
> > >                                nilfs_err(s,
> > >                                          "the device already has a %s mount.",
> > >                                          sb_rdonly(s) ? "read-only" : "read/write");
> > >                                err = -EBUSY;
> > >                                goto failed_super;
> > >                        }
> > >                } else {
> > >                        /*
> > >                         * Try remount to setup mount states if the current
> > >                         * tree is not mounted and only snapshots use this sb.
> > >                         */
> > >                        err = nilfs_remount(s, &flags, data);
> > >                        if (err)
> > >                                goto failed_super;
> > >                }
> > >        }
> >
> > What this logic is trying to do is, if there is already a nilfs2 mount
> > instance for the device, and are trying to mounting the current tree
> > (sd.cno is 0, so this is not a snapshot mount), then will switch
> > depending on whether the current tree has a mount:
> >
> > - If the current tree is mounted, it's just like a normal filesystem.
> > (A read-only mount and a read/write mount can't coexist, so check
> > that, and reuse the instance if possible)
> > - Otherwise, i.e. for snapshot mounts only, do whatever is necessary
> > to add a new current mount, such as starting a log writer.
> >    Since it does the same thing that nilfs_remount does, so
> > nilfs_remount() is used there.
> >
> > Whether or not there is a current tree mount can be determined by
> > d_count(s->s_root) > 1 as nilfs_tree_is_busy() does.
> > Where s->s_root is always the root dentry of the current tree, not
> > that of the mounted snapshot.
>
> I see now, thanks for explanation! But one thing still is not clear to me.
> If you say have a snapshot mounted read-write and then you mount the
> current snapshot (cno == 0) read-only, you'll switch the whole superblock
> to read-only state. So also the mounted snapshot is suddently read-only
> which is unexpected and actually supposedly breaks things because you can
> still have file handles open for writing on the snapshot etc.. So how do
> you solve that?
>
>                                                                 Honza

One thing I have to tell you as a premise is that nilfs2's snapshot
mounts (cno != 0) are read-only.

The read-only option is mandatory for nilfs2 snapshot mounts, so
remounting to read/write mode will result in an error.
This constraint is checked in nilfs_parse_snapshot_option() which is
called from nilfs_identify().

In fact, any write mode file/inode operations on a snapshot mount will
result in an EROFS error, regardless of whether the coexisting current
tree mount is read-only or read/write (i.e. regardless of the
read-only flag of the superblock instance).

This is mostly (and possibly entirely) accomplished at the vfs layer
by checking the MNT_READONLY flag in mnt_flags of the vfsmount
structure, and even on the nilfs2 side,  iops->permission
(=nilfs_permission) rejects write operations on snapshot mounts.

Therefore, the problem you pointed out shouldn't occur in the first
place since the situation where a snapshot with a handle in write mode
suddenly becomes read-only doesn't happen.   Unless I'm missing
something..

Regards,
Ryusuke Konishi
Jan Kara Aug. 10, 2023, 6:14 p.m. UTC | #6
On Fri 11-08-23 01:39:10, Ryusuke Konishi wrote:
> On Thu, Aug 10, 2023 at 8:05 PM Jan Kara wrote:
> >
> > On Fri 04-08-23 11:01:39, Ryusuke Konishi wrote:
> > > On Thu, Aug 3, 2023 at 8:46 PM Jan Kara wrote:
> > > >
> > > > On Wed 02-08-23 17:41:21, Christoph Hellwig wrote:
> > > > > Use the generic setup_bdev_super helper to open the main block device
> > > > > and do various bits of superblock setup instead of duplicating the
> > > > > logic.  This includes moving to the new scheme implemented in common
> > > > > code that only opens the block device after the superblock has allocated.
> > > > >
> > > > > It does not yet convert nilfs2 to the new mount API, but doing so will
> > > > > become a bit simpler after this first step.
> > > > >
> > > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > >
> > > > AFAICS nilfs2 could *almost* use mount_bdev() directly and then just do its
> > >
> > > > snapshot thing after mount_bdev() returns. But it has this weird logic
> > > > that: "if the superblock is already mounted but we can shrink the whole
> > > > dcache, then do remount instead of ignoring mount options". Firstly, this
> > > > looks racy - what prevents someone from say opening a file on the sb just
> > > > after nilfs_tree_is_busy() shrinks dcache? Secondly, it is inconsistent
> > > > with any other filesystem so it's going to surprise sysadmins not
> > > > intimately knowing nilfs2. Thirdly, from userspace you cannot tell what
> > > > your mount call is going to do. Last but not least, what is it really good
> > > > for? Ryusuke, can you explain please?
> > > >
> > > >                                                                 Honza
> > >
> > > I think you are referring to the following part:
> > >
> > > >        if (!s->s_root) {
> > > ...
> > > >        } else if (!sd.cno) {
> > > >                if (nilfs_tree_is_busy(s->s_root)) {
> > > >                        if ((flags ^ s->s_flags) & SB_RDONLY) {
> > > >                                nilfs_err(s,
> > > >                                          "the device already has a %s mount.",
> > > >                                          sb_rdonly(s) ? "read-only" : "read/write");
> > > >                                err = -EBUSY;
> > > >                                goto failed_super;
> > > >                        }
> > > >                } else {
> > > >                        /*
> > > >                         * Try remount to setup mount states if the current
> > > >                         * tree is not mounted and only snapshots use this sb.
> > > >                         */
> > > >                        err = nilfs_remount(s, &flags, data);
> > > >                        if (err)
> > > >                                goto failed_super;
> > > >                }
> > > >        }
> > >
> > > What this logic is trying to do is, if there is already a nilfs2 mount
> > > instance for the device, and are trying to mounting the current tree
> > > (sd.cno is 0, so this is not a snapshot mount), then will switch
> > > depending on whether the current tree has a mount:
> > >
> > > - If the current tree is mounted, it's just like a normal filesystem.
> > > (A read-only mount and a read/write mount can't coexist, so check
> > > that, and reuse the instance if possible)
> > > - Otherwise, i.e. for snapshot mounts only, do whatever is necessary
> > > to add a new current mount, such as starting a log writer.
> > >    Since it does the same thing that nilfs_remount does, so
> > > nilfs_remount() is used there.
> > >
> > > Whether or not there is a current tree mount can be determined by
> > > d_count(s->s_root) > 1 as nilfs_tree_is_busy() does.
> > > Where s->s_root is always the root dentry of the current tree, not
> > > that of the mounted snapshot.
> >
> > I see now, thanks for explanation! But one thing still is not clear to me.
> > If you say have a snapshot mounted read-write and then you mount the
> > current snapshot (cno == 0) read-only, you'll switch the whole superblock
> > to read-only state. So also the mounted snapshot is suddently read-only
> > which is unexpected and actually supposedly breaks things because you can
> > still have file handles open for writing on the snapshot etc.. So how do
> > you solve that?
> >
> >                                                                 Honza
> 
> One thing I have to tell you as a premise is that nilfs2's snapshot
> mounts (cno != 0) are read-only.
> 
> The read-only option is mandatory for nilfs2 snapshot mounts, so
> remounting to read/write mode will result in an error.
> This constraint is checked in nilfs_parse_snapshot_option() which is
> called from nilfs_identify().
> 
> In fact, any write mode file/inode operations on a snapshot mount will
> result in an EROFS error, regardless of whether the coexisting current
> tree mount is read-only or read/write (i.e. regardless of the
> read-only flag of the superblock instance).
> 
> This is mostly (and possibly entirely) accomplished at the vfs layer
> by checking the MNT_READONLY flag in mnt_flags of the vfsmount
> structure, and even on the nilfs2 side,  iops->permission
> (=nilfs_permission) rejects write operations on snapshot mounts.
> 
> Therefore, the problem you pointed out shouldn't occur in the first
> place since the situation where a snapshot with a handle in write mode
> suddenly becomes read-only doesn't happen.   Unless I'm missing
> something..

No, I think you are correct. This particular case should be safe because
MNT_READONLY flags on the mounts used by snapshots will still keep them
read-only even if you remount the superblock to read-write mode for the
current snapshot. So I see why this is useful and I agree this isn't easy
to implement using mount_bdev() so no special code reduction here ;).
Thanks for patient explanation!

								Honza
diff mbox series

Patch

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0ef8c71bde8e5f..a5d1fa4e7552f6 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -35,6 +35,7 @@ 
 #include <linux/writeback.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 #include "nilfs.h"
 #include "export.h"
 #include "mdt.h"
@@ -1216,7 +1217,6 @@  static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 }
 
 struct nilfs_super_data {
-	struct block_device *bdev;
 	__u64 cno;
 	int flags;
 };
@@ -1283,64 +1283,49 @@  static int nilfs_identify(char *data, struct nilfs_super_data *sd)
 
 static int nilfs_set_bdev_super(struct super_block *s, void *data)
 {
-	s->s_bdev = data;
-	s->s_dev = s->s_bdev->bd_dev;
+	s->s_dev = *(dev_t *)data;
 	return 0;
 }
 
 static int nilfs_test_bdev_super(struct super_block *s, void *data)
 {
-	return (void *)s->s_bdev == data;
+	return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 static struct dentry *
 nilfs_mount(struct file_system_type *fs_type, int flags,
 	     const char *dev_name, void *data)
 {
-	struct nilfs_super_data sd;
+	struct nilfs_super_data sd = { .flags = flags };
 	struct super_block *s;
-	struct dentry *root_dentry;
-	int err, s_new = false;
+	dev_t dev;
+	int err;
 
-	sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
-				     NULL);
-	if (IS_ERR(sd.bdev))
-		return ERR_CAST(sd.bdev);
+	if (nilfs_identify(data, &sd))
+		return ERR_PTR(-EINVAL);
 
-	sd.cno = 0;
-	sd.flags = flags;
-	if (nilfs_identify((char *)data, &sd)) {
-		err = -EINVAL;
-		goto failed;
-	}
+	err = lookup_bdev(dev_name, &dev);
+	if (err)
+		return ERR_PTR(err);
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
-	mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
-	if (sd.bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
-		err = -EBUSY;
-		goto failed;
-	}
 	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
-		 sd.bdev);
-	mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
-	if (IS_ERR(s)) {
-		err = PTR_ERR(s);
-		goto failed;
-	}
+		 &dev);
+	if (IS_ERR(s))
+		return ERR_CAST(s);
 
 	if (!s->s_root) {
-		s_new = true;
-
-		/* New superblock instance created */
-		snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
-		sb_set_blocksize(s, block_size(sd.bdev));
-
-		err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+		/*
+		 * We drop s_umount here because we need to open the bdev and
+		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+		 * __invalidate_device()). It is safe because we have active sb
+		 * reference and SB_BORN is not set yet.
+		 */
+		up_write(&s->s_umount);
+		err = setup_bdev_super(s, flags, NULL);
+		down_write(&s->s_umount);
+		if (!err)
+			err = nilfs_fill_super(s, data,
+					       flags & SB_SILENT ? 1 : 0);
 		if (err)
 			goto failed_super;
 
@@ -1366,24 +1351,18 @@  nilfs_mount(struct file_system_type *fs_type, int flags,
 	}
 
 	if (sd.cno) {
+		struct dentry *root_dentry;
+
 		err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
 		if (err)
 			goto failed_super;
-	} else {
-		root_dentry = dget(s->s_root);
+		return root_dentry;
 	}
 
-	if (!s_new)
-		blkdev_put(sd.bdev, fs_type);
-
-	return root_dentry;
+	return dget(s->s_root);
 
  failed_super:
 	deactivate_locked_super(s);
-
- failed:
-	if (!s_new)
-		blkdev_put(sd.bdev, fs_type);
 	return ERR_PTR(err);
 }