diff mbox series

[v6,03/24] erofs: add super block operations

Message ID 20190802125347.166018-4-gaoxiang25@huawei.com (mailing list archive)
State New, archived
Headers show
Series erofs: promote erofs from staging | expand

Commit Message

Gao Xiang Aug. 2, 2019, 12:53 p.m. UTC
This commit adds erofs super block operations, including (u)mount,
remount_fs, show_options, statfs, in addition to some private
icache management functions.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
 fs/erofs/super.c | 437 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 437 insertions(+)
 create mode 100644 fs/erofs/super.c

Comments

Christoph Hellwig Aug. 29, 2019, 10:15 a.m. UTC | #1
On Fri, Aug 02, 2019 at 08:53:26PM +0800, Gao Xiang wrote:
> +static int __init erofs_init_inode_cache(void)
> +{
> +	erofs_inode_cachep = kmem_cache_create("erofs_inode",
> +					       sizeof(struct erofs_vnode), 0,
> +					       SLAB_RECLAIM_ACCOUNT,
> +					       init_once);
> +
> +	return erofs_inode_cachep ? 0 : -ENOMEM;

Please just use normal if/else.  Also having this function seems
entirely pointless.

> +static void erofs_exit_inode_cache(void)
> +{
> +	kmem_cache_destroy(erofs_inode_cachep);
> +}

Same for this one.

> +static void free_inode(struct inode *inode)

Please use an erofs_ prefix for all your functions.

> +{
> +	struct erofs_vnode *vi = EROFS_V(inode);

Why is this called vnode instead of inode?  That seems like a rather
odd naming for a Linux file system.

> +
> +	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
> +	if (is_inode_fast_symlink(inode))
> +		kfree(inode->i_link);

is_inode_fast_symlink only shows up in a later patch.  And really
obsfucates the check here in the only caller as you can just do an
unconditional kfree here - i_link will be NULL except for the case
where you explicitly set it.

Also this code is nothing like ext4, so the code seems a little confusing.

> +static bool check_layout_compatibility(struct super_block *sb,
> +				       struct erofs_super_block *layout)
> +{
> +	const unsigned int requirements = le32_to_cpu(layout->requirements);

Why is the variable name for the on-disk subperblock layout?  We usually
still calls this something with sb in the name, e.g. dsb. for disk
super block.

> +	EROFS_SB(sb)->requirements = requirements;
> +
> +	/* check if current kernel meets all mandatory requirements */
> +	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
> +		errln("unidentified requirements %x, please upgrade kernel version",
> +		      requirements & ~EROFS_ALL_REQUIREMENTS);
> +		return false;
> +	}
> +	return true;

Note that normally we call this features, but that doesn't really
matter too much.

> +static int superblock_read(struct super_block *sb)
> +{
> +	struct erofs_sb_info *sbi;
> +	struct buffer_head *bh;
> +	struct erofs_super_block *layout;
> +	unsigned int blkszbits;
> +	int ret;
> +
> +	bh = sb_bread(sb, 0);

Is there any good reasons to use buffer heads like this in new code
vs directly using bios?

> +
> +	sbi->blocks = le32_to_cpu(layout->blocks);
> +	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
> +	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
> +	sbi->root_nid = le16_to_cpu(layout->root_nid);
> +	sbi->inos = le64_to_cpu(layout->inos);
> +
> +	sbi->build_time = le64_to_cpu(layout->build_time);
> +	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
> +
> +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> +	memcpy(sbi->volume_name, layout->volume_name,
> +	       sizeof(layout->volume_name));

s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
if it is le it should be a guid_t).

> +/* set up default EROFS parameters */
> +static void default_options(struct erofs_sb_info *sbi)
> +{
> +}

No need to add an empty function.

> +static int erofs_fill_super(struct super_block *sb, void *data, int silent)
> +{
> +	struct inode *inode;
> +	struct erofs_sb_info *sbi;
> +	int err;
> +
> +	infoln("fill_super, device -> %s", sb->s_id);
> +	infoln("options -> %s", (char *)data);

That is some very verbose debug info.  We usually don't add that and
let people trace the function instead.  Also you should probably
implement the new mount API.
new mount API.

> +static void erofs_kill_sb(struct super_block *sb)
> +{
> +	struct erofs_sb_info *sbi;
> +
> +	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
> +	infoln("unmounting for %s", sb->s_id);
> +
> +	kill_block_super(sb);
> +
> +	sbi = EROFS_SB(sb);
> +	if (!sbi)
> +		return;
> +	kfree(sbi);
> +	sb->s_fs_info = NULL;
> +}

Why is this needed?  You can just free your sb privatte information in
->put_super and wire up kill_block_super as the ->kill_sb method
directly.
Gao Xiang Aug. 29, 2019, 10:50 a.m. UTC | #2
Hi Christoph,

On Thu, Aug 29, 2019 at 03:15:45AM -0700, Christoph Hellwig wrote:
> On Fri, Aug 02, 2019 at 08:53:26PM +0800, Gao Xiang wrote:
> > +static int __init erofs_init_inode_cache(void)
> > +{
> > +	erofs_inode_cachep = kmem_cache_create("erofs_inode",
> > +					       sizeof(struct erofs_vnode), 0,
> > +					       SLAB_RECLAIM_ACCOUNT,
> > +					       init_once);
> > +
> > +	return erofs_inode_cachep ? 0 : -ENOMEM;
> 
> Please just use normal if/else.  Also having this function seems
> entirely pointless.
> 
> > +static void erofs_exit_inode_cache(void)
> > +{
> > +	kmem_cache_destroy(erofs_inode_cachep);
> > +}
> 
> Same for this one.
> 
> > +static void free_inode(struct inode *inode)
> 
> Please use an erofs_ prefix for all your functions.

It is already a static function, I have no idea what is wrong here.

> 
> > +{
> > +	struct erofs_vnode *vi = EROFS_V(inode);
> 
> Why is this called vnode instead of inode?  That seems like a rather
> odd naming for a Linux file system.

I don't know anything difference of that, it is just a naming.

> 
> > +
> > +	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
> > +	if (is_inode_fast_symlink(inode))
> > +		kfree(inode->i_link);
> 
> is_inode_fast_symlink only shows up in a later patch.  And really
> obsfucates the check here in the only caller as you can just do an
> unconditional kfree here - i_link will be NULL except for the case
> where you explicitly set it.

I cannot fully understand your point (sorry about my English),
I will reply you about this later.

> 
> Also this code is nothing like ext4, so the code seems a little confusing.
> 
> > +static bool check_layout_compatibility(struct super_block *sb,
> > +				       struct erofs_super_block *layout)
> > +{
> > +	const unsigned int requirements = le32_to_cpu(layout->requirements);
> 
> Why is the variable name for the on-disk subperblock layout?  We usually
> still calls this something with sb in the name, e.g. dsb. for disk
> super block.

I can change it later, sbi and dsb (It has not good meaning in Chinese, although).

> 
> > +	EROFS_SB(sb)->requirements = requirements;
> > +
> > +	/* check if current kernel meets all mandatory requirements */
> > +	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
> > +		errln("unidentified requirements %x, please upgrade kernel version",
> > +		      requirements & ~EROFS_ALL_REQUIREMENTS);
> > +		return false;
> > +	}
> > +	return true;
> 
> Note that normally we call this features, but that doesn't really
> matter too much.
> 
> > +static int superblock_read(struct super_block *sb)
> > +{
> > +	struct erofs_sb_info *sbi;
> > +	struct buffer_head *bh;
> > +	struct erofs_super_block *layout;
> > +	unsigned int blkszbits;
> > +	int ret;
> > +
> > +	bh = sb_bread(sb, 0);
> 
> Is there any good reasons to use buffer heads like this in new code
> vs directly using bios?

This page can save in bdev page cache, it contains not only the erofs
superblock so it can be fetched in page cache later.

> 
> > +
> > +	sbi->blocks = le32_to_cpu(layout->blocks);
> > +	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
> > +	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
> > +	sbi->root_nid = le16_to_cpu(layout->root_nid);
> > +	sbi->inos = le64_to_cpu(layout->inos);
> > +
> > +	sbi->build_time = le64_to_cpu(layout->build_time);
> > +	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
> > +
> > +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> > +	memcpy(sbi->volume_name, layout->volume_name,
> > +	       sizeof(layout->volume_name));
> 
> s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
> if it is le it should be a guid_t).

I just copied it from f2fs, I have no idea which one is best and
which fs I could refer to.

> 
> > +/* set up default EROFS parameters */
> > +static void default_options(struct erofs_sb_info *sbi)
> > +{
> > +}
> 
> No need to add an empty function.

Later patch will fill this function.

> 
> > +static int erofs_fill_super(struct super_block *sb, void *data, int silent)
> > +{
> > +	struct inode *inode;
> > +	struct erofs_sb_info *sbi;
> > +	int err;
> > +
> > +	infoln("fill_super, device -> %s", sb->s_id);
> > +	infoln("options -> %s", (char *)data);
> 
> That is some very verbose debug info.  We usually don't add that and
> let people trace the function instead.  Also you should probably
> implement the new mount API.
> new mount API.

Al think it is not urgent as well,
https://lore.kernel.org/driverdev-devel/20190721040547.GF17978@ZenIV.linux.org.uk/

 Al said,
 >> I agree with you, it seems better to just use s_id in community and
 >> delete erofs_mount_private stuffs...
 >> Yet I don't look into how to use new fs_context, could I keep using
 >> legacy mount interface and fix them all?
 >
 > Sure.

> 
> > +static void erofs_kill_sb(struct super_block *sb)
> > +{
> > +	struct erofs_sb_info *sbi;
> > +
> > +	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
> > +	infoln("unmounting for %s", sb->s_id);
> > +
> > +	kill_block_super(sb);
> > +
> > +	sbi = EROFS_SB(sb);
> > +	if (!sbi)
> > +		return;
> > +	kfree(sbi);
> > +	sb->s_fs_info = NULL;
> > +}
> 
> Why is this needed?  You can just free your sb privatte information in
> ->put_super and wire up kill_block_super as the ->kill_sb method
> directly.

See Al's comments,
https://lore.kernel.org/r/20190720224955.GD17978@ZenIV.linux.org.uk/

Thanks,
Gao Xiang
Christoph Hellwig Aug. 30, 2019, 4:39 p.m. UTC | #3
On Thu, Aug 29, 2019 at 06:50:48PM +0800, Gao Xiang wrote:
> > Please use an erofs_ prefix for all your functions.
> 
> It is already a static function, I have no idea what is wrong here.

Which part of all wasn't clear?  Have you looked at the prefixes for
most functions in the various other big filesystems?

> > > +	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
> > > +	if (is_inode_fast_symlink(inode))
> > > +		kfree(inode->i_link);
> > 
> > is_inode_fast_symlink only shows up in a later patch.  And really
> > obsfucates the check here in the only caller as you can just do an
> > unconditional kfree here - i_link will be NULL except for the case
> > where you explicitly set it.
> 
> I cannot fully understand your point (sorry about my English),
> I will reply you about this later.

With that I mean that you should:

 1) remove is_inode_fast_symlink and just opencode it in the few places
    using it
 2) remove the check in this place entirely as it is not needed
 3) remove the comment quoted above as it is more confusing than not
    having the comment

> > Is there any good reasons to use buffer heads like this in new code
> > vs directly using bios?
> 
> This page can save in bdev page cache, it contains not only the erofs
> superblock so it can be fetched in page cache later.

If you want it in the page cache why not use read_mapping_page or similar?

> > > +/* set up default EROFS parameters */
> > > +static void default_options(struct erofs_sb_info *sbi)
> > > +{
> > > +}
> > 
> > No need to add an empty function.
> 
> Later patch will fill this function.

Please only add the function in the patch actually adding the
functionality.

> > > +}
> > 
> > Why is this needed?  You can just free your sb privatte information in
> > ->put_super and wire up kill_block_super as the ->kill_sb method
> > directly.
> 
> See Al's comments,
> https://lore.kernel.org/r/20190720224955.GD17978@ZenIV.linux.org.uk/

With that code it makes sense.  In this paticular patch it does not.
So please add it only when actually needed.
Gao Xiang Aug. 30, 2019, 5:15 p.m. UTC | #4
Hi Christoph,

On Fri, Aug 30, 2019 at 09:39:10AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 06:50:48PM +0800, Gao Xiang wrote:
> > > Please use an erofs_ prefix for all your functions.
> > 
> > It is already a static function, I have no idea what is wrong here.
> 
> Which part of all wasn't clear?  Have you looked at the prefixes for
> most functions in the various other big filesystems?

I will add erofs prefix to free_inode as you said.

At least, all non-prefix functions in erofs are all static functions,
it won't pollute namespace... I will add "erofs_" to other meaningful
callbacks...And as you can see...

cifs/cifsfs.c
1303:cifs_init_inodecache(void)
1509:   rc = cifs_init_inodecache();

hpfs/super.c
254:static int init_inodecache(void)
771:    int err = init_inodecache();

minix/inode.c
84:static int __init init_inodecache(void)
665:    int err = init_inodecache();

isofs/inode.c
88:static int __init init_inodecache(void)
1580:   int err = init_inodecache();

bfs/inode.c
261:static int __init init_inodecache(void)
468:    int err = init_inodecache();

ext4/super.c
1144:static int __init init_inodecache(void)
6115:   err = init_inodecache();

reiserfs/super.c
666:static int __init init_inodecache(void)
2606:   ret = init_inodecache();

squashfs/super.c
406:static int __init init_inodecache(void)
430:    int err = init_inodecache();

udf/super.c
177:static int __init init_inodecache(void)
232:    err = init_inodecache();

qnx4/inode.c
358:static int init_inodecache(void)
399:    err = init_inodecache();

ufs/super.c
1463:static int __init init_inodecache(void)
1517:   int err = init_inodecache();

qnx6/inode.c
618:static int init_inodecache(void)
659:    err = init_inodecache();

f2fs/super.c
3540:static int __init init_inodecache(void)
3572:   err = init_inodecache();


> 
> > > > +	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
> > > > +	if (is_inode_fast_symlink(inode))
> > > > +		kfree(inode->i_link);
> > > 
> > > is_inode_fast_symlink only shows up in a later patch.  And really
> > > obsfucates the check here in the only caller as you can just do an
> > > unconditional kfree here - i_link will be NULL except for the case
> > > where you explicitly set it.
> > 
> > I cannot fully understand your point (sorry about my English),
> > I will reply you about this later.
> 
> With that I mean that you should:
> 
>  1) remove is_inode_fast_symlink and just opencode it in the few places
>     using it
>  2) remove the check in this place entirely as it is not needed
>  3) remove the comment quoted above as it is more confusing than not
>     having the comment

Got it, thanks!

> 
> > > Is there any good reasons to use buffer heads like this in new code
> > > vs directly using bios?
> > 
> > This page can save in bdev page cache, it contains not only the erofs
> > superblock so it can be fetched in page cache later.
> 
> If you want it in the page cache why not use read_mapping_page or similar?

It's reasonable, I will change as you suggested.
(The difference is whether it has some buffer_head to the sb page or not...)

> 
> > > > +/* set up default EROFS parameters */
> > > > +static void default_options(struct erofs_sb_info *sbi)
> > > > +{
> > > > +}
> > > 
> > > No need to add an empty function.
> > 
> > Later patch will fill this function.
> 
> Please only add the function in the patch actually adding the
> functionality.

That was my fault when spilting patches...considering
it's an >7KLOC filesystem (maybe spilting the whole xfs or
ext4 properly is more harder)... Anyway, that is my fault.

> 
> > > > +}
> > > 
> > > Why is this needed?  You can just free your sb privatte information in
> > > ->put_super and wire up kill_block_super as the ->kill_sb method
> > > directly.
> > 
> > See Al's comments,
> > https://lore.kernel.org/r/20190720224955.GD17978@ZenIV.linux.org.uk/
> 
> With that code it makes sense.  In this paticular patch it does not.
> So please add it only when actually needed.

Same as above...

Thanks,
Gao Xiang
Gao Xiang Aug. 31, 2019, 12:54 a.m. UTC | #5
Hi Christoph,

On Sat, Aug 31, 2019 at 01:15:10AM +0800, Gao Xiang wrote:

[]

> > 
> > > > > +	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
> > > > > +	if (is_inode_fast_symlink(inode))
> > > > > +		kfree(inode->i_link);
> > > > 
> > > > is_inode_fast_symlink only shows up in a later patch.  And really
> > > > obsfucates the check here in the only caller as you can just do an
> > > > unconditional kfree here - i_link will be NULL except for the case
> > > > where you explicitly set it.
> > > 
> > > I cannot fully understand your point (sorry about my English),
> > > I will reply you about this later.
> > 
> > With that I mean that you should:
> > 
> >  1) remove is_inode_fast_symlink and just opencode it in the few places
> >     using it
> >  2) remove the check in this place entirely as it is not needed

Add some words about this suggestion since I'm addressing this place, it
seems it could not (or I am not sure at least) be freed unconditionally

	union {
		struct pipe_inode_info	*i_pipe;
		struct block_device	*i_bdev;
		struct cdev		*i_cdev;
		char			*i_link;
		unsigned		i_dir_seq;
	};

while I saw what shmem did, it seems that they handle as follows:
3636 static void shmem_free_in_core_inode(struct inode *inode)
3637 {
3638         if (S_ISLNK(inode->i_mode))
3639                 kfree(inode->i_link);
3640         kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
3641 }

I think that would be some check on it to get it is a symlink (for
i_dir_seq it seems unsafe).... I think the original check is ok but
I will opencode it instead.

Thanks,
Gao Xiang
Amir Goldstein Aug. 31, 2019, 6:34 a.m. UTC | #6
On Fri, Aug 30, 2019 at 8:16 PM Gao Xiang <gaoxiang25@huawei.com> wrote:
>
> Hi Christoph,
>
> On Fri, Aug 30, 2019 at 09:39:10AM -0700, Christoph Hellwig wrote:
> > On Thu, Aug 29, 2019 at 06:50:48PM +0800, Gao Xiang wrote:
> > > > Please use an erofs_ prefix for all your functions.
> > >
> > > It is already a static function, I have no idea what is wrong here.
> >
> > Which part of all wasn't clear?  Have you looked at the prefixes for
> > most functions in the various other big filesystems?
>
> I will add erofs prefix to free_inode as you said.
>
> At least, all non-prefix functions in erofs are all static functions,
> it won't pollute namespace... I will add "erofs_" to other meaningful
> callbacks...And as you can see...
>
> cifs/cifsfs.c
> 1303:cifs_init_inodecache(void)
> 1509:   rc = cifs_init_inodecache();
>
> hpfs/super.c
> 254:static int init_inodecache(void)
> 771:    int err = init_inodecache();
>
> minix/inode.c
> 84:static int __init init_inodecache(void)
> 665:    int err = init_inodecache();
>

Hi Gao,

"They did it first" is never a good reply for code review comments.
Nobody cares if you copy&paste code with init_inodecache().
I understand why you thought static function names do not pollute
the (linker) namespace, but they do pollute the global namespace.

free_inode() as a local function name is one of the worst examples
for VFS namespace pollution.

VFS code uses function names like those a lot in the global namespace, e.g.:
clear_inode(),new_inode().

For example from recent history of namespace collision caused by your line
of thinking, see:
e6fd2093a85d md: namespace private helper names

Besides, you really have nothing to loose from prefixing everything
with erofs_, do you? It's better for review, for debugging...

Thanks,
Amir.
Gao Xiang Aug. 31, 2019, 6:48 a.m. UTC | #7
On Sat, Aug 31, 2019 at 09:34:44AM +0300, Amir Goldstein wrote:
> On Fri, Aug 30, 2019 at 8:16 PM Gao Xiang <gaoxiang25@huawei.com> wrote:
> >
> > Hi Christoph,
> >
> > On Fri, Aug 30, 2019 at 09:39:10AM -0700, Christoph Hellwig wrote:
> > > On Thu, Aug 29, 2019 at 06:50:48PM +0800, Gao Xiang wrote:
> > > > > Please use an erofs_ prefix for all your functions.
> > > >
> > > > It is already a static function, I have no idea what is wrong here.
> > >
> > > Which part of all wasn't clear?  Have you looked at the prefixes for
> > > most functions in the various other big filesystems?
> >
> > I will add erofs prefix to free_inode as you said.
> >
> > At least, all non-prefix functions in erofs are all static functions,
> > it won't pollute namespace... I will add "erofs_" to other meaningful
> > callbacks...And as you can see...
> >
> > cifs/cifsfs.c
> > 1303:cifs_init_inodecache(void)
> > 1509:   rc = cifs_init_inodecache();
> >
> > hpfs/super.c
> > 254:static int init_inodecache(void)
> > 771:    int err = init_inodecache();
> >
> > minix/inode.c
> > 84:static int __init init_inodecache(void)
> > 665:    int err = init_inodecache();
> >
> 
> Hi Gao,
> 
> "They did it first" is never a good reply for code review comments.
> Nobody cares if you copy&paste code with init_inodecache().
> I understand why you thought static function names do not pollute
> the (linker) namespace, but they do pollute the global namespace.
> 
> free_inode() as a local function name is one of the worst examples
> for VFS namespace pollution.
> 
> VFS code uses function names like those a lot in the global namespace, e.g.:
> clear_inode(),new_inode().
> 
> For example from recent history of namespace collision caused by your line
> of thinking, see:
> e6fd2093a85d md: namespace private helper names
> 
> Besides, you really have nothing to loose from prefixing everything
> with erofs_, do you? It's better for review, for debugging...

Hi Amir,

Thanks for you kind reply...

Yes, I understand that some generic header files
could have the same function names and cause bad
behaviors...

I will fix them, my only one question is "if all
function/variable names are prefixed with "erofs_"
(including all inline helpers in header files),
it seems somewhat strange... (too many statements
start "erofs_" in the source code...)"

I will fix common and short names at once...

Thanks,
Gao Xiang

> 
> Thanks,
> Amir.
Gao Xiang Sept. 1, 2019, 8:54 a.m. UTC | #8
Hi Christoph,

Here is also my redo-ed comments...

On Thu, Aug 29, 2019 at 03:15:45AM -0700, Christoph Hellwig wrote:
> On Fri, Aug 02, 2019 at 08:53:26PM +0800, Gao Xiang wrote:
> > +static int __init erofs_init_inode_cache(void)
> > +{
> > +	erofs_inode_cachep = kmem_cache_create("erofs_inode",
> > +					       sizeof(struct erofs_vnode), 0,
> > +					       SLAB_RECLAIM_ACCOUNT,
> > +					       init_once);
> > +
> > +	return erofs_inode_cachep ? 0 : -ENOMEM;
> 
> Please just use normal if/else.  Also having this function seems
> entirely pointless.

Fixed in
https://lore.kernel.org/r/20190901055130.30572-7-hsiangkao@aol.com/

> 
> > +static void erofs_exit_inode_cache(void)
> > +{
> > +	kmem_cache_destroy(erofs_inode_cachep);
> > +}
> 
> Same for this one.

Fixed in
https://lore.kernel.org/r/20190901055130.30572-7-hsiangkao@aol.com/

> 
> > +static void free_inode(struct inode *inode)
> 
> Please use an erofs_ prefix for all your functions.

free_inode and most short, common static functions are fixed in
https://lore.kernel.org/r/20190901055130.30572-19-hsiangkao@aol.com/

For all non-static functions, all are prefixed with "erofs_"

> 
> > +{
> > +	struct erofs_vnode *vi = EROFS_V(inode);
> 
> Why is this called vnode instead of inode?  That seems like a rather
> odd naming for a Linux file system.

Fixed in
https://lore.kernel.org/r/20190901055130.30572-8-hsiangkao@aol.com/

> 
> > +
> > +	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
> > +	if (is_inode_fast_symlink(inode))
> > +		kfree(inode->i_link);
> 
> is_inode_fast_symlink only shows up in a later patch.  And really
> obsfucates the check here in the only caller as you can just do an
> unconditional kfree here - i_link will be NULL except for the case
> where you explicitly set it.

Fixed in
https://lore.kernel.org/r/20190901055130.30572-10-hsiangkao@aol.com/

and with my following comments....
https://lore.kernel.org/r/20190831005446.GA233871@architecture4/

> 
> Also this code is nothing like ext4, so the code seems a little confusing.
> 
> > +static bool check_layout_compatibility(struct super_block *sb,
> > +				       struct erofs_super_block *layout)
> > +{
> > +	const unsigned int requirements = le32_to_cpu(layout->requirements);
> 
> Why is the variable name for the on-disk subperblock layout?  We usually
> still calls this something with sb in the name, e.g. dsb. for disk
> super block.

Fixed in
https://lore.kernel.org/r/20190901055130.30572-12-hsiangkao@aol.com/

> 
> > +	EROFS_SB(sb)->requirements = requirements;
> > +
> > +	/* check if current kernel meets all mandatory requirements */
> > +	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
> > +		errln("unidentified requirements %x, please upgrade kernel version",
> > +		      requirements & ~EROFS_ALL_REQUIREMENTS);
> > +		return false;
> > +	}
> > +	return true;
> 
> Note that normally we call this features, but that doesn't really
> matter too much.

No modification at this... (some comments already right here...)

 20 /* 128-byte erofs on-disk super block */
 21 struct erofs_super_block {
...
 24         __le32 features;        /* (aka. feature_compat) */
...
 38         __le32 requirements;    /* (aka. feature_incompat) */
...
 41 };

> 
> > +static int superblock_read(struct super_block *sb)
> > +{
> > +	struct erofs_sb_info *sbi;
> > +	struct buffer_head *bh;
> > +	struct erofs_super_block *layout;
> > +	unsigned int blkszbits;
> > +	int ret;
> > +
> > +	bh = sb_bread(sb, 0);
> 
> Is there any good reasons to use buffer heads like this in new code
> vs directly using bios?

As you said, I want it in the page cache.

The reason "why not use read_mapping_page or similar?" is simply
read_mapping_page -> .readpage -> (for bdev inode) block_read_full_page
 -> create_page_buffers anyway...

sb_bread haven't obsoleted... It has similar function though...

> 
> > +
> > +	sbi->blocks = le32_to_cpu(layout->blocks);
> > +	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
> > +	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
> > +	sbi->root_nid = le16_to_cpu(layout->root_nid);
> > +	sbi->inos = le64_to_cpu(layout->inos);
> > +
> > +	sbi->build_time = le64_to_cpu(layout->build_time);
> > +	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
> > +
> > +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> > +	memcpy(sbi->volume_name, layout->volume_name,
> > +	       sizeof(layout->volume_name));
> 
> s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
> if it is le it should be a guid_t).

For this case, I have no idea how to deal with...
I have little knowledge about this uuid stuff, so I just copied
from f2fs... (Could be no urgent of this field...)

> 
> > +/* set up default EROFS parameters */
> > +static void default_options(struct erofs_sb_info *sbi)
> > +{
> > +}
> 
> No need to add an empty function.

My fault of spilting patches...

> 
> > +static int erofs_fill_super(struct super_block *sb, void *data, int silent)
> > +{
> > +	struct inode *inode;
> > +	struct erofs_sb_info *sbi;
> > +	int err;
> > +
> > +	infoln("fill_super, device -> %s", sb->s_id);
> > +	infoln("options -> %s", (char *)data);
> 
> That is some very verbose debug info.  We usually don't add that and
> let people trace the function instead.  Also you should probably
> implement the new mount API.
> new mount API.

Fixed in
https://lore.kernel.org/r/20190901055130.30572-13-hsiangkao@aol.com/

(For new mount API,
 https://lore.kernel.org/r/20190721040547.GF17978@ZenIV.linux.org.uk/
 , I will a look later)

> 
> > +static void erofs_kill_sb(struct super_block *sb)
> > +{
> > +	struct erofs_sb_info *sbi;
> > +
> > +	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
> > +	infoln("unmounting for %s", sb->s_id);
> > +
> > +	kill_block_super(sb);
> > +
> > +	sbi = EROFS_SB(sb);
> > +	if (!sbi)
> > +		return;
> > +	kfree(sbi);
> > +	sb->s_fs_info = NULL;
> > +}
> 
> Why is this needed?  You can just free your sb privatte information in
> ->put_super and wire up kill_block_super as the ->kill_sb method
> directly.

The background is Al's comments in erofs v2....
(which simplify erofs_fill_super logic)
https://lore.kernel.org/r/20190720224955.GD17978@ZenIV.linux.org.uk/

with a specific notation...
https://lore.kernel.org/r/20190721040547.GF17978@ZenIV.linux.org.uk/

"
> OTOH, for the case of NULL ->s_root ->put_super() won't be called
> at all, so in that case you need it directly in ->kill_sb().
"

Thanks,
Gao Xiang
Christoph Hellwig Sept. 2, 2019, 12:51 p.m. UTC | #9
On Sun, Sep 01, 2019 at 04:54:55PM +0800, Gao Xiang wrote:
> No modification at this... (some comments already right here...)

>  20 /* 128-byte erofs on-disk super block */
>  21 struct erofs_super_block {
> ...
>  24         __le32 features;        /* (aka. feature_compat) */
> ...
>  38         __le32 requirements;    /* (aka. feature_incompat) */
> ...
>  41 };

This is only cosmetic, why not stick to feature_compat and
feature_incompat?

> > > +	bh = sb_bread(sb, 0);
> > 
> > Is there any good reasons to use buffer heads like this in new code
> > vs directly using bios?
> 
> As you said, I want it in the page cache.
> 
> The reason "why not use read_mapping_page or similar?" is simply
> read_mapping_page -> .readpage -> (for bdev inode) block_read_full_page
>  -> create_page_buffers anyway...
> 
> sb_bread haven't obsoleted... It has similar function though...

With the different that it keeps you isolated from the buffer_head
internals.  This seems to be your only direct use of buffer heads,
which while not deprecated are a bit of an ugly step child.  So if
you can easily avoid creating a buffer_head dependency in a new
filesystem I think you should avoid it.

> > > +	sbi->build_time = le64_to_cpu(layout->build_time);
> > > +	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
> > > +
> > > +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> > > +	memcpy(sbi->volume_name, layout->volume_name,
> > > +	       sizeof(layout->volume_name));
> > 
> > s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
> > if it is le it should be a guid_t).
> 
> For this case, I have no idea how to deal with...
> I have little knowledge about this uuid stuff, so I just copied
> from f2fs... (Could be no urgent of this field...)

Who fills out this field in the on-disk format and how?

> The background is Al's comments in erofs v2....
> (which simplify erofs_fill_super logic)
> https://lore.kernel.org/r/20190720224955.GD17978@ZenIV.linux.org.uk/
> 
> with a specific notation...
> https://lore.kernel.org/r/20190721040547.GF17978@ZenIV.linux.org.uk/
> 
> "
> > OTOH, for the case of NULL ->s_root ->put_super() won't be called
> > at all, so in that case you need it directly in ->kill_sb().
> "

Yes.  Although none of that is relevant for this initial version,
just after more features are added.
Gao Xiang Sept. 2, 2019, 2:43 p.m. UTC | #10
Hi Christoph,

On Mon, Sep 02, 2019 at 05:51:09AM -0700, Christoph Hellwig wrote:
> On Sun, Sep 01, 2019 at 04:54:55PM +0800, Gao Xiang wrote:
> > No modification at this... (some comments already right here...)
> 
> >  20 /* 128-byte erofs on-disk super block */
> >  21 struct erofs_super_block {
> > ...
> >  24         __le32 features;        /* (aka. feature_compat) */
> > ...
> >  38         __le32 requirements;    /* (aka. feature_incompat) */
> > ...
> >  41 };
> 
> This is only cosmetic, why not stick to feature_compat and
> feature_incompat?

Okay, will fix. (however, in my mind, I'm some confused why
"features" could be incompatible...)

> 
> > > > +	bh = sb_bread(sb, 0);
> > > 
> > > Is there any good reasons to use buffer heads like this in new code
> > > vs directly using bios?
> > 
> > As you said, I want it in the page cache.
> > 
> > The reason "why not use read_mapping_page or similar?" is simply
> > read_mapping_page -> .readpage -> (for bdev inode) block_read_full_page
> >  -> create_page_buffers anyway...
> > 
> > sb_bread haven't obsoleted... It has similar function though...
> 
> With the different that it keeps you isolated from the buffer_head
> internals.  This seems to be your only direct use of buffer heads,
> which while not deprecated are a bit of an ugly step child.  So if
> you can easily avoid creating a buffer_head dependency in a new
> filesystem I think you should avoid it.

OK, let's use read_mapping_page instead.

> 
> > > > +	sbi->build_time = le64_to_cpu(layout->build_time);
> > > > +	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
> > > > +
> > > > +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> > > > +	memcpy(sbi->volume_name, layout->volume_name,
> > > > +	       sizeof(layout->volume_name));
> > > 
> > > s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
> > > if it is le it should be a guid_t).
> > 
> > For this case, I have no idea how to deal with...
> > I have little knowledge about this uuid stuff, so I just copied
> > from f2fs... (Could be no urgent of this field...)
> 
> Who fills out this field in the on-disk format and how?

mkfs.erofs, but this field leaves 0 for now. Is that reasonable?
(using libuuid can generate it easily...)

> 
> > The background is Al's comments in erofs v2....
> > (which simplify erofs_fill_super logic)
> > https://lore.kernel.org/r/20190720224955.GD17978@ZenIV.linux.org.uk/
> > 
> > with a specific notation...
> > https://lore.kernel.org/r/20190721040547.GF17978@ZenIV.linux.org.uk/
> > 
> > "
> > > OTOH, for the case of NULL ->s_root ->put_super() won't be called
> > > at all, so in that case you need it directly in ->kill_sb().
> > "
> 
> Yes.  Although none of that is relevant for this initial version,
> just after more features are added.

This patch uses it actually... since no failure path in erofs_fill_super()
and s_root will be filled nearly at the end of the function...

Thanks,
Gao Xiang
Christoph Hellwig Sept. 2, 2019, 3:19 p.m. UTC | #11
On Mon, Sep 02, 2019 at 10:43:04PM +0800, Gao Xiang wrote:
> Hi Christoph,
> > > ...
> > >  24         __le32 features;        /* (aka. feature_compat) */
> > > ...
> > >  38         __le32 requirements;    /* (aka. feature_incompat) */
> > > ...
> > >  41 };
> > 
> > This is only cosmetic, why not stick to feature_compat and
> > feature_incompat?
> 
> Okay, will fix. (however, in my mind, I'm some confused why
> "features" could be incompatible...)

The feature is incompatible if it requires changes to the driver.
An easy to understand historic example is that ext3 originally did not
have the file types in the directory entry.  Adding them means old
file system drivers can not read a file system with this new feature,
so an incompat flag has to be added.

> > > > > +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> > > > > +	memcpy(sbi->volume_name, layout->volume_name,
> > > > > +	       sizeof(layout->volume_name));
> > > > 
> > > > s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
> > > > if it is le it should be a guid_t).
> > > 
> > > For this case, I have no idea how to deal with...
> > > I have little knowledge about this uuid stuff, so I just copied
> > > from f2fs... (Could be no urgent of this field...)
> > 
> > Who fills out this field in the on-disk format and how?
> 
> mkfs.erofs, but this field leaves 0 for now. Is that reasonable?
> (using libuuid can generate it easily...)

If the filed is always zero for now please don't fill it out.  If you
decide it is worth adding the uuid eventually please add a compat
feature flag that you have a valid uuid and only fill out the field
if the file system actualy has a valid uuid.
Gao Xiang Sept. 2, 2019, 3:24 p.m. UTC | #12
Hi Christoph,

On Mon, Sep 02, 2019 at 08:19:10AM -0700, Christoph Hellwig wrote:
> On Mon, Sep 02, 2019 at 10:43:04PM +0800, Gao Xiang wrote:
> > Hi Christoph,
> > > > ...
> > > >  24         __le32 features;        /* (aka. feature_compat) */
> > > > ...
> > > >  38         __le32 requirements;    /* (aka. feature_incompat) */
> > > > ...
> > > >  41 };
> > > 
> > > This is only cosmetic, why not stick to feature_compat and
> > > feature_incompat?
> > 
> > Okay, will fix. (however, in my mind, I'm some confused why
> > "features" could be incompatible...)
> 
> The feature is incompatible if it requires changes to the driver.
> An easy to understand historic example is that ext3 originally did not
> have the file types in the directory entry.  Adding them means old
> file system drivers can not read a file system with this new feature,
> so an incompat flag has to be added.

Got it.

> 
> > > > > > +	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
> > > > > > +	memcpy(sbi->volume_name, layout->volume_name,
> > > > > > +	       sizeof(layout->volume_name));
> > > > > 
> > > > > s_uuid should preferably be a uuid_t (assuming it is a real BE uuid,
> > > > > if it is le it should be a guid_t).
> > > > 
> > > > For this case, I have no idea how to deal with...
> > > > I have little knowledge about this uuid stuff, so I just copied
> > > > from f2fs... (Could be no urgent of this field...)
> > > 
> > > Who fills out this field in the on-disk format and how?
> > 
> > mkfs.erofs, but this field leaves 0 for now. Is that reasonable?
> > (using libuuid can generate it easily...)
> 
> If the filed is always zero for now please don't fill it out.  If you
> decide it is worth adding the uuid eventually please add a compat
> feature flag that you have a valid uuid and only fill out the field
> if the file system actualy has a valid uuid.

Okay. Will do that then (as a note here).

Thanks,
Gao Xiang
diff mbox series

Patch

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
new file mode 100644
index 000000000000..cd4bd6f48173
--- /dev/null
+++ b/fs/erofs/super.c
@@ -0,0 +1,437 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/erofs/super.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/statfs.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/erofs.h>
+
+static struct kmem_cache *erofs_inode_cachep __read_mostly;
+
+static void init_once(void *ptr)
+{
+	struct erofs_vnode *vi = ptr;
+
+	inode_init_once(&vi->vfs_inode);
+}
+
+static int __init erofs_init_inode_cache(void)
+{
+	erofs_inode_cachep = kmem_cache_create("erofs_inode",
+					       sizeof(struct erofs_vnode), 0,
+					       SLAB_RECLAIM_ACCOUNT,
+					       init_once);
+
+	return erofs_inode_cachep ? 0 : -ENOMEM;
+}
+
+static void erofs_exit_inode_cache(void)
+{
+	kmem_cache_destroy(erofs_inode_cachep);
+}
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+	struct erofs_vnode *vi =
+		kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+
+	if (!vi)
+		return NULL;
+
+	/* zero out everything except vfs_inode */
+	memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode));
+	return &vi->vfs_inode;
+}
+
+static void free_inode(struct inode *inode)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+
+	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
+	if (is_inode_fast_symlink(inode))
+		kfree(inode->i_link);
+
+	kmem_cache_free(erofs_inode_cachep, vi);
+}
+
+static bool check_layout_compatibility(struct super_block *sb,
+				       struct erofs_super_block *layout)
+{
+	const unsigned int requirements = le32_to_cpu(layout->requirements);
+
+	EROFS_SB(sb)->requirements = requirements;
+
+	/* check if current kernel meets all mandatory requirements */
+	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
+		errln("unidentified requirements %x, please upgrade kernel version",
+		      requirements & ~EROFS_ALL_REQUIREMENTS);
+		return false;
+	}
+	return true;
+}
+
+static int superblock_read(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi;
+	struct buffer_head *bh;
+	struct erofs_super_block *layout;
+	unsigned int blkszbits;
+	int ret;
+
+	bh = sb_bread(sb, 0);
+
+	if (!bh) {
+		errln("cannot read erofs superblock");
+		return -EIO;
+	}
+
+	sbi = EROFS_SB(sb);
+	layout = (struct erofs_super_block *)((u8 *)bh->b_data
+		 + EROFS_SUPER_OFFSET);
+
+	ret = -EINVAL;
+	if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) {
+		errln("cannot find valid erofs superblock");
+		goto out;
+	}
+
+	blkszbits = layout->blkszbits;
+	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
+	if (unlikely(blkszbits != LOG_BLOCK_SIZE)) {
+		errln("blksize %u isn't supported on this platform",
+		      1 << blkszbits);
+		goto out;
+	}
+
+	if (!check_layout_compatibility(sb, layout))
+		goto out;
+
+	sbi->blocks = le32_to_cpu(layout->blocks);
+	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
+	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
+	sbi->root_nid = le16_to_cpu(layout->root_nid);
+	sbi->inos = le64_to_cpu(layout->inos);
+
+	sbi->build_time = le64_to_cpu(layout->build_time);
+	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
+
+	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
+	memcpy(sbi->volume_name, layout->volume_name,
+	       sizeof(layout->volume_name));
+
+	ret = 0;
+out:
+	brelse(bh);
+	return ret;
+}
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+const char *erofs_fault_name[FAULT_MAX] = {
+	[FAULT_KMALLOC]		= "kmalloc",
+	[FAULT_READ_IO]		= "read IO error",
+};
+
+static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				     unsigned int rate)
+{
+	struct erofs_fault_info *ffi = &sbi->fault_info;
+
+	if (rate) {
+		atomic_set(&ffi->inject_ops, 0);
+		ffi->inject_rate = rate;
+		ffi->inject_type = (1 << FAULT_MAX) - 1;
+	} else {
+		memset(ffi, 0, sizeof(struct erofs_fault_info));
+	}
+
+	set_opt(sbi, FAULT_INJECTION);
+}
+
+static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				  substring_t *args)
+{
+	int rate = 0;
+
+	if (args->from && match_int(args, &rate))
+		return -EINVAL;
+
+	__erofs_build_fault_attr(sbi, rate);
+	return 0;
+}
+
+static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
+{
+	return sbi->fault_info.inject_rate;
+}
+#else
+static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				     unsigned int rate)
+{
+}
+
+static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				  substring_t *args)
+{
+	infoln("fault_injection options not supported");
+	return 0;
+}
+
+static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
+{
+	return 0;
+}
+#endif
+
+/* set up default EROFS parameters */
+static void default_options(struct erofs_sb_info *sbi)
+{
+}
+
+enum {
+	Opt_fault_injection,
+	Opt_err
+};
+
+static match_table_t erofs_tokens = {
+	{Opt_fault_injection, "fault_injection=%u"},
+	{Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *p;
+	int err;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ","))) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, erofs_tokens, args);
+
+		switch (token) {
+		case Opt_fault_injection:
+			err = erofs_build_fault_attr(EROFS_SB(sb), args);
+			if (err)
+				return err;
+			break;
+		default:
+			errln("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int erofs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode;
+	struct erofs_sb_info *sbi;
+	int err;
+
+	infoln("fill_super, device -> %s", sb->s_id);
+	infoln("options -> %s", (char *)data);
+
+	sb->s_magic = EROFS_SUPER_MAGIC;
+
+	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
+		errln("failed to set erofs blksize");
+		return -EINVAL;
+	}
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (unlikely(!sbi))
+		return -ENOMEM;
+
+	sb->s_fs_info = sbi;
+	err = superblock_read(sb);
+	if (err)
+		return err;
+
+	sb->s_flags |= SB_RDONLY | SB_NOATIME;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_time_gran = 1;
+
+	sb->s_op = &erofs_sops;
+
+	/* set erofs default mount options */
+	default_options(sbi);
+
+	err = parse_options(sb, data);
+	if (unlikely(err))
+		return err;
+
+	if (!silent)
+		infoln("root inode @ nid %llu", ROOT_NID(sbi));
+
+	/* get the root inode */
+	inode = erofs_iget(sb, ROOT_NID(sbi), true);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	if (unlikely(!S_ISDIR(inode->i_mode))) {
+		errln("rootino(nid %llu) is not a directory(i_mode %o)",
+		      ROOT_NID(sbi), inode->i_mode);
+		iput(inode);
+		return -EINVAL;
+	}
+
+	sb->s_root = d_make_root(inode);
+	if (unlikely(!sb->s_root))
+		return -ENOMEM;
+
+	if (!silent)
+		infoln("mounted on %s with opts: %s.", sb->s_id, (char *)data);
+	return 0;
+}
+
+static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags,
+				  const char *dev_name, void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super);
+}
+
+/*
+ * could be triggered after deactivate_locked_super()
+ * is called, thus including umount and failed to initialize.
+ */
+static void erofs_kill_sb(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi;
+
+	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
+	infoln("unmounting for %s", sb->s_id);
+
+	kill_block_super(sb);
+
+	sbi = EROFS_SB(sb);
+	if (!sbi)
+		return;
+	kfree(sbi);
+	sb->s_fs_info = NULL;
+}
+
+static struct file_system_type erofs_fs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "erofs",
+	.mount          = erofs_mount,
+	.kill_sb        = erofs_kill_sb,
+	.fs_flags       = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("erofs");
+
+static int __init erofs_module_init(void)
+{
+	int err;
+
+	erofs_check_ondisk_layout_definitions();
+	infoln("initializing erofs " EROFS_VERSION);
+
+	err = erofs_init_inode_cache();
+	if (err)
+		goto icache_err;
+
+	err = register_filesystem(&erofs_fs_type);
+	if (err)
+		goto fs_err;
+
+	infoln("successfully to initialize erofs");
+	return 0;
+
+fs_err:
+	erofs_exit_inode_cache();
+icache_err:
+	return err;
+}
+
+static void __exit erofs_module_exit(void)
+{
+	unregister_filesystem(&erofs_fs_type);
+	erofs_exit_inode_cache();
+	infoln("successfully finalize erofs");
+}
+
+/* get filesystem statistics */
+static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+
+	buf->f_type = sb->s_magic;
+	buf->f_bsize = EROFS_BLKSIZ;
+	buf->f_blocks = sbi->blocks;
+	buf->f_bfree = buf->f_bavail = 0;
+
+	buf->f_files = ULLONG_MAX;
+	buf->f_ffree = ULLONG_MAX - sbi->inos;
+
+	buf->f_namelen = EROFS_NAME_LEN;
+
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
+	return 0;
+}
+
+static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
+
+	if (test_opt(sbi, FAULT_INJECTION))
+		seq_printf(seq, ",fault_injection=%u",
+			   erofs_get_fault_rate(sbi));
+	return 0;
+}
+
+static int erofs_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	unsigned int org_mnt_opt = sbi->mount_opt;
+	unsigned int org_inject_rate = erofs_get_fault_rate(sbi);
+	int err;
+
+	DBG_BUGON(!sb_rdonly(sb));
+	err = parse_options(sb, data);
+	if (err)
+		goto out;
+
+	*flags |= SB_RDONLY;
+	return 0;
+out:
+	__erofs_build_fault_attr(sbi, org_inject_rate);
+	sbi->mount_opt = org_mnt_opt;
+
+	return err;
+}
+
+const struct super_operations erofs_sops = {
+	.alloc_inode = alloc_inode,
+	.free_inode = free_inode,
+	.statfs = erofs_statfs,
+	.show_options = erofs_show_options,
+	.remount_fs = erofs_remount,
+};
+
+module_init(erofs_module_init);
+module_exit(erofs_module_exit);
+
+MODULE_DESCRIPTION("Enhanced ROM File System");
+MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
+MODULE_LICENSE("GPL");
+