diff mbox series

[v2,03/24] erofs: add super block operations

Message ID 20190711145755.33908-4-gaoxiang25@huawei.com (mailing list archive)
State New, archived
Headers show
Series erofs: promote erofs from staging | expand

Commit Message

Gao Xiang July 11, 2019, 2:57 p.m. UTC
This commit adds erofs super block operations, including (u)mount,
remount_fs, show_options, statfs, in addition to some private
icache management functions.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
 fs/erofs/super.c | 502 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 502 insertions(+)
 create mode 100644 fs/erofs/super.c

Comments

Al Viro July 20, 2019, 10:49 p.m. UTC | #1
On Thu, Jul 11, 2019 at 10:57:34PM +0800, Gao Xiang wrote:
> This commit adds erofs super block operations, including (u)mount,
> remount_fs, show_options, statfs, in addition to some private
> icache management functions.

Could you explain what's the point of this

> +	/* save the device name to sbi */
> +	sbi->dev_name = __getname();
> +	if (!sbi->dev_name) {
> +		err = -ENOMEM;
> +		goto err_devname;
> +	}
> +
> +	snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
> +	sbi->dev_name[PATH_MAX - 1] = '\0';

... and this?

> +struct erofs_mount_private {
> +	const char *dev_name;
> +	char *options;
> +};
> +
> +/* support mount_bdev() with options */
> +static int erofs_fill_super(struct super_block *sb,
> +			    void *_priv, int silent)
> +{
> +	struct erofs_mount_private *priv = _priv;
> +
> +	return erofs_read_super(sb, priv->dev_name,
> +		priv->options, silent);
> +}
> +
> +static struct dentry *erofs_mount(
> +	struct file_system_type *fs_type, int flags,
> +	const char *dev_name, void *data)
> +{
> +	struct erofs_mount_private priv = {
> +		.dev_name = dev_name,
> +		.options = data
> +	};
> +
> +	return mount_bdev(fs_type, flags, dev_name,
> +		&priv, erofs_fill_super);
> +}

AFAICS, the only use of sbi->dev_name is debugging printks and
all of those have sb->s_id available, with device name stored
in there.  Which makes the whole thing bloody weird - what's
wrong with simply passing data to mount_bdev (instead of
&priv), folding erofs_read_super() into erofs_fill_super(),
replacing sbi->dev_name with sb->s_id and killing sbi->dev_name,
along with the associated allocation, freeing, handling of
allocation failure, etc.?

For drivers/staging location that would be (compile-tested only)
the diff below.  I suspect that you could simplify fill_super
a bit further if you added ->kill_sb() along the lines of

	sbi = EROFS(sb);
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	if (sbi && !sb->s_root)
		iput(sbi->managed_cache);
#endif
	kill_block_super(sb);
	kfree(sbi);
and took freeing sbi out of your ->put_super().  Then fill_super()
would simply return -E... on all failure exits, leaving all cleanup
to ->kill_sb().  E.g. initialization of the same ->managed_cache
would become
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	inode = erofs_init_managed_cache(sb);
        if (IS_ERR(inode))
		return PTR_ERR(inode);
	sbi->managed_cache = inode;
#endif
etc.  Matter of taste, but IME if destructor parallels the cleanups on failure
exits in constructor it often makes sense to make use of that and kill the
duplication...  Anyway, that's a separate story; sbi->dev_name is a lot more
obvious one.


diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index 382258fc124d..16bab07e69d8 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -117,8 +117,6 @@ struct erofs_sb_info {
 	u8 volume_name[16];             /* volume name */
 	u32 requirements;
 
-	char *dev_name;
-
 	unsigned int mount_opt;
 	unsigned int shrinker_run_no;
 
diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
index cadbcc11702a..a6ee69d0ce45 100644
--- a/drivers/staging/erofs/super.c
+++ b/drivers/staging/erofs/super.c
@@ -367,15 +367,14 @@ static struct inode *erofs_init_managed_cache(struct super_block *sb)
 
 #endif
 
-static int erofs_read_super(struct super_block *sb,
-			    const char *dev_name,
+static int erofs_fill_super(struct super_block *sb,
 			    void *data, int silent)
 {
 	struct inode *inode;
 	struct erofs_sb_info *sbi;
 	int err = -EINVAL;
 
-	infoln("read_super, device -> %s", dev_name);
+	infoln("read_super, device -> %s", sb->s_id);
 	infoln("options -> %s", (char *)data);
 
 	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
@@ -453,20 +452,10 @@ static int erofs_read_super(struct super_block *sb,
 		goto err_iget;
 	}
 
-	/* save the device name to sbi */
-	sbi->dev_name = __getname();
-	if (!sbi->dev_name) {
-		err = -ENOMEM;
-		goto err_devname;
-	}
-
-	snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
-	sbi->dev_name[PATH_MAX - 1] = '\0';
-
 	erofs_register_super(sb);
 
 	if (!silent)
-		infoln("mounted on %s with opts: %s.", dev_name,
+		infoln("mounted on %s with opts: %s.", sb->s_id,
 		       (char *)data);
 	return 0;
 	/*
@@ -474,9 +463,6 @@ static int erofs_read_super(struct super_block *sb,
 	 * the following name convention, thus new features
 	 * can be integrated easily without renaming labels.
 	 */
-err_devname:
-	dput(sb->s_root);
-	sb->s_root = NULL;
 err_iget:
 #ifdef EROFS_FS_HAS_MANAGED_CACHE
 	iput(sbi->managed_cache);
@@ -504,8 +490,7 @@ static void erofs_put_super(struct super_block *sb)
 
 	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
 
-	infoln("unmounted for %s", sbi->dev_name);
-	__putname(sbi->dev_name);
+	infoln("unmounted for %s", sb->s_id);
 
 #ifdef EROFS_FS_HAS_MANAGED_CACHE
 	iput(sbi->managed_cache);
@@ -525,33 +510,12 @@ static void erofs_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-
-struct erofs_mount_private {
-	const char *dev_name;
-	char *options;
-};
-
-/* support mount_bdev() with options */
-static int erofs_fill_super(struct super_block *sb,
-			    void *_priv, int silent)
-{
-	struct erofs_mount_private *priv = _priv;
-
-	return erofs_read_super(sb, priv->dev_name,
-		priv->options, silent);
-}
-
 static struct dentry *erofs_mount(
 	struct file_system_type *fs_type, int flags,
 	const char *dev_name, void *data)
 {
-	struct erofs_mount_private priv = {
-		.dev_name = dev_name,
-		.options = data
-	};
-
 	return mount_bdev(fs_type, flags, dev_name,
-		&priv, erofs_fill_super);
+		data, erofs_fill_super);
 }
 
 static void erofs_kill_sb(struct super_block *sb)
Gao Xiang July 21, 2019, 3:08 a.m. UTC | #2
Hi Al,

On 2019/7/21 上午6:49, Al Viro wrote:
> On Thu, Jul 11, 2019 at 10:57:34PM +0800, Gao Xiang wrote:
>> This commit adds erofs super block operations, including (u)mount,
>> remount_fs, show_options, statfs, in addition to some private
>> icache management functions.
> Could you explain what's the point of this
>
>> +	/* save the device name to sbi */
>> +	sbi->dev_name = __getname();
>> +	if (!sbi->dev_name) {
>> +		err = -ENOMEM;
>> +		goto err_devname;
>> +	}
>> +
>> +	snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
>> +	sbi->dev_name[PATH_MAX - 1] = '\0';
> ... and this?

Thanks for your kindly reply...

Yes, I remember that you already mentioned the above code months ago...
The details are as follows:

It is for debugging use, as you said below, mainly for our internal
testers whose job is to read kmsg logs and catch kernel problems.
sb->s_id (device number) may not be as straightforward for them as
dev_name...

The initial purpose of erofs_mount_private was to pass multiple pieces of
private data from erofs_mount to erofs_read_super; it was written before
fs_context was introduced.

I agree with you, it seems better to just use s_id in the community code
and delete the erofs_mount_private stuff...
I haven't looked into how to use the new fs_context yet, though; could I
keep using the legacy mount interface and fix them all?


>
>> +struct erofs_mount_private {
>> +	const char *dev_name;
>> +	char *options;
>> +};
>> +
>> +/* support mount_bdev() with options */
>> +static int erofs_fill_super(struct super_block *sb,
>> +			    void *_priv, int silent)
>> +{
>> +	struct erofs_mount_private *priv = _priv;
>> +
>> +	return erofs_read_super(sb, priv->dev_name,
>> +		priv->options, silent);
>> +}
>> +
>> +static struct dentry *erofs_mount(
>> +	struct file_system_type *fs_type, int flags,
>> +	const char *dev_name, void *data)
>> +{
>> +	struct erofs_mount_private priv = {
>> +		.dev_name = dev_name,
>> +		.options = data
>> +	};
>> +
>> +	return mount_bdev(fs_type, flags, dev_name,
>> +		&priv, erofs_fill_super);
>> +}
> AFAICS, the only use of sbi->dev_name is debugging printks and
> all of those have sb->s_id available, with device name stored
> in there.  Which makes the whole thing bloody weird - what's
> wrong with simply passing data to mount_bdev (instead of
> &priv), folding erofs_read_super() into erofs_fill_super(),
> replacing sbi->dev_name with sb->s_id and killing sbi->dev_name,
> along with the associated allocation, freeing, handling of
> allocation failure, etc.?


OK, make sense. I will do...


>
> For drivers/staging location that would be (compile-tested only)
> the diff below.  I suspect that you could simplify fill_super
> a bit further if you added ->kill_sb() along the lines of
>
> 	sbi = EROFS(sb);
> #ifdef EROFS_FS_HAS_MANAGED_CACHE
> 	if (sbi && !sb->s_root)
> 		iput(sbi->managed_cache);
> #endif
> 	kill_block_super(sb);
> 	kfree(sbi);
> and took freeing sbi out of your ->put_super().  Then fill_super()
> would simply return -E... on all failure exits, leaving all cleanup
> to ->kill_sb().  E.g. initialization of the same ->managed_cache
> would become
> #ifdef EROFS_FS_HAS_MANAGED_CACHE
> 	inode = erofs_init_managed_cache(sb);
>         if (IS_ERR(inode))
> 		return PTR_ERR(inode);
> 	sbi->managed_cache = inode;
> #endif
> etc.  Matter of taste, but IME if destructor parallels the cleanups on failure
> exits in constructor it often makes sense to make use of that and kill the
> duplication...  Anyway, that's a separate story; sbi->dev_name is a lot more
> obvious one.


If I understand correctly, that is another suggestion -- in short, leave
all destructors to .kill_sb() and clean up fill_super(). I think that makes
sense as well; the reason the initial erofs code was written this way is
that I just referred to other filesystems such as ext4 and f2fs, which also
handle failures in the fill_super() constructor.
Anyway, your suggestion is a good idea (it's cleaner), and I will play
with it.

I will kill dev_name in patch v3 at least, and try to clean up all
failure exits as you mentioned.

Thanks for your suggestions... Let me resend v3 later :)

Thanks,
Gao Xiang


>
>
> diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
> index 382258fc124d..16bab07e69d8 100644
> --- a/drivers/staging/erofs/internal.h
> +++ b/drivers/staging/erofs/internal.h
> @@ -117,8 +117,6 @@ struct erofs_sb_info {
>  	u8 volume_name[16];             /* volume name */
>  	u32 requirements;
>  
> -	char *dev_name;
> -
>  	unsigned int mount_opt;
>  	unsigned int shrinker_run_no;
>  
> diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
> index cadbcc11702a..a6ee69d0ce45 100644
> --- a/drivers/staging/erofs/super.c
> +++ b/drivers/staging/erofs/super.c
> @@ -367,15 +367,14 @@ static struct inode *erofs_init_managed_cache(struct super_block *sb)
>  
>  #endif
>  
> -static int erofs_read_super(struct super_block *sb,
> -			    const char *dev_name,
> +static int erofs_fill_super(struct super_block *sb,
>  			    void *data, int silent)
>  {
>  	struct inode *inode;
>  	struct erofs_sb_info *sbi;
>  	int err = -EINVAL;
>  
> -	infoln("read_super, device -> %s", dev_name);
> +	infoln("read_super, device -> %s", sb->s_id);
>  	infoln("options -> %s", (char *)data);
>  
>  	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
> @@ -453,20 +452,10 @@ static int erofs_read_super(struct super_block *sb,
>  		goto err_iget;
>  	}
>  
> -	/* save the device name to sbi */
> -	sbi->dev_name = __getname();
> -	if (!sbi->dev_name) {
> -		err = -ENOMEM;
> -		goto err_devname;
> -	}
> -
> -	snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
> -	sbi->dev_name[PATH_MAX - 1] = '\0';
> -
>  	erofs_register_super(sb);
>  
>  	if (!silent)
> -		infoln("mounted on %s with opts: %s.", dev_name,
> +		infoln("mounted on %s with opts: %s.", sb->s_id,
>  		       (char *)data);
>  	return 0;
>  	/*
> @@ -474,9 +463,6 @@ static int erofs_read_super(struct super_block *sb,
>  	 * the following name convention, thus new features
>  	 * can be integrated easily without renaming labels.
>  	 */
> -err_devname:
> -	dput(sb->s_root);
> -	sb->s_root = NULL;
>  err_iget:
>  #ifdef EROFS_FS_HAS_MANAGED_CACHE
>  	iput(sbi->managed_cache);
> @@ -504,8 +490,7 @@ static void erofs_put_super(struct super_block *sb)
>  
>  	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
>  
> -	infoln("unmounted for %s", sbi->dev_name);
> -	__putname(sbi->dev_name);
> +	infoln("unmounted for %s", sb->s_id);
>  
>  #ifdef EROFS_FS_HAS_MANAGED_CACHE
>  	iput(sbi->managed_cache);
> @@ -525,33 +510,12 @@ static void erofs_put_super(struct super_block *sb)
>  	sb->s_fs_info = NULL;
>  }
>  
> -
> -struct erofs_mount_private {
> -	const char *dev_name;
> -	char *options;
> -};
> -
> -/* support mount_bdev() with options */
> -static int erofs_fill_super(struct super_block *sb,
> -			    void *_priv, int silent)
> -{
> -	struct erofs_mount_private *priv = _priv;
> -
> -	return erofs_read_super(sb, priv->dev_name,
> -		priv->options, silent);
> -}
> -
>  static struct dentry *erofs_mount(
>  	struct file_system_type *fs_type, int flags,
>  	const char *dev_name, void *data)
>  {
> -	struct erofs_mount_private priv = {
> -		.dev_name = dev_name,
> -		.options = data
> -	};
> -
>  	return mount_bdev(fs_type, flags, dev_name,
> -		&priv, erofs_fill_super);
> +		data, erofs_fill_super);
>  }
>  
>  static void erofs_kill_sb(struct super_block *sb)
Al Viro July 21, 2019, 4:05 a.m. UTC | #3
On Sun, Jul 21, 2019 at 11:08:42AM +0800, Gao Xiang wrote:

> It is for debugging use as you said below, mainly for our internal
> testers whose jobs are
> to read kmsg logs and catch kernel problems. sb->s_id (device number)
> maybe not
> straight-forward for them compared with dev_name...

Huh? ->s_id is something like "sdb7" - it's bdev_name(), not a device
number...

> The initial purpose of erofs_mount_private was to pass multiple pieces of
> private data from erofs_mount
> to erofs_read_super, which was written before fs_context was introduced.

That has nothing to do with fs_context (well, other than fs_context conversions
affecting the code very close to that).

> I agree with you, it seems better to just use s_id in community and
> delete erofs_mount_private stuffs...
> Yet I don't look into how to use new fs_context, could I keep using
> legacy mount interface and fix them all?

Sure.

> I guess if I don't misunderstand, that is another suggestion -- in
> short, leave all destructors to .kill_sb() and
> cleanup fill_super().

Just be careful with that iput() there - AFAICS, if fs went live (i.e.
if ->s_root is non-NULL), you really need it done only from put_super();
OTOH, for the case of NULL ->s_root ->put_super() won't be called at all,
so in that case you need it directly in ->kill_sb().
Gao Xiang July 21, 2019, 4:12 a.m. UTC | #4
On 2019/7/21 12:05, Al Viro wrote:
> On Sun, Jul 21, 2019 at 11:08:42AM +0800, Gao Xiang wrote:
> 
>> It is for debugging use as you said below, mainly for our internal
>> testers whose jobs are
>> to read kmsg logs and catch kernel problems. sb->s_id (device number)
>> maybe not
>> straight-forward for them compared with dev_name...
> 
> Huh? ->s_id is something like "sdb7" - it's bdev_name(), not a device
> number...

You are right. Forgive me, actually we use /dev/block/by-name/system
to mount fs... we have to do some lookup if using sdbX instead.


> 
>> The initial purpose of erofs_mount_private was to pass multiple pieces of
>> private data from erofs_mount
>> to erofs_read_super, which was written before fs_context was introduced.
> 
> That has nothing to do with fs_context (well, other than fs_context conversions
> affecting the code very close to that).

OK. That is fine.

> 
>> I agree with you, it seems better to just use s_id in community and
>> delete erofs_mount_private stuffs...
>> Yet I don't look into how to use new fs_context, could I keep using
>> legacy mount interface and fix them all?
> 
> Sure.
> 
>> I guess if I don't misunderstand, that is another suggestion -- in
>> short, leave all destructors to .kill_sb() and
>> cleanup fill_super().
> 
> Just be careful with that iput() there - AFAICS, if fs went live (i.e.
> if ->s_root is non-NULL), you really need it done only from put_super();
> OTOH, for the case of NULL ->s_root ->put_super() won't be called at all,
> so in that case you need it directly in ->kill_sb().

I got it. I will do a quick try now :) But in case this introduces issues,
I guess I need to do some fault injection by hand.....

Thanks,
Gao Xiang

>
Gao Xiang July 21, 2019, 6:05 p.m. UTC | #5
On 2019/7/21 下午12:12, Gao Xiang wrote:
> 
> 
> On 2019/7/21 12:05, Al Viro wrote:
>> On Sun, Jul 21, 2019 at 11:08:42AM +0800, Gao Xiang wrote:
>>
>>> It is for debugging use as you said below, mainly for our internal
>>> testers whose jobs are
>>> to read kmsg logs and catch kernel problems. sb->s_id (device number)
>>> maybe not
>>> straight-forward for them compared with dev_name...
>>
>> Huh? ->s_id is something like "sdb7" - it's bdev_name(), not a device
>> number...
> 
> You are right. Forgive me, actually we use /dev/block/by-name/system
> to mount fs... we have to do some lookup if using sdbX instead.
> 
> 
>>
>>> The initial purpose of erofs_mount_private was to pass multiple pieces of
>>> private data from erofs_mount
>>> to erofs_read_super, which was written before fs_context was introduced.
>>
>> That has nothing to do with fs_context (well, other than fs_context conversions
>> affecting the code very close to that).
> 
> OK. That is fine.
> 
>>
>>> I agree with you, it seems better to just use s_id in community and
>>> delete erofs_mount_private stuffs...
>>> Yet I don't look into how to use new fs_context, could I keep using
>>> legacy mount interface and fix them all?
>>
>> Sure.
>>
>>> I guess if I don't misunderstand, that is another suggestion -- in
>>> short, leave all destructors to .kill_sb() and
>>> cleanup fill_super().
>>
>> Just be careful with that iput() there - AFAICS, if fs went live (i.e.
>> if ->s_root is non-NULL), you really need it done only from put_super();
>> OTOH, for the case of NULL ->s_root ->put_super() won't be called at all,
>> so in that case you need it directly in ->kill_sb().
> 
> I got it. I will do a quick try now :) But in case of introducing issues,
> I guess I need to do some fault injection by hand.....

I have tried to fix them in

https://git.kernel.org/pub/scm/linux/kernel/git/xiang/linux.git/tree/fs/erofs/super.c?h=erofs-outofstaging

, including:

1) remove unneeded sbi->dev_name;

2) remove all destructors in fill_super()
349         /* get the root inode */
350         inode = erofs_iget(sb, ROOT_NID(sbi), true);
351         if (IS_ERR(inode))
352                 return PTR_ERR(inode);
353
354         if (unlikely(!S_ISDIR(inode->i_mode))) {
355                 errln("rootino(nid %llu) is not a directory(i_mode %o)",
356                       ROOT_NID(sbi), inode->i_mode);
357                 iput(inode);
358                 return -EINVAL;
359         }
360
361         sb->s_root = d_make_root(inode);
362         if (unlikely(!sb->s_root))
363                 return -ENOMEM;
364
365         erofs_shrinker_register(sb);
366 #ifdef EROFS_FS_HAS_MANAGED_CACHE
367         /* sb->s_umount is locked here, SB_BORN and SB_ACTIVE are not set */
368         mc = erofs_init_managed_cache(sb);
369         if (IS_ERR(mc))
370                 return PTR_ERR(mc);
371         sbi->managed_cache = mc;
372 #endif

...

385 /*
386  * could be triggered after deactivate_locked_super()
387  * is called, thus including umount and failed to initialize.
388  */
389 static void erofs_kill_sb(struct super_block *sb)
390 {
391         struct erofs_sb_info *sbi;
392
393         WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
394         infoln("unmounting erofs for %s", sb->s_id);
395
396         kill_block_super(sb);
397
398         sbi = EROFS_SB(sb);
399         if (!sbi)
400                 return;
401         kfree(sbi);
402         sb->s_fs_info = NULL;
403 }
404
405 /* called when ->s_root is non-NULL */
406 static void erofs_put_super(struct super_block *sb)
407 {
408         struct erofs_sb_info *const sbi = EROFS_SB(sb);
409
410         DBG_BUGON(!sbi);
411
412 #ifdef EROFS_FS_HAS_MANAGED_CACHE
413         iput(sbi->managed_cache);
414         sbi->managed_cache = NULL;
415 #endif
416         erofs_shrinker_unregister(sb);
417 }

...

and I injected some faults on the error paths and it seems fine...
Could you kindly check whether it makes sense? (if I understood everything correctly....)

The whole patchset will be resent this morning (a few hours later), I have to sleep...


Thanks,
Gao Xiang
diff mbox series

Patch

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
new file mode 100644
index 000000000000..d83e55bdd4a8
--- /dev/null
+++ b/fs/erofs/super.c
@@ -0,0 +1,502 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/fs/erofs/super.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/statfs.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/erofs.h>
+
+static struct kmem_cache *erofs_inode_cachep __read_mostly;
+
+static void init_once(void *ptr)
+{
+	struct erofs_vnode *vi = ptr;
+
+	inode_init_once(&vi->vfs_inode);
+}
+
+static int __init erofs_init_inode_cache(void)
+{
+	erofs_inode_cachep = kmem_cache_create("erofs_inode",
+					       sizeof(struct erofs_vnode), 0,
+					       SLAB_RECLAIM_ACCOUNT,
+					       init_once);
+
+	return erofs_inode_cachep ? 0 : -ENOMEM;
+}
+
+static void erofs_exit_inode_cache(void)
+{
+	kmem_cache_destroy(erofs_inode_cachep);
+}
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+	struct erofs_vnode *vi =
+		kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+
+	if (!vi)
+		return NULL;
+
+	/* zero out everything except vfs_inode */
+	memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode));
+	return &vi->vfs_inode;
+}
+
+static void free_inode(struct inode *inode)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+
+	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
+	if (is_inode_fast_symlink(inode))
+		kfree(inode->i_link);
+
+	kmem_cache_free(erofs_inode_cachep, vi);
+}
+
+static bool check_layout_compatibility(struct super_block *sb,
+				       struct erofs_super_block *layout)
+{
+	const unsigned int requirements = le32_to_cpu(layout->requirements);
+
+	EROFS_SB(sb)->requirements = requirements;
+
+	/* check if current kernel meets all mandatory requirements */
+	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
+		errln("unidentified requirements %x, please upgrade kernel version",
+		      requirements & ~EROFS_ALL_REQUIREMENTS);
+		return false;
+	}
+	return true;
+}
+
+static int superblock_read(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi;
+	struct buffer_head *bh;
+	struct erofs_super_block *layout;
+	unsigned int blkszbits;
+	int ret;
+
+	bh = sb_bread(sb, 0);
+
+	if (!bh) {
+		errln("cannot read erofs superblock");
+		return -EIO;
+	}
+
+	sbi = EROFS_SB(sb);
+	layout = (struct erofs_super_block *)((u8 *)bh->b_data
+		 + EROFS_SUPER_OFFSET);
+
+	ret = -EINVAL;
+	if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) {
+		errln("cannot find valid erofs superblock");
+		goto out;
+	}
+
+	blkszbits = layout->blkszbits;
+	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
+	if (unlikely(blkszbits != LOG_BLOCK_SIZE)) {
+		errln("blksize %u isn't supported on this platform",
+		      1 << blkszbits);
+		goto out;
+	}
+
+	if (!check_layout_compatibility(sb, layout))
+		goto out;
+
+	sbi->blocks = le32_to_cpu(layout->blocks);
+	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
+	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
+	sbi->root_nid = le16_to_cpu(layout->root_nid);
+	sbi->inos = le64_to_cpu(layout->inos);
+
+	sbi->build_time = le64_to_cpu(layout->build_time);
+	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
+
+	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
+	memcpy(sbi->volume_name, layout->volume_name,
+	       sizeof(layout->volume_name));
+
+	ret = 0;
+out:
+	brelse(bh);
+	return ret;
+}
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+const char *erofs_fault_name[FAULT_MAX] = {
+	[FAULT_KMALLOC]		= "kmalloc",
+	[FAULT_READ_IO]		= "read IO error",
+};
+
+static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				     unsigned int rate)
+{
+	struct erofs_fault_info *ffi = &sbi->fault_info;
+
+	if (rate) {
+		atomic_set(&ffi->inject_ops, 0);
+		ffi->inject_rate = rate;
+		ffi->inject_type = (1 << FAULT_MAX) - 1;
+	} else {
+		memset(ffi, 0, sizeof(struct erofs_fault_info));
+	}
+
+	set_opt(sbi, FAULT_INJECTION);
+}
+
+static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				  substring_t *args)
+{
+	int rate = 0;
+
+	if (args->from && match_int(args, &rate))
+		return -EINVAL;
+
+	__erofs_build_fault_attr(sbi, rate);
+	return 0;
+}
+
+static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
+{
+	return sbi->fault_info.inject_rate;
+}
+#else
+static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				     unsigned int rate)
+{
+}
+
+static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				  substring_t *args)
+{
+	infoln("fault_injection options not supported");
+	return 0;
+}
+
+static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
+{
+	return 0;
+}
+#endif
+
+/* set up default EROFS parameters */
+static void default_options(struct erofs_sb_info *sbi)
+{
+}
+
+enum {
+	Opt_fault_injection,
+	Opt_err
+};
+
+static match_table_t erofs_tokens = {
+	{Opt_fault_injection, "fault_injection=%u"},
+	{Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *p;
+	int err;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ","))) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, erofs_tokens, args);
+
+		switch (token) {
+		case Opt_fault_injection:
+			err = erofs_build_fault_attr(EROFS_SB(sb), args);
+			if (err)
+				return err;
+			break;
+
+		default:
+			errln("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int erofs_read_super(struct super_block *sb,
+			    const char *dev_name,
+			    void *data, int silent)
+{
+	struct inode *inode;
+	struct erofs_sb_info *sbi;
+	int err = -EINVAL;
+
+	infoln("read_super, device -> %s", dev_name);
+	infoln("options -> %s", (char *)data);
+
+	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
+		errln("failed to set erofs blksize");
+		goto err;
+	}
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (unlikely(!sbi)) {
+		err = -ENOMEM;
+		goto err;
+	}
+	sb->s_fs_info = sbi;
+
+	err = superblock_read(sb);
+	if (err)
+		goto err_sbread;
+
+	sb->s_magic = EROFS_SUPER_MAGIC;
+	sb->s_flags |= SB_RDONLY | SB_NOATIME;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_time_gran = 1;
+
+	sb->s_op = &erofs_sops;
+
+	/* set erofs default mount options */
+	default_options(sbi);
+
+	err = parse_options(sb, data);
+	if (err)
+		goto err_parseopt;
+
+	if (!silent)
+		infoln("root inode @ nid %llu", ROOT_NID(sbi));
+
+	/* get the root inode */
+	inode = erofs_iget(sb, ROOT_NID(sbi), true);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto err_iget;
+	}
+
+	if (!S_ISDIR(inode->i_mode)) {
+		errln("rootino(nid %llu) is not a directory(i_mode %o)",
+		      ROOT_NID(sbi), inode->i_mode);
+		err = -EINVAL;
+		iput(inode);
+		goto err_iget;
+	}
+
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto err_iget;
+	}
+
+	/* save the device name to sbi */
+	sbi->dev_name = __getname();
+	if (!sbi->dev_name) {
+		err = -ENOMEM;
+		goto err_devname;
+	}
+
+	snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
+	sbi->dev_name[PATH_MAX - 1] = '\0';
+
+	if (!silent)
+		infoln("mounted on %s with opts: %s.", dev_name,
+		       (char *)data);
+	return 0;
+	/*
+	 * please add a label for each exit point and use
+	 * the following name convention, thus new features
+	 * can be integrated easily without renaming labels.
+	 */
+err_devname:
+	dput(sb->s_root);
+	sb->s_root = NULL;
+err_iget:
+err_parseopt:
+err_sbread:
+	sb->s_fs_info = NULL;
+	kfree(sbi);
+err:
+	return err;
+}
+
+/*
+ * could be triggered after deactivate_locked_super()
+ * is called, thus including umount and failed to initialize.
+ */
+static void erofs_put_super(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+	/* for cases which are failed in "read_super" */
+	if (!sbi)
+		return;
+
+	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
+
+	infoln("unmounted for %s", sbi->dev_name);
+	__putname(sbi->dev_name);
+
+	kfree(sbi);
+	sb->s_fs_info = NULL;
+}
+
+
+struct erofs_mount_private {
+	const char *dev_name;
+	char *options;
+};
+
+/* support mount_bdev() with options */
+static int erofs_fill_super(struct super_block *sb,
+			    void *_priv, int silent)
+{
+	struct erofs_mount_private *priv = _priv;
+
+	return erofs_read_super(sb, priv->dev_name,
+		priv->options, silent);
+}
+
+static struct dentry *erofs_mount(
+	struct file_system_type *fs_type, int flags,
+	const char *dev_name, void *data)
+{
+	struct erofs_mount_private priv = {
+		.dev_name = dev_name,
+		.options = data
+	};
+
+	return mount_bdev(fs_type, flags, dev_name,
+		&priv, erofs_fill_super);
+}
+
+static void erofs_kill_sb(struct super_block *sb)
+{
+	kill_block_super(sb);
+}
+
+static struct file_system_type erofs_fs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "erofs",
+	.mount          = erofs_mount,
+	.kill_sb        = erofs_kill_sb,
+	.fs_flags       = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("erofs");
+
+static int __init erofs_module_init(void)
+{
+	int err;
+
+	erofs_check_ondisk_layout_definitions();
+	infoln("initializing erofs " EROFS_VERSION);
+
+	err = erofs_init_inode_cache();
+	if (err)
+		goto icache_err;
+
+	err = register_filesystem(&erofs_fs_type);
+	if (err)
+		goto fs_err;
+
+	infoln("successfully to initialize erofs");
+	return 0;
+
+fs_err:
+	erofs_exit_inode_cache();
+icache_err:
+	return err;
+}
+
+static void __exit erofs_module_exit(void)
+{
+	unregister_filesystem(&erofs_fs_type);
+	erofs_exit_inode_cache();
+	infoln("successfully finalize erofs");
+}
+
+/* get filesystem statistics */
+static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+
+	buf->f_type = sb->s_magic;
+	buf->f_bsize = EROFS_BLKSIZ;
+	buf->f_blocks = sbi->blocks;
+	buf->f_bfree = buf->f_bavail = 0;
+
+	buf->f_files = ULLONG_MAX;
+	buf->f_ffree = ULLONG_MAX - sbi->inos;
+
+	buf->f_namelen = EROFS_NAME_LEN;
+
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
+	return 0;
+}
+
+static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
+
+	if (test_opt(sbi, FAULT_INJECTION))
+		seq_printf(seq, ",fault_injection=%u",
+			   erofs_get_fault_rate(sbi));
+	return 0;
+}
+
+static int erofs_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	unsigned int org_mnt_opt = sbi->mount_opt;
+	unsigned int org_inject_rate = erofs_get_fault_rate(sbi);
+	int err;
+
+	DBG_BUGON(!sb_rdonly(sb));
+	err = parse_options(sb, data);
+	if (err)
+		goto out;
+
+	*flags |= SB_RDONLY;
+	return 0;
+out:
+	__erofs_build_fault_attr(sbi, org_inject_rate);
+	sbi->mount_opt = org_mnt_opt;
+
+	return err;
+}
+
+const struct super_operations erofs_sops = {
+	.put_super = erofs_put_super,
+	.alloc_inode = alloc_inode,
+	.free_inode = free_inode,
+	.statfs = erofs_statfs,
+	.show_options = erofs_show_options,
+	.remount_fs = erofs_remount,
+};
+
+module_init(erofs_module_init);
+module_exit(erofs_module_exit);
+
+MODULE_DESCRIPTION("Enhanced ROM File System");
+MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
+MODULE_LICENSE("GPL");
+