diff mbox series

[v6,05/24] erofs: add inode operations

Message ID 20190802125347.166018-6-gaoxiang25@huawei.com (mailing list archive)
State New, archived
Headers show
Series erofs: promote erofs from staging | expand

Commit Message

Gao Xiang Aug. 2, 2019, 12:53 p.m. UTC
This adds core functions to get, read an inode.
It adds statx support as well.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
 fs/erofs/inode.c | 291 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 291 insertions(+)
 create mode 100644 fs/erofs/inode.c

Comments

Christoph Hellwig Aug. 29, 2019, 10:24 a.m. UTC | #1
On Fri, Aug 02, 2019 at 08:53:28PM +0800, Gao Xiang wrote:
> This adds core functions to get, read an inode.
> It adds statx support as well.
> 
> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
> ---
>  fs/erofs/inode.c | 291 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 291 insertions(+)
>  create mode 100644 fs/erofs/inode.c
> 
> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> new file mode 100644
> index 000000000000..b6ea997bc4ae
> --- /dev/null
> +++ b/fs/erofs/inode.c
> @@ -0,0 +1,291 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * linux/fs/erofs/inode.c
> + *
> + * Copyright (C) 2017-2018 HUAWEI, Inc.
> + *             http://www.huawei.com/
> + * Created by Gao Xiang <gaoxiang25@huawei.com>
> + */
> +#include "internal.h"
> +
> +#include <trace/events/erofs.h>
> +
> +/* no locking */
> +static int read_inode(struct inode *inode, void *data)
> +{
> +	struct erofs_vnode *vi = EROFS_V(inode);
> +	struct erofs_inode_v1 *v1 = data;
> +	const unsigned int advise = le16_to_cpu(v1->i_advise);
> +	erofs_blk_t nblks = 0;
> +
> +	vi->datamode = __inode_data_mapping(advise);

What is the deal with these magic underscores here and various
other similar helpers?

> +	/* fast symlink (following ext4) */

This actually originates in FFS.  But it is so common that the comment
seems a little pointless.

> +	if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
> +		char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);

Please just use plain kmalloc everywhere and let the normal kernel
error injection code take care of injeting any errors.

> +		/* inline symlink data shouldn't across page boundary as well */

... should not cross ..

> +		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
> +			DBG_BUGON(1);
> +			kfree(lnk);
> +			return -EIO;
> +		}
> +
> +		/* get in-page inline data */

s/get/copy/, but the comment seems rather pointless.

> +		memcpy(lnk, data + m_pofs, inode->i_size);
> +		lnk[inode->i_size] = '\0';
> +
> +		inode->i_link = lnk;
> +		set_inode_fast_symlink(inode);

Please just set the ops directly instead of obsfucating that in a single
caller, single line inline function.  And please set it instead of the
normal symlink iops in the same place where you also set those.:w

> +	err = read_inode(inode, data + ofs);
> +	if (!err) {

	if (err)
		goto out_unlock;

.. and save one level of indentation.

> +		if (is_inode_layout_compression(inode)) {

The name of this helper is a little odd.  But I think just
opencoding it seems generally cleaner anyway.


> +			err = -ENOTSUPP;
> +			goto out_unlock;
> +		}
> +
> +		inode->i_mapping->a_ops = &erofs_raw_access_aops;
> +
> +		/* fill last page if inline data is available */
> +		err = fill_inline_data(inode, data, ofs);

Well, I think you should move the is_inode_flat_inline and
(S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) checks from that
helper here, as otherwise you make everyone wonder why you'd always
fill out the inline data.

> +static inline struct inode *erofs_iget_locked(struct super_block *sb,
> +					      erofs_nid_t nid)
> +{
> +	const unsigned long hashval = erofs_inode_hash(nid);
> +
> +#if BITS_PER_LONG >= 64
> +	/* it is safe to use iget_locked for >= 64-bit platform */
> +	return iget_locked(sb, hashval);
> +#else
> +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
> +		erofs_iget_set_actor, &nid);
> +#endif

Just use the slightly more complicated 32-bit version everywhere so that
you have a single actually tested code path.  And then remove this
helper.
Gao Xiang Aug. 29, 2019, 11:59 a.m. UTC | #2
On Thu, Aug 29, 2019 at 03:24:26AM -0700, Christoph Hellwig wrote:

[]

> 
> > +
> > +		/* fill last page if inline data is available */
> > +		err = fill_inline_data(inode, data, ofs);
> 
> Well, I think you should move the is_inode_flat_inline and
> (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) checks from that
> helper here, as otherwise you make everyone wonder why you'd always
> fill out the inline data.

Currently, fill_inline_data() only fills for fast symlink,
later we can fill any tail-end block (such as dir block)
for our requirements.

And I think that is minor.

> 
> > +static inline struct inode *erofs_iget_locked(struct super_block *sb,
> > +					      erofs_nid_t nid)
> > +{
> > +	const unsigned long hashval = erofs_inode_hash(nid);
> > +
> > +#if BITS_PER_LONG >= 64
> > +	/* it is safe to use iget_locked for >= 64-bit platform */
> > +	return iget_locked(sb, hashval);
> > +#else
> > +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
> > +		erofs_iget_set_actor, &nid);
> > +#endif
> 
> Just use the slightly more complicated 32-bit version everywhere so that
> you have a single actually tested code path.  And then remove this
> helper.

The consideration is simply because iget_locked performs better
than iget5_locked.

Thanks,
Gao Xiang
Christoph Hellwig Aug. 30, 2019, 4:42 p.m. UTC | #3
On Thu, Aug 29, 2019 at 07:59:22PM +0800, Gao Xiang wrote:
> On Thu, Aug 29, 2019 at 03:24:26AM -0700, Christoph Hellwig wrote:
> 
> []
> 
> > 
> > > +
> > > +		/* fill last page if inline data is available */
> > > +		err = fill_inline_data(inode, data, ofs);
> > 
> > Well, I think you should move the is_inode_flat_inline and
> > (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) checks from that
> > helper here, as otherwise you make everyone wonder why you'd always
> > fill out the inline data.
> 
> Currently, fill_inline_data() only fills for fast symlink,
> later we can fill any tail-end block (such as dir block)
> for our requirements.

So change it when that later changes actually come in.  And even then
having the checks outside the function is a lot more obvious.

> And I think that is minor.

The problem is that each of these issues might appear minor on their
own.  But combined a lot of the coding style choices lead to code that
is more suitable an obsfucated code contest than the Linux kernel as
trying to understand even just a few places requires jumping through
tons of helpers with misleading names and spread over various files.

> The consideration is simply because iget_locked performs better
> than iget5_locked.

In what benchmark do the differences show up?
Gao Xiang Aug. 30, 2019, 6:46 p.m. UTC | #4
Hi Christoph,

On Fri, Aug 30, 2019 at 09:42:05AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 07:59:22PM +0800, Gao Xiang wrote:
> > On Thu, Aug 29, 2019 at 03:24:26AM -0700, Christoph Hellwig wrote:
> > 
> > []
> > 
> > > 
> > > > +
> > > > +		/* fill last page if inline data is available */
> > > > +		err = fill_inline_data(inode, data, ofs);
> > > 
> > > Well, I think you should move the is_inode_flat_inline and
> > > (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) checks from that
> > > helper here, as otherwise you make everyone wonder why you'd always
> > > fill out the inline data.
> > 
> > Currently, fill_inline_data() only fills for fast symlink,
> > later we can fill any tail-end block (such as dir block)
> > for our requirements.
> 
> So change it when that later changes actually come in.  And even then
> having the checks outside the function is a lot more obvious.

Okay.

> 
> > And I think that is minor.
> 
> The problem is that each of these issues might appear minor on their
> own.  But combined a lot of the coding style choices lead to code that
> is more suitable an obsfucated code contest than the Linux kernel as
> trying to understand even just a few places requires jumping through
> tons of helpers with misleading names and spread over various files.
> 
> > The consideration is simply because iget_locked performs better
> > than iget5_locked.
> 
> In what benchmark do the differences show up?

In a word, no benchmark here, just because
"unsigned long on 32-bit platforms is 4 bytes."
but erofs nid is a 64-bit number.

iget_locked will do find_inode_fast (no callback at all)
rather than iget5_locked --> find_inode (test callback) ->
            inode_insert5(set callback) for each new inode.

For most 64-bit platforms, iget_locked is enough,
32-bit platforms become rare...

Thanks,
Gao Xiang
Gao Xiang Sept. 1, 2019, 9:34 a.m. UTC | #5
Hi Christoph,

Here are redo-ed comments of your suggestions...

On Thu, Aug 29, 2019 at 03:24:26AM -0700, Christoph Hellwig wrote:
> On Fri, Aug 02, 2019 at 08:53:28PM +0800, Gao Xiang wrote:
> > This adds core functions to get, read an inode.
> > It adds statx support as well.
> > 
> > Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
> > ---
> >  fs/erofs/inode.c | 291 +++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 291 insertions(+)
> >  create mode 100644 fs/erofs/inode.c
> > 
> > diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> > new file mode 100644
> > index 000000000000..b6ea997bc4ae
> > --- /dev/null
> > +++ b/fs/erofs/inode.c
> > @@ -0,0 +1,291 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * linux/fs/erofs/inode.c
> > + *
> > + * Copyright (C) 2017-2018 HUAWEI, Inc.
> > + *             http://www.huawei.com/
> > + * Created by Gao Xiang <gaoxiang25@huawei.com>
> > + */
> > +#include "internal.h"
> > +
> > +#include <trace/events/erofs.h>
> > +
> > +/* no locking */
> > +static int read_inode(struct inode *inode, void *data)
> > +{
> > +	struct erofs_vnode *vi = EROFS_V(inode);
> > +	struct erofs_inode_v1 *v1 = data;
> > +	const unsigned int advise = le16_to_cpu(v1->i_advise);
> > +	erofs_blk_t nblks = 0;
> > +
> > +	vi->datamode = __inode_data_mapping(advise);
> 
> What is the deal with these magic underscores here and various
> other similar helpers?

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-17-hsiangkao@aol.com/

underscores means 'internal' in my thought, it seems somewhat
some common practice of Linux kernel, or some recent discussions
about it?... I didn't notice these discussions...

> 
> > +	/* fast symlink (following ext4) */
> 
> This actually originates in FFS.  But it is so common that the comment
> seems a little pointless.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-9-hsiangkao@aol.com/

> 
> > +	if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
> > +		char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);
> 
> Please just use plain kmalloc everywhere and let the normal kernel
> error injection code take care of injeting any errors.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-20-hsiangkao@aol.com/

> 
> > +		/* inline symlink data shouldn't across page boundary as well */
> 
> ... should not cross ..

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-9-hsiangkao@aol.com/

> 
> > +		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
> > +			DBG_BUGON(1);
> > +			kfree(lnk);
> > +			return -EIO;
> > +		}
> > +
> > +		/* get in-page inline data */
> 
> s/get/copy/, but the comment seems rather pointless.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-9-hsiangkao@aol.com/

> 
> > +		memcpy(lnk, data + m_pofs, inode->i_size);
> > +		lnk[inode->i_size] = '\0';
> > +
> > +		inode->i_link = lnk;
> > +		set_inode_fast_symlink(inode);
> 
> Please just set the ops directly instead of obsfucating that in a single
> caller, single line inline function.  And please set it instead of the
> normal symlink iops in the same place where you also set those.:w

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-10-hsiangkao@aol.com/

> 
> > +	err = read_inode(inode, data + ofs);
> > +	if (!err) {
> 
> 	if (err)
> 		goto out_unlock;
> 
> .. and save one level of indentation.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-22-hsiangkao@aol.com/

> 
> > +		if (is_inode_layout_compression(inode)) {
> 
> The name of this helper is a little odd.  But I think just
> opencoding it seems generally cleaner anyway.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-11-hsiangkao@aol.com/

> 
> 
> > +			err = -ENOTSUPP;
> > +			goto out_unlock;
> > +		}
> > +
> > +		inode->i_mapping->a_ops = &erofs_raw_access_aops;
> > +
> > +		/* fill last page if inline data is available */
> > +		err = fill_inline_data(inode, data, ofs);
> 
> Well, I think you should move the is_inode_flat_inline and
> (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) checks from that
> helper here, as otherwise you make everyone wonder why you'd always
> fill out the inline data.

fill_inline_data is killed, and the similar function turns into
erofs_fill_symlink which is called at erofs_fill_inode():

 		case S_IFLNK:
-			/* by default, page_get_link is used for symlink */
-			inode->i_op = &erofs_symlink_iops;
+			err = erofs_fill_symlink(inode, data, ofs);
+			if (err)
+				goto out_unlock;
 			inode_nohighmem(inode);
 			break;

> 
> > +static inline struct inode *erofs_iget_locked(struct super_block *sb,
> > +					      erofs_nid_t nid)
> > +{
> > +	const unsigned long hashval = erofs_inode_hash(nid);
> > +
> > +#if BITS_PER_LONG >= 64
> > +	/* it is safe to use iget_locked for >= 64-bit platform */
> > +	return iget_locked(sb, hashval);
> > +#else
> > +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
> > +		erofs_iget_set_actor, &nid);
> > +#endif
> 
> Just use the slightly more complicated 32-bit version everywhere so that
> you have a single actually tested code path.  And then remove this
> helper.

As I said before, 64-bit platforms is common currently,
I think iget_locked is enough.
https://lore.kernel.org/r/20190830184606.GA175612@architecture4/

Thanks,
Gao Xiang
Christoph Hellwig Sept. 2, 2019, 12:53 p.m. UTC | #6
On Sun, Sep 01, 2019 at 05:34:00PM +0800, Gao Xiang wrote:
> > > +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
> > > +		erofs_iget_set_actor, &nid);
> > > +#endif
> > 
> > Just use the slightly more complicated 32-bit version everywhere so that
> > you have a single actually tested code path.  And then remove this
> > helper.
> 
> As I said before, 64-bit platforms is common currently,
> I think iget_locked is enough.
> https://lore.kernel.org/r/20190830184606.GA175612@architecture4/

The problem with that is that you now have two entirely different
code paths.  And the 32-bit one will probably get very little testing
and eventually bitrot.  We defintively had problems of that sort in
XFS in the past, so my suggestion is to not go down the root of
separate code for 32-bit vs 64-bit unless it makes a real difference
for a real-life workload.
David Sterba Sept. 2, 2019, 1:43 p.m. UTC | #7
On Sun, Sep 01, 2019 at 05:34:00PM +0800, Gao Xiang wrote:
> > > +static int read_inode(struct inode *inode, void *data)
> > > +{
> > > +	struct erofs_vnode *vi = EROFS_V(inode);
> > > +	struct erofs_inode_v1 *v1 = data;
> > > +	const unsigned int advise = le16_to_cpu(v1->i_advise);
> > > +	erofs_blk_t nblks = 0;
> > > +
> > > +	vi->datamode = __inode_data_mapping(advise);
> > 
> > What is the deal with these magic underscores here and various
> > other similar helpers?
> 
> Fixed in
> https://lore.kernel.org/linux-fsdevel/20190901055130.30572-17-hsiangkao@aol.com/
> 
> underscores means 'internal' in my thought, it seems somewhat
> some common practice of Linux kernel, or some recent discussions
> about it?... I didn't notice these discussions...

I know about a few valid uses of the underscores:

* pattern where the __underscored version does not do locking, while the other
  does
* similarly for atomic and non-atomic version
* macro that needs to manipulate the argument name (like glue some
  prefix, so the macro does not have underscores and is supposed to be
  used instead of the function with underscores that needs the full name
  of a variable/constant/..
* underscore function takes a few more parameters to further tune the
  behaviour, but most users are fine with the defaults and that is
  provided as a function without underscores
* in case you have just one function of the kind, don't use the underscores

I can lookup examples if you're interested or if the brief description
is not sufficient. The list covers what I've seen and used, but the list
may be incomplete.
Gao Xiang Sept. 2, 2019, 1:55 p.m. UTC | #8
Hi David,

On Mon, Sep 02, 2019 at 03:43:29PM +0200, David Sterba wrote:
> On Sun, Sep 01, 2019 at 05:34:00PM +0800, Gao Xiang wrote:
> > > > +static int read_inode(struct inode *inode, void *data)
> > > > +{
> > > > +	struct erofs_vnode *vi = EROFS_V(inode);
> > > > +	struct erofs_inode_v1 *v1 = data;
> > > > +	const unsigned int advise = le16_to_cpu(v1->i_advise);
> > > > +	erofs_blk_t nblks = 0;
> > > > +
> > > > +	vi->datamode = __inode_data_mapping(advise);
> > > 
> > > What is the deal with these magic underscores here and various
> > > other similar helpers?
> > 
> > Fixed in
> > https://lore.kernel.org/linux-fsdevel/20190901055130.30572-17-hsiangkao@aol.com/
> > 
> > underscores means 'internal' in my thought, it seems somewhat
> > some common practice of Linux kernel, or some recent discussions
> > about it?... I didn't notice these discussions...
> 
> I know about a few valid uses of the underscores:
> 
> * pattern where the __underscored version does not do locking, while the other
>   does
> * similarly for atomic and non-atomic version
> * macro that needs to manipulate the argument name (like glue some
>   prefix, so the macro does not have underscores and is supposed to be
>   used instead of the function with underscores that needs the full name
>   of a variable/constant/..
> * underscore function takes a few more parameters to further tune the
>   behaviour, but most users are fine with the defaults and that is
>   provided as a function without underscores
> * in case you have just one function of the kind, don't use the underscores
> 
> I can lookup examples if you're interested or if the brief description
> is not sufficient. The list covers what I've seen and used, but the list
> may be incomplete.

Thanks, I learn a lot from the above. [thumb]

Thanks,
Gao Xiang
diff mbox series

Patch

diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
new file mode 100644
index 000000000000..b6ea997bc4ae
--- /dev/null
+++ b/fs/erofs/inode.c
@@ -0,0 +1,291 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/erofs/inode.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+
+#include <trace/events/erofs.h>
+
+/* no locking */
+static int read_inode(struct inode *inode, void *data)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode_v1 *v1 = data;
+	const unsigned int advise = le16_to_cpu(v1->i_advise);
+	erofs_blk_t nblks = 0;
+
+	vi->datamode = __inode_data_mapping(advise);
+
+	if (unlikely(vi->datamode >= EROFS_INODE_LAYOUT_MAX)) {
+		errln("unsupported data mapping %u of nid %llu",
+		      vi->datamode, vi->nid);
+		DBG_BUGON(1);
+		return -EIO;
+	}
+
+	if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) {
+		struct erofs_inode_v2 *v2 = data;
+
+		vi->inode_isize = sizeof(struct erofs_inode_v2);
+		vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount);
+
+		inode->i_mode = le16_to_cpu(v2->i_mode);
+		vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr);
+
+		i_uid_write(inode, le32_to_cpu(v2->i_uid));
+		i_gid_write(inode, le32_to_cpu(v2->i_gid));
+		set_nlink(inode, le32_to_cpu(v2->i_nlink));
+
+		/* ns timestamp */
+		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
+			le64_to_cpu(v2->i_ctime);
+		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
+			le32_to_cpu(v2->i_ctime_nsec);
+
+		inode->i_size = le64_to_cpu(v2->i_size);
+
+		/* total blocks for compressed files */
+		if (is_inode_layout_compression(inode))
+			nblks = le32_to_cpu(v2->i_u.compressed_blocks);
+	} else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) {
+		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+
+		vi->inode_isize = sizeof(struct erofs_inode_v1);
+		vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount);
+
+		inode->i_mode = le16_to_cpu(v1->i_mode);
+		vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr);
+
+		i_uid_write(inode, le16_to_cpu(v1->i_uid));
+		i_gid_write(inode, le16_to_cpu(v1->i_gid));
+		set_nlink(inode, le16_to_cpu(v1->i_nlink));
+
+		/* use build time to derive all file time */
+		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
+			sbi->build_time;
+		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
+			sbi->build_time_nsec;
+
+		inode->i_size = le32_to_cpu(v1->i_size);
+		if (is_inode_layout_compression(inode))
+			nblks = le32_to_cpu(v1->i_u.compressed_blocks);
+	} else {
+		errln("unsupported on-disk inode version %u of nid %llu",
+		      __inode_version(advise), vi->nid);
+		DBG_BUGON(1);
+		return -EIO;
+	}
+
+	if (!nblks)
+		/* measure inode.i_blocks as generic filesystems */
+		inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
+	else
+		inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK;
+	return 0;
+}
+
+/*
+ * try_lock can be required since locking order is:
+ *   file data(fs_inode)
+ *        meta(bd_inode)
+ * but the majority of the callers is "iget",
+ * in that case we are pretty sure no deadlock since
+ * no data operations exist. However I tend to
+ * try_lock since it takes no much overhead and
+ * will success immediately.
+ */
+static int fill_inline_data(struct inode *inode, void *data,
+			    unsigned int m_pofs)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+
+	/* should be inode inline C */
+	if (!is_inode_flat_inline(inode))
+		return 0;
+
+	/* fast symlink (following ext4) */
+	if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
+		char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);
+
+		if (unlikely(!lnk))
+			return -ENOMEM;
+
+		m_pofs += vi->inode_isize + vi->xattr_isize;
+
+		/* inline symlink data shouldn't across page boundary as well */
+		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
+			DBG_BUGON(1);
+			kfree(lnk);
+			return -EIO;
+		}
+
+		/* get in-page inline data */
+		memcpy(lnk, data + m_pofs, inode->i_size);
+		lnk[inode->i_size] = '\0';
+
+		inode->i_link = lnk;
+		set_inode_fast_symlink(inode);
+	}
+	return 0;
+}
+
+static int fill_inode(struct inode *inode, int isdir)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+	struct erofs_vnode *vi = EROFS_V(inode);
+	struct page *page;
+	void *data;
+	int err;
+	erofs_blk_t blkaddr;
+	unsigned int ofs;
+
+	trace_erofs_fill_inode(inode, isdir);
+
+	blkaddr = erofs_blknr(iloc(sbi, vi->nid));
+	ofs = erofs_blkoff(iloc(sbi, vi->nid));
+
+	debugln("%s, reading inode nid %llu at %u of blkaddr %u",
+		__func__, vi->nid, ofs, blkaddr);
+
+	page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir);
+
+	if (IS_ERR(page)) {
+		errln("failed to get inode (nid: %llu) page, err %ld",
+		      vi->nid, PTR_ERR(page));
+		return PTR_ERR(page);
+	}
+
+	DBG_BUGON(!PageUptodate(page));
+	data = page_address(page);
+
+	err = read_inode(inode, data + ofs);
+	if (!err) {
+		/* setup the new inode */
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_op = &erofs_generic_iops;
+			inode->i_fop = &generic_ro_fops;
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = &erofs_dir_iops;
+			inode->i_fop = &erofs_dir_fops;
+		} else if (S_ISLNK(inode->i_mode)) {
+			/* by default, page_get_link is used for symlink */
+			inode->i_op = &erofs_symlink_iops;
+			inode_nohighmem(inode);
+		} else {
+			err = -EIO;
+			goto out_unlock;
+		}
+
+		if (is_inode_layout_compression(inode)) {
+			err = -ENOTSUPP;
+			goto out_unlock;
+		}
+
+		inode->i_mapping->a_ops = &erofs_raw_access_aops;
+
+		/* fill last page if inline data is available */
+		err = fill_inline_data(inode, data, ofs);
+	}
+
+out_unlock:
+	unlock_page(page);
+	put_page(page);
+	return err;
+}
+
+/*
+ * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
+ * we should do more for 32-bit platform to find the right inode.
+ */
+#if BITS_PER_LONG == 32
+static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	return EROFS_V(inode)->nid == nid;
+}
+
+static int erofs_iget_set_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	inode->i_ino = erofs_inode_hash(nid);
+	return 0;
+}
+#endif
+
+static inline struct inode *erofs_iget_locked(struct super_block *sb,
+					      erofs_nid_t nid)
+{
+	const unsigned long hashval = erofs_inode_hash(nid);
+
+#if BITS_PER_LONG >= 64
+	/* it is safe to use iget_locked for >= 64-bit platform */
+	return iget_locked(sb, hashval);
+#else
+	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
+		erofs_iget_set_actor, &nid);
+#endif
+}
+
+struct inode *erofs_iget(struct super_block *sb,
+			 erofs_nid_t nid,
+			 bool isdir)
+{
+	struct inode *inode = erofs_iget_locked(sb, nid);
+
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+
+	if (inode->i_state & I_NEW) {
+		int err;
+		struct erofs_vnode *vi = EROFS_V(inode);
+
+		vi->nid = nid;
+
+		err = fill_inode(inode, isdir);
+		if (likely(!err))
+			unlock_new_inode(inode);
+		else {
+			iget_failed(inode);
+			inode = ERR_PTR(err);
+		}
+	}
+	return inode;
+}
+
+int erofs_getattr(const struct path *path, struct kstat *stat,
+		  u32 request_mask, unsigned int query_flags)
+{
+	struct inode *const inode = d_inode(path->dentry);
+
+	if (is_inode_layout_compression(inode))
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+
+	stat->attributes |= STATX_ATTR_IMMUTABLE;
+	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_IMMUTABLE);
+
+	generic_fillattr(inode, stat);
+	return 0;
+}
+
+const struct inode_operations erofs_generic_iops = {
+	.getattr = erofs_getattr,
+};
+
+const struct inode_operations erofs_symlink_iops = {
+	.get_link = page_get_link,
+	.getattr = erofs_getattr,
+};
+
+const struct inode_operations erofs_fast_symlink_iops = {
+	.get_link = simple_get_link,
+	.getattr = erofs_getattr,
+};
+