diff mbox series

[v6,01/24] erofs: add on-disk layout

Message ID 20190802125347.166018-2-gaoxiang25@huawei.com (mailing list archive)
State New, archived
Headers show
Series erofs: promote erofs from staging | expand

Commit Message

Gao Xiang Aug. 2, 2019, 12:53 p.m. UTC
This commit adds the on-disk layout header file of erofs.
On-disk format is compatible with erofs-staging added in 4.19.

In addition, add EROFS_SUPER_MAGIC_V1 to magic.h.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
 fs/erofs/erofs_fs.h        | 316 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/magic.h |   1 +
 2 files changed, 317 insertions(+)
 create mode 100644 fs/erofs/erofs_fs.h

Comments

Christoph Hellwig Aug. 29, 2019, 9:59 a.m. UTC | #1
> --- /dev/null
> +++ b/fs/erofs/erofs_fs.h
> @@ -0,0 +1,316 @@
> +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> +/*
> + * linux/fs/erofs/erofs_fs.h

Please remove the pointless file names in the comment headers.

> +struct erofs_super_block {
> +/*  0 */__le32 magic;           /* in the little endian */
> +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> +/*  8 */__le32 features;        /* (aka. feature_compat) */
> +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */

Please remove all the byte offset comments.  That is something that can
easily be checked with gdb or pahole.

> +/* 64 */__u8 volume_name[16];   /* volume name */
> +/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
> +
> +/* 84 */__u8 reserved2[44];
> +} __packed;                     /* 128 bytes */

Please don't add __packed.  In this case I think you don't need it
(but double check with pahole), but even if you would need it using
proper padding fields and making sure all fields are naturally aligned
will give you much better code generation on architectures that don't
support native unaligned access.

> +/*
> + * erofs inode data mapping:
> + * 0 - inode plain without inline data A:
> + * inode, [xattrs], ... | ... | no-holed data
> + * 1 - inode VLE compression B (legacy):
> + * inode, [xattrs], extents ... | ...
> + * 2 - inode plain with inline data C:
> + * inode, [xattrs], last_inline_data, ... | ... | no-holed data
> + * 3 - inode compression D:
> + * inode, [xattrs], map_header, extents ... | ...
> + * 4~7 - reserved
> + */
> +enum {
> +	EROFS_INODE_FLAT_PLAIN,

This one doesn't actually seem to be used.

> +	EROFS_INODE_FLAT_COMPRESSION_LEGACY,

why are we adding a legacy field to a brand new file system?

> +	EROFS_INODE_FLAT_INLINE,
> +	EROFS_INODE_FLAT_COMPRESSION,
> +	EROFS_INODE_LAYOUT_MAX

It seems like these come from the on-disk format, in which case they
should have explicit values assigned to them.

Btw, I think it generally helps file system implementation quality
if you use a separate header for the on-disk structures vs in-memory
structures, as that keeps it clear in everyones mind what needs to
stay persistent and what can be chenged easily.

> +static bool erofs_inode_is_data_compressed(unsigned int datamode)
> +{
> +	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
> +		return true;
> +	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
> +}

This looks like a really obsfucated way to write:

	return datamode == EROFS_INODE_FLAT_COMPRESSION ||
		datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;

> +/* 28 */__le32 i_reserved2;
> +} __packed;

Sane comment as above.

> +
> +/* 32 bytes on-disk inode */
> +#define EROFS_INODE_LAYOUT_V1   0
> +/* 64 bytes on-disk inode */
> +#define EROFS_INODE_LAYOUT_V2   1
> +
> +struct erofs_inode_v2 {
> +/*  0 */__le16 i_advise;

Why do we have two inode version in a newly added file system?

> +#define ondisk_xattr_ibody_size(count)	({\
> +	u32 __count = le16_to_cpu(count); \
> +	((__count) == 0) ? 0 : \
> +	sizeof(struct erofs_xattr_ibody_header) + \
> +		sizeof(__u32) * ((__count) - 1); })

This would be much more readable as a function.

> +#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
> +	sizeof(struct erofs_xattr_entry) + \
> +	(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))

Same here.

> +/* available compression algorithm types */
> +enum {
> +	Z_EROFS_COMPRESSION_LZ4,
> +	Z_EROFS_COMPRESSION_MAX
> +};

Seems like an on-disk value again that should use explicitly assigned
numbers.
Gao Xiang Aug. 29, 2019, 10:32 a.m. UTC | #2
Hi Christoph,

On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:
> > --- /dev/null
> > +++ b/fs/erofs/erofs_fs.h
> > @@ -0,0 +1,316 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> > +/*
> > + * linux/fs/erofs/erofs_fs.h
> 
> Please remove the pointless file names in the comment headers.

Already removed in the latest version.

> 
> > +struct erofs_super_block {
> > +/*  0 */__le32 magic;           /* in the little endian */
> > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> 
> Please remove all the byte offset comments.  That is something that can
> easily be checked with gdb or pahole.

I have no idea the actual issue here.
It will help all developpers better add fields or calculate
these offsets in their mind, and with care.

Rather than they didn't run "gdb" or "pahole" and change it by mistake.

> 
> > +/* 64 */__u8 volume_name[16];   /* volume name */
> > +/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
> > +
> > +/* 84 */__u8 reserved2[44];
> > +} __packed;                     /* 128 bytes */
> 
> Please don't add __packed.  In this case I think you don't need it
> (but double check with pahole), but even if you would need it using
> proper padding fields and making sure all fields are naturally aligned
> will give you much better code generation on architectures that don't
> support native unaligned access.

If you can see more, all on-disk fields in EROFS are naturally aligned,
I can remove all of these as you like, but I think that is not very urgent.

> 
> > +/*
> > + * erofs inode data mapping:
> > + * 0 - inode plain without inline data A:
> > + * inode, [xattrs], ... | ... | no-holed data
> > + * 1 - inode VLE compression B (legacy):
> > + * inode, [xattrs], extents ... | ...
> > + * 2 - inode plain with inline data C:
> > + * inode, [xattrs], last_inline_data, ... | ... | no-holed data
> > + * 3 - inode compression D:
> > + * inode, [xattrs], map_header, extents ... | ...
> > + * 4~7 - reserved
> > + */
> > +enum {
> > +	EROFS_INODE_FLAT_PLAIN,
> 
> This one doesn't actually seem to be used.

It could be better has a name though, because erofs.mkfs uses it,
and we keep this on-disk file up with erofs-utils.

> 
> > +	EROFS_INODE_FLAT_COMPRESSION_LEGACY,
> 
> why are we adding a legacy field to a brand new file system?

the difference is just EROFS_INODE_FLAT_COMPRESSION_LEGACY doesn't have
z_erofs_map_header, nothing special at all.

> 
> > +	EROFS_INODE_FLAT_INLINE,
> > +	EROFS_INODE_FLAT_COMPRESSION,
> > +	EROFS_INODE_LAYOUT_MAX
> 
> It seems like these come from the on-disk format, in which case they
> should have explicit values assigned to them.
> 
> Btw, I think it generally helps file system implementation quality
> if you use a separate header for the on-disk structures vs in-memory
> structures, as that keeps it clear in everyones mind what needs to
> stay persistent and what can be chenged easily.

All fields in this file are on-disk representation.

> 
> > +static bool erofs_inode_is_data_compressed(unsigned int datamode)
> > +{
> > +	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
> > +		return true;
> > +	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
> > +}
> 
> This looks like a really obsfucated way to write:
> 
> 	return datamode == EROFS_INODE_FLAT_COMPRESSION ||
> 		datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;

It depends on the personal choise, if you like, I will change into your form.

> 
> > +/* 28 */__le32 i_reserved2;
> > +} __packed;
> 
> Sane comment as above.
> 
> > +
> > +/* 32 bytes on-disk inode */
> > +#define EROFS_INODE_LAYOUT_V1   0
> > +/* 64 bytes on-disk inode */
> > +#define EROFS_INODE_LAYOUT_V2   1
> > +
> > +struct erofs_inode_v2 {
> > +/*  0 */__le16 i_advise;
> 
> Why do we have two inode version in a newly added file system?

v2 is an exhanced on-disk inode form, it has 64 bytes,
v1 is more compacted one, which is already suitable
for Android use case of course.

There is no new and old, both are used for the current EROFS.

> 
> > +#define ondisk_xattr_ibody_size(count)	({\
> > +	u32 __count = le16_to_cpu(count); \
> > +	((__count) == 0) ? 0 : \
> > +	sizeof(struct erofs_xattr_ibody_header) + \
> > +		sizeof(__u32) * ((__count) - 1); })
> 
> This would be much more readable as a function.
> 
> > +#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
> > +	sizeof(struct erofs_xattr_entry) + \
> > +	(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
> 
> Same here.

Personal tendency, because we are working in a dedicated team rather than
an individual person.

But I can fix as you like.

> 
> > +/* available compression algorithm types */
> > +enum {
> > +	Z_EROFS_COMPRESSION_LZ4,
> > +	Z_EROFS_COMPRESSION_MAX
> > +};
> 
> Seems like an on-disk value again that should use explicitly assigned
> numbers.

I can fix it up as you like but I still cannot get
what is critical issues here.

Thanks,
Gao Xiang
Christoph Hellwig Aug. 29, 2019, 10:36 a.m. UTC | #3
On Thu, Aug 29, 2019 at 06:32:53PM +0800, Gao Xiang wrote:
> I can fix it up as you like but I still cannot get
> what is critical issues here.

The problem is that the whole codebase is way substandard quality,
looking a lot like Linux code from 20 years ago.  Yes, we already have
plenty of code of that standard in the tree, but we should not add more.
Gao Xiang Aug. 29, 2019, 10:58 a.m. UTC | #4
Hi Christoph,

On Thu, Aug 29, 2019 at 03:36:04AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 06:32:53PM +0800, Gao Xiang wrote:
> > I can fix it up as you like but I still cannot get
> > what is critical issues here.
> 
> The problem is that the whole codebase is way substandard quality,
> looking a lot like Linux code from 20 years ago.  Yes, we already have
> plenty of code of that standard in the tree, but we should not add more.

I still cannot get your point what does your substandard quality mean,
please refer to some thing critical in EROFS (and I noticed that your
new code still has bug) rather than naming.

Thanks,
Gao Xiang
Gao Xiang Aug. 29, 2019, 3:41 p.m. UTC | #5
Hi Christoph,

On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:

[]

> 
> > +static bool erofs_inode_is_data_compressed(unsigned int datamode)
> > +{
> > +	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
> > +		return true;
> > +	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
> > +}
> 
> This looks like a really obsfucated way to write:
> 
> 	return datamode == EROFS_INODE_FLAT_COMPRESSION ||
> 		datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;

Add a word about this, the above approach is not horrible if more
datamode add here and comments, e.g

static bool erofs_inode_is_data_compressed(unsigned int datamode)
{
	/* has z_erofs_map_header */
	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
		return true;
	/* some blablabla */
	if (datamode == (1) )
		return true;
	/* some blablablabla */
	if (datamode == (2) )
		return true;
	/* no z_erofs_map_header */
	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
}

vs.

static bool erofs_inode_is_data_compressed(unsigned int datamode)
{
	/* has z_erofs_map_header */
	return datamode == EROFS_INODE_FLAT_COMPRESSION ||
		/* some blablabla */
	       datamode == (1) ||
	       	/* some blablablabla */
	       datamode == (2) ||
	        /* no z_erofs_map_header */
	       datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
}

I have no idea which one is better.
Anyway, if you still like the form, I will change it.

Thanks,
Gao Xiang
Joe Perches Aug. 29, 2019, 3:58 p.m. UTC | #6
On Thu, 2019-08-29 at 18:32 +0800, Gao Xiang wrote:
> Hi Christoph,
> 
> On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:
> > > --- /dev/null
> > > +++ b/fs/erofs/erofs_fs.h
> > > @@ -0,0 +1,316 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> > > +/*
> > > + * linux/fs/erofs/erofs_fs.h
> > 
> > Please remove the pointless file names in the comment headers.
> 
> Already removed in the latest version.
> 
> > > +struct erofs_super_block {
> > > +/*  0 */__le32 magic;           /* in the little endian */
> > > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> > 
> > Please remove all the byte offset comments.  That is something that can
> > easily be checked with gdb or pahole.
> 
> I have no idea the actual issue here.
> It will help all developpers better add fields or calculate
> these offsets in their mind, and with care.
> 
> Rather than they didn't run "gdb" or "pahole" and change it by mistake.

I think Christoph is not right here.

Using external tools for validation is extra work
when necessary for understanding the code.

The expected offset is somewhat valuable, but
perhaps the form is a bit off given the visual
run-in to the field types.

The extra work with this form is manipulating all
the offsets whenever a structure change occurs.

The comments might be better with a form more like:

struct erofs_super_block {	/* offset description */
	__le32 magic;		/*   0  */
	__le32 checksum;	/*   4  crc32c(super_block) */
	__le32 features;	/*   8  (aka. feature_compat) */
	__u8 blkszbits;		/*  12  support block_size == PAGE_SIZE only */
Gao Xiang Aug. 29, 2019, 5:26 p.m. UTC | #7
Hi Joe,

On Thu, Aug 29, 2019 at 08:58:17AM -0700, Joe Perches wrote:
> On Thu, 2019-08-29 at 18:32 +0800, Gao Xiang wrote:
> > Hi Christoph,
> > 
> > On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:
> > > > --- /dev/null
> > > > +++ b/fs/erofs/erofs_fs.h
> > > > @@ -0,0 +1,316 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> > > > +/*
> > > > + * linux/fs/erofs/erofs_fs.h
> > > 
> > > Please remove the pointless file names in the comment headers.
> > 
> > Already removed in the latest version.
> > 
> > > > +struct erofs_super_block {
> > > > +/*  0 */__le32 magic;           /* in the little endian */
> > > > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > > > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > > > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> > > 
> > > Please remove all the byte offset comments.  That is something that can
> > > easily be checked with gdb or pahole.
> > 
> > I have no idea the actual issue here.
> > It will help all developpers better add fields or calculate
> > these offsets in their mind, and with care.
> > 
> > Rather than they didn't run "gdb" or "pahole" and change it by mistake.
> 
> I think Christoph is not right here.
> 
> Using external tools for validation is extra work
> when necessary for understanding the code.
> 
> The expected offset is somewhat valuable, but
> perhaps the form is a bit off given the visual
> run-in to the field types.
> 
> The extra work with this form is manipulating all
> the offsets whenever a structure change occurs.
> 
> The comments might be better with a form more like:

Thanks for your comment.
I will change those places as you suggested, that is fine.

Thanks,
Gao Xiang

> 
> struct erofs_super_block {	/* offset description */
> 	__le32 magic;		/*   0  */
> 	__le32 checksum;	/*   4  crc32c(super_block) */
> 	__le32 features;	/*   8  (aka. feature_compat) */
> 	__u8 blkszbits;		/*  12  support block_size == PAGE_SIZE only */
> 
>
David Sterba Aug. 30, 2019, 12:07 p.m. UTC | #8
On Thu, Aug 29, 2019 at 08:58:17AM -0700, Joe Perches wrote:
> On Thu, 2019-08-29 at 18:32 +0800, Gao Xiang wrote:
> > Hi Christoph,
> > 
> > On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:
> > > > --- /dev/null
> > > > +++ b/fs/erofs/erofs_fs.h
> > > > @@ -0,0 +1,316 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> > > > +/*
> > > > + * linux/fs/erofs/erofs_fs.h
> > > 
> > > Please remove the pointless file names in the comment headers.
> > 
> > Already removed in the latest version.
> > 
> > > > +struct erofs_super_block {
> > > > +/*  0 */__le32 magic;           /* in the little endian */
> > > > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > > > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > > > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> > > 
> > > Please remove all the byte offset comments.  That is something that can
> > > easily be checked with gdb or pahole.
> > 
> > I have no idea the actual issue here.
> > It will help all developpers better add fields or calculate
> > these offsets in their mind, and with care.
> > 
> > Rather than they didn't run "gdb" or "pahole" and change it by mistake.
> 
> I think Christoph is not right here.
> 
> Using external tools for validation is extra work
> when necessary for understanding the code.

The advantage of using the external tools that the information about
offsets is provably correct ...

> The expected offset is somewhat valuable, but
> perhaps the form is a bit off given the visual
> run-in to the field types.
> 
> The extra work with this form is manipulating all
> the offsets whenever a structure change occurs.

... while this is error prone.

> The comments might be better with a form more like:
> 
> struct erofs_super_block {	/* offset description */
> 	__le32 magic;		/*   0  */
> 	__le32 checksum;	/*   4  crc32c(super_block) */
> 	__le32 features;	/*   8  (aka. feature_compat) */
> 	__u8 blkszbits;		/*  12  support block_size == PAGE_SIZE only */
Gao Xiang Aug. 30, 2019, 12:18 p.m. UTC | #9
Hi David,

On Fri, Aug 30, 2019 at 02:07:14PM +0200, David Sterba wrote:
> On Thu, Aug 29, 2019 at 08:58:17AM -0700, Joe Perches wrote:
> > On Thu, 2019-08-29 at 18:32 +0800, Gao Xiang wrote:
> > > Hi Christoph,
> > > 
> > > On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:
> > > > > --- /dev/null
> > > > > +++ b/fs/erofs/erofs_fs.h
> > > > > @@ -0,0 +1,316 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> > > > > +/*
> > > > > + * linux/fs/erofs/erofs_fs.h
> > > > 
> > > > Please remove the pointless file names in the comment headers.
> > > 
> > > Already removed in the latest version.
> > > 
> > > > > +struct erofs_super_block {
> > > > > +/*  0 */__le32 magic;           /* in the little endian */
> > > > > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > > > > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > > > > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> > > > 
> > > > Please remove all the byte offset comments.  That is something that can
> > > > easily be checked with gdb or pahole.
> > > 
> > > I have no idea the actual issue here.
> > > It will help all developpers better add fields or calculate
> > > these offsets in their mind, and with care.
> > > 
> > > Rather than they didn't run "gdb" or "pahole" and change it by mistake.
> > 
> > I think Christoph is not right here.
> > 
> > Using external tools for validation is extra work
> > when necessary for understanding the code.
> 
> The advantage of using the external tools that the information about
> offsets is provably correct ...
> 
> > The expected offset is somewhat valuable, but
> > perhaps the form is a bit off given the visual
> > run-in to the field types.
> > 
> > The extra work with this form is manipulating all
> > the offsets whenever a structure change occurs.
> 
> ... while this is error prone.

I will redo a full patchset and comments addressing
what Christoph all said yesterday.

Either form is fine with me for this case, let's remove
them instead.

Thanks,
Gao Xiang

> 
> > The comments might be better with a form more like:
> > 
> > struct erofs_super_block {	/* offset description */
> > 	__le32 magic;		/*   0  */
> > 	__le32 checksum;	/*   4  crc32c(super_block) */
> > 	__le32 features;	/*   8  (aka. feature_compat) */
> > 	__u8 blkszbits;		/*  12  support block_size == PAGE_SIZE only */
Gao Xiang Sept. 1, 2019, 7:54 a.m. UTC | #10
Hi Christoph,

Sorry about my first response, sincerely...
Here is my redo-ed comments to all your suggestions...

On Thu, Aug 29, 2019 at 02:59:54AM -0700, Christoph Hellwig wrote:
> > --- /dev/null
> > +++ b/fs/erofs/erofs_fs.h
> > @@ -0,0 +1,316 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
> > +/*
> > + * linux/fs/erofs/erofs_fs.h
> 
> Please remove the pointless file names in the comment headers.

Has already fixed in the latest version, and I will resend
the whole v9 addressing all suggestions from you these days...

However it's somewhat hard to spilt the whole code prefectly
since erofs is ~7KLOC code and linux-fsdevel mailing list
have some limitation, I have spilted it in the form of features...

> 
> > +struct erofs_super_block {
> > +/*  0 */__le32 magic;           /* in the little endian */
> > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> 
> Please remove all the byte offset comments.  That is something that can
> easily be checked with gdb or pahole.

fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-2-hsiangkao@aol.com/

> 
> > +/* 64 */__u8 volume_name[16];   /* volume name */
> > +/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
> > +
> > +/* 84 */__u8 reserved2[44];
> > +} __packed;                     /* 128 bytes */
> 
> Please don't add __packed.  In this case I think you don't need it
> (but double check with pahole), but even if you would need it using
> proper padding fields and making sure all fields are naturally aligned
> will give you much better code generation on architectures that don't
> support native unaligned access.

fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-5-hsiangkao@aol.com/

> 
> > +/*
> > + * erofs inode data mapping:
> > + * 0 - inode plain without inline data A:
> > + * inode, [xattrs], ... | ... | no-holed data
> > + * 1 - inode VLE compression B (legacy):
> > + * inode, [xattrs], extents ... | ...
> > + * 2 - inode plain with inline data C:
> > + * inode, [xattrs], last_inline_data, ... | ... | no-holed data
> > + * 3 - inode compression D:
> > + * inode, [xattrs], map_header, extents ... | ...
> > + * 4~7 - reserved
> > + */
> > +enum {
> > +	EROFS_INODE_FLAT_PLAIN,
> 
> This one doesn't actually seem to be used.

It could be better has a name though, because 1) erofs.mkfs uses this
definition explicitly, and we keep this on-disk definition erofs_fs.h
file up with erofs-utils.

2) For kernel use, first we have,
   datamode < EROFS_INODE_LAYOUT_MAX; and
   !erofs_inode_is_data_compressed, so there are only two mode here,
        1) EROFS_INODE_FLAT_INLINE,
        2) EROFS_INODE_FLAT_PLAIN
   if its datamode isn't EROFS_INODE_FLAT_INLINE (tail-end block packing),
   it should be EROFS_INODE_FLAT_PLAIN.

   The detailed logic in erofs_read_inode and
   erofs_map_blocks_flatmode....

> 
> > +	EROFS_INODE_FLAT_COMPRESSION_LEGACY,
> 
> why are we adding a legacy field to a brand new file system?

The difference is just EROFS_INODE_FLAT_COMPRESSION_LEGACY doesn't
have z_erofs_map_header, so it only supports default (4k clustersize)
fixed-sized output compression rather than per-file setting, nothing
special at all...

> 
> > +	EROFS_INODE_FLAT_INLINE,
> > +	EROFS_INODE_FLAT_COMPRESSION,
> > +	EROFS_INODE_LAYOUT_MAX
> 
> It seems like these come from the on-disk format, in which case they
> should have explicit values assigned to them.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-3-hsiangkao@aol.com/

> 
> Btw, I think it generally helps file system implementation quality
> if you use a separate header for the on-disk structures vs in-memory
> structures, as that keeps it clear in everyones mind what needs to
> stay persistent and what can be chenged easily.

All fields in this file are on-disk representation by design
(no logic for in-memory presentation).

> 
> > +static bool erofs_inode_is_data_compressed(unsigned int datamode)
> > +{
> > +	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
> > +		return true;
> > +	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
> > +}
> 
> This looks like a really obsfucated way to write:
> 
> 	return datamode == EROFS_INODE_FLAT_COMPRESSION ||
> 		datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-6-hsiangkao@aol.com/

> 
> > +/* 28 */__le32 i_reserved2;
> > +} __packed;
> 
> Sane comment as above.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-5-hsiangkao@aol.com/

> 
> > +
> > +/* 32 bytes on-disk inode */
> > +#define EROFS_INODE_LAYOUT_V1   0
> > +/* 64 bytes on-disk inode */
> > +#define EROFS_INODE_LAYOUT_V2   1
> > +
> > +struct erofs_inode_v2 {
> > +/*  0 */__le16 i_advise;
> 
> Why do we have two inode version in a newly added file system?

There is no new or old, both can be used for the current EROFS in one image.

v2 is an exhanced on-disk inode form, it has 64 bytes,
v1 is more reduced one, which is already suitable for Android use case.

> 
> > +#define ondisk_xattr_ibody_size(count)	({\
> > +	u32 __count = le16_to_cpu(count); \
> > +	((__count) == 0) ? 0 : \
> > +	sizeof(struct erofs_xattr_ibody_header) + \
> > +		sizeof(__u32) * ((__count) - 1); })
> 
> This would be much more readable as a function.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-4-hsiangkao@aol.com/
> 
> > +#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
> > +	sizeof(struct erofs_xattr_entry) + \
> > +	(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
> 
> Same here.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-4-hsiangkao@aol.com/

> 
> > +/* available compression algorithm types */
> > +enum {
> > +	Z_EROFS_COMPRESSION_LZ4,
> > +	Z_EROFS_COMPRESSION_MAX
> > +};
> 
> Seems like an on-disk value again that should use explicitly assigned
> numbers.

Fixed in
https://lore.kernel.org/linux-fsdevel/20190901055130.30572-3-hsiangkao@aol.com/

Thanks,
Gao Xiang
Pavel Machek Sept. 2, 2019, 8:40 a.m. UTC | #11
Hi!

> > +struct erofs_super_block {
> > +/*  0 */__le32 magic;           /* in the little endian */
> > +/*  4 */__le32 checksum;        /* crc32c(super_block) */
> > +/*  8 */__le32 features;        /* (aka. feature_compat) */
> > +/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
> 
> Please remove all the byte offset comments.  That is something that can
> easily be checked with gdb or pahole.

I don't think I agree. gdb will tell you byte offsets _on one
architecture_. But filesystem is supposed to be portable between them. 

> > +/* 64 */__u8 volume_name[16];   /* volume name */
> > +/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
> > +
> > +/* 84 */__u8 reserved2[44];
> > +} __packed;                     /* 128 bytes */
> 
> Please don't add __packed.  In this case I think you don't need it
> (but double check with pahole), but even if you would need it using
> proper padding fields and making sure all fields are naturally aligned
> will give you much better code generation on architectures that don't
> support native unaligned access.

This is on-disk structure, right?

drivers/staging/erofs/super.c:	struct erofs_super_block *layout;
drivers/staging/erofs/super.c:	layout = (struct erofs_super_block
*)((u8 *)bh->b_data

So __packed is right thing to do. If architecture accesses that
slowly, that's ungood, but different structures between architectures
would be really bad.

Best regards,
								Pavel
Pavel Machek Sept. 2, 2019, 8:43 a.m. UTC | #12
Hi!

> > > Rather than they didn't run "gdb" or "pahole" and change it by mistake.
> > 
> > I think Christoph is not right here.
> > 
> > Using external tools for validation is extra work
> > when necessary for understanding the code.
> 
> The advantage of using the external tools that the information about
> offsets is provably correct ...

No. gdb tells you what the actual offsets _are_.

> > The expected offset is somewhat valuable, but
> > perhaps the form is a bit off given the visual
> > run-in to the field types.
> > 
> > The extra work with this form is manipulating all
> > the offsets whenever a structure change occurs.
> 
> ... while this is error prone.

While the comment tells you what they _should be_.

								Pavel
Gao Xiang Sept. 2, 2019, 10:35 a.m. UTC | #13
Hi Pavel,

(Thanks...)

On Mon, Sep 02, 2019 at 10:40:20AM +0200, Pavel Machek wrote:

> 
> So __packed is right thing to do. If architecture accesses that
> slowly, that's ungood, but different structures between architectures
> would be really bad.

(...a little word, it seems that Christoph was trying to say that
 it's unnecessary to __packed for this case since we designed most
 erofs on-disk format in natural alignment... Anyway, I updated,
 that seems okay...)

Thanks,
Gao Xiang

> 
> Best regards,
> 								Pavel
> -- 
> DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
> HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Christoph Hellwig Sept. 2, 2019, 12:45 p.m. UTC | #14
On Sun, Sep 01, 2019 at 03:54:11PM +0800, Gao Xiang wrote:
> It could be better has a name though, because 1) erofs.mkfs uses this
> definition explicitly, and we keep this on-disk definition erofs_fs.h
> file up with erofs-utils.
> 
> 2) For kernel use, first we have,
>    datamode < EROFS_INODE_LAYOUT_MAX; and
>    !erofs_inode_is_data_compressed, so there are only two mode here,
>         1) EROFS_INODE_FLAT_INLINE,
>         2) EROFS_INODE_FLAT_PLAIN
>    if its datamode isn't EROFS_INODE_FLAT_INLINE (tail-end block packing),
>    it should be EROFS_INODE_FLAT_PLAIN.
> 
>    The detailed logic in erofs_read_inode and
>    erofs_map_blocks_flatmode....

Ok.  At least the explicit numbering makes this a little more obvious
now.  What seems fairly odd is that there are only various places that
check for some inode layouts/formats but nothing that does a switch
over all of them.

> > why are we adding a legacy field to a brand new file system?
> 
> The difference is just EROFS_INODE_FLAT_COMPRESSION_LEGACY doesn't
> have z_erofs_map_header, so it only supports default (4k clustersize)
> fixed-sized output compression rather than per-file setting, nothing
> special at all...

It still seems odd to add a legacy field to a brand new file system.

> > structures, as that keeps it clear in everyones mind what needs to
> > stay persistent and what can be chenged easily.
> 
> All fields in this file are on-disk representation by design
> (no logic for in-memory presentation).

Ok, make sense.    Maybe add a note to the top of the file comment
that this is the on-disk format.

One little oddity is that erofs_inode_is_data_compressed is here, while
is_inode_flat_inline is in internal.h.  There are arguments for either
place, but I'd suggest to keep the related macros together.
Gao Xiang Sept. 2, 2019, 1:02 p.m. UTC | #15
Hi Christoph,

On Mon, Sep 02, 2019 at 05:45:21AM -0700, Christoph Hellwig wrote:
> On Sun, Sep 01, 2019 at 03:54:11PM +0800, Gao Xiang wrote:
> > It could be better has a name though, because 1) erofs.mkfs uses this
> > definition explicitly, and we keep this on-disk definition erofs_fs.h
> > file up with erofs-utils.
> > 
> > 2) For kernel use, first we have,
> >    datamode < EROFS_INODE_LAYOUT_MAX; and
> >    !erofs_inode_is_data_compressed, so there are only two mode here,
> >         1) EROFS_INODE_FLAT_INLINE,
> >         2) EROFS_INODE_FLAT_PLAIN
> >    if its datamode isn't EROFS_INODE_FLAT_INLINE (tail-end block packing),
> >    it should be EROFS_INODE_FLAT_PLAIN.
> > 
> >    The detailed logic in erofs_read_inode and
> >    erofs_map_blocks_flatmode....
> 
> Ok.  At least the explicit numbering makes this a little more obvious
> now.  What seems fairly odd is that there are only various places that
> check for some inode layouts/formats but nothing that does a switch
> over all of them.

(Maybe not explicitly for this part....)

erofs_map_blocks_flatmode()
...
 97         nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
 98         lastblk = nblocks - is_inode_flat_inline(inode);
                                ^ here
...

Believe me EROFS_INODE_FLAT_PLAIN is used widely for EROFS images....
(if EROFS_INODE_FLAT_INLINE tail-end packing is not suitable and
 no compression....)

> 
> > > why are we adding a legacy field to a brand new file system?
> > 
> > The difference is just EROFS_INODE_FLAT_COMPRESSION_LEGACY doesn't
> > have z_erofs_map_header, so it only supports default (4k clustersize)
> > fixed-sized output compression rather than per-file setting, nothing
> > special at all...
> 
> It still seems odd to add a legacy field to a brand new file system.

Since 4.19 EROFS only supports EROFS_INODE_FLAT_COMPRESSION_LEGACY
(per-filesystem setting), we'd like to introduce per-file setting and
more configration for future requirements....

> 
> > > structures, as that keeps it clear in everyones mind what needs to
> > > stay persistent and what can be chenged easily.
> > 
> > All fields in this file are on-disk representation by design
> > (no logic for in-memory presentation).
> 
> Ok, make sense.    Maybe add a note to the top of the file comment
> that this is the on-disk format.
> 
> One little oddity is that erofs_inode_is_data_compressed is here, while
> is_inode_flat_inline is in internal.h.  There are arguments for either
> place, but I'd suggest to keep the related macros together.

(Just my personal thought... erofs_inode_is_data_compressed operates
ondisk field like datamode (because we have 2 datamode for compression,
need to wrap them to judge if the file is compressed...)
so it stays at erofs_fs.h... is_inode_flat_inline operates in-memory
struct inode so it in internal.h....)

Thanks,
Gao Xiang
David Sterba Sept. 2, 2019, 2:07 p.m. UTC | #16
On Mon, Sep 02, 2019 at 10:43:03AM +0200, Pavel Machek wrote:
> > > > Rather than they didn't run "gdb" or "pahole" and change it by mistake.
> > > 
> > > I think Christoph is not right here.
> > > 
> > > Using external tools for validation is extra work
> > > when necessary for understanding the code.
> > 
> > The advantage of using the external tools that the information about
> > offsets is provably correct ...
> 
> No. gdb tells you what the actual offsets _are_.

Ok, reading your reply twice, I think we have different perspectives. I
don't trust the comments.

The tool I had in mind is pahole that parses dwarf information about the
structures, the same as gdb does. The actual value of the struct members
is the thing that needs to be investigated in memory dumps or disk image
dumps.

> > > The expected offset is somewhat valuable, but
> > > perhaps the form is a bit off given the visual
> > > run-in to the field types.
> > > 
> > > The extra work with this form is manipulating all
> > > the offsets whenever a structure change occurs.
> > 
> > ... while this is error prone.
> 
> While the comment tells you what they _should be_.

That's exactly the source of confusion and bugs. For me an acceptable
way of asserting that a value has certain offset is a build check, eg.
like

BUILD_BUG_ON(strct my_superblock, magic, 16);
Pavel Machek Sept. 3, 2019, 11:27 a.m. UTC | #17
Hi!

> > No. gdb tells you what the actual offsets _are_.
> 
> Ok, reading your reply twice, I think we have different perspectives. I
> don't trust the comments.
> 
> The tool I had in mind is pahole that parses dwarf information about the
> structures, the same as gdb does. The actual value of the struct members
> is the thing that needs to be investigated in memory dumps or disk image
> dumps.
> 
> > > > The expected offset is somewhat valuable, but
> > > > perhaps the form is a bit off given the visual
> > > > run-in to the field types.
> > > > 
> > > > The extra work with this form is manipulating all
> > > > the offsets whenever a structure change occurs.
> > > 
> > > ... while this is error prone.
> > 
> > While the comment tells you what they _should be_.
> 
> That's exactly the source of confusion and bugs. For me an acceptable
> way of asserting that a value has certain offset is a build check, eg.
> like
> 
> BUILD_BUG_ON(strct my_superblock, magic, 16);

Yes, that would work, too. As would documentation file with the disk
structures.

Best regards,
								Pavel
diff mbox series

Patch

diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
new file mode 100644
index 000000000000..b307060dd220
--- /dev/null
+++ b/fs/erofs/erofs_fs.h
@@ -0,0 +1,316 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
+/*
+ * linux/fs/erofs/erofs_fs.h
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_H
+#define __EROFS_FS_H
+
+/* Enhanced(Extended) ROM File System */
+#define EROFS_SUPER_OFFSET      1024
+
+/*
+ * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be
+ * incompatible with this kernel version.
+ */
+#define EROFS_REQUIREMENT_LZ4_0PADDING	0x00000001
+#define EROFS_ALL_REQUIREMENTS		0
+
+struct erofs_super_block {
+/*  0 */__le32 magic;           /* in the little endian */
+/*  4 */__le32 checksum;        /* crc32c(super_block) */
+/*  8 */__le32 features;        /* (aka. feature_compat) */
+/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
+/* 13 */__u8 reserved;
+
+/* 14 */__le16 root_nid;
+/* 16 */__le64 inos;            /* total valid ino # (== f_files - f_favail) */
+
+/* 24 */__le64 build_time;      /* inode v1 time derivation */
+/* 32 */__le32 build_time_nsec;
+/* 36 */__le32 blocks;          /* used for statfs */
+/* 40 */__le32 meta_blkaddr;
+/* 44 */__le32 xattr_blkaddr;
+/* 48 */__u8 uuid[16];          /* 128-bit uuid for volume */
+/* 64 */__u8 volume_name[16];   /* volume name */
+/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
+
+/* 84 */__u8 reserved2[44];
+} __packed;                     /* 128 bytes */
+
+/*
+ * erofs inode data mapping:
+ * 0 - inode plain without inline data A:
+ * inode, [xattrs], ... | ... | no-holed data
+ * 1 - inode VLE compression B (legacy):
+ * inode, [xattrs], extents ... | ...
+ * 2 - inode plain with inline data C:
+ * inode, [xattrs], last_inline_data, ... | ... | no-holed data
+ * 3 - inode compression D:
+ * inode, [xattrs], map_header, extents ... | ...
+ * 4~7 - reserved
+ */
+enum {
+	EROFS_INODE_FLAT_PLAIN,
+	EROFS_INODE_FLAT_COMPRESSION_LEGACY,
+	EROFS_INODE_FLAT_INLINE,
+	EROFS_INODE_FLAT_COMPRESSION,
+	EROFS_INODE_LAYOUT_MAX
+};
+
+static bool erofs_inode_is_data_compressed(unsigned int datamode)
+{
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+		return true;
+	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+}
+
+/* bit definitions of inode i_advise */
+#define EROFS_I_VERSION_BITS            1
+#define EROFS_I_DATA_MAPPING_BITS       3
+
+#define EROFS_I_VERSION_BIT             0
+#define EROFS_I_DATA_MAPPING_BIT        1
+
+struct erofs_inode_v1 {
+/*  0 */__le16 i_advise;
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+/*  2 */__le16 i_xattr_icount;
+/*  4 */__le16 i_mode;
+/*  6 */__le16 i_nlink;
+/*  8 */__le32 i_size;
+/* 12 */__le32 i_reserved;
+/* 16 */union {
+		/* file total compressed blocks for data mapping 1 */
+		__le32 compressed_blocks;
+		__le32 raw_blkaddr;
+
+		/* for device files, used to indicate old/new device # */
+		__le32 rdev;
+	} i_u __packed;
+/* 20 */__le32 i_ino;           /* only used for 32-bit stat compatibility */
+/* 24 */__le16 i_uid;
+/* 26 */__le16 i_gid;
+/* 28 */__le32 i_reserved2;
+} __packed;
+
+/* 32 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_V1   0
+/* 64 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_V2   1
+
+struct erofs_inode_v2 {
+/*  0 */__le16 i_advise;
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+/*  2 */__le16 i_xattr_icount;
+/*  4 */__le16 i_mode;
+/*  6 */__le16 i_reserved;
+/*  8 */__le64 i_size;
+/* 16 */union {
+		/* file total compressed blocks for data mapping 1 */
+		__le32 compressed_blocks;
+		__le32 raw_blkaddr;
+
+		/* for device files, used to indicate old/new device # */
+		__le32 rdev;
+	} i_u __packed;
+
+	/* only used for 32-bit stat compatibility */
+/* 20 */__le32 i_ino;
+
+/* 24 */__le32 i_uid;
+/* 28 */__le32 i_gid;
+/* 32 */__le64 i_ctime;
+/* 40 */__le32 i_ctime_nsec;
+/* 44 */__le32 i_nlink;
+/* 48 */__u8   i_reserved2[16];
+} __packed;                     /* 64 bytes */
+
+#define EROFS_MAX_SHARED_XATTRS         (128)
+/* h_shared_count between 129 ... 255 are special # */
+#define EROFS_SHARED_XATTR_EXTENT       (255)
+
+/*
+ * inline xattrs (n == i_xattr_icount):
+ * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
+ *          12 bytes           /                   \
+ *                            /                     \
+ *                           /-----------------------\
+ *                           |  erofs_xattr_entries+ |
+ *                           +-----------------------+
+ * inline xattrs must starts in erofs_xattr_ibody_header,
+ * for read-only fs, no need to introduce h_refcount
+ */
+struct erofs_xattr_ibody_header {
+	__le32 h_reserved;
+	__u8   h_shared_count;
+	__u8   h_reserved2[7];
+	__le32 h_shared_xattrs[0];      /* shared xattr id array */
+} __packed;
+
+/* Name indexes */
+#define EROFS_XATTR_INDEX_USER              1
+#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS  2
+#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define EROFS_XATTR_INDEX_TRUSTED           4
+#define EROFS_XATTR_INDEX_LUSTRE            5
+#define EROFS_XATTR_INDEX_SECURITY          6
+
+/* xattr entry (for both inline & shared xattrs) */
+struct erofs_xattr_entry {
+	__u8   e_name_len;      /* length of name */
+	__u8   e_name_index;    /* attribute name index */
+	__le16 e_value_size;    /* size of attribute value */
+	/* followed by e_name and e_value */
+	char   e_name[0];       /* attribute name */
+} __packed;
+
+#define ondisk_xattr_ibody_size(count)	({\
+	u32 __count = le16_to_cpu(count); \
+	((__count) == 0) ? 0 : \
+	sizeof(struct erofs_xattr_ibody_header) + \
+		sizeof(__u32) * ((__count) - 1); })
+
+#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
+#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
+	sizeof(struct erofs_xattr_entry) + \
+	(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
+
+/* available compression algorithm types */
+enum {
+	Z_EROFS_COMPRESSION_LZ4,
+	Z_EROFS_COMPRESSION_MAX
+};
+
+/*
+ * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+ *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
+ *                                  (4B) + 2B + (4B) if compacted 2B is on.
+ */
+#define Z_EROFS_ADVISE_COMPACTED_2B_BIT         0
+
+#define Z_EROFS_ADVISE_COMPACTED_2B     (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT)
+
+struct z_erofs_map_header {
+	__le32	h_reserved1;
+	__le16	h_advise;
+	/*
+	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
+	 * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
+	 */
+	__u8	h_algorithmtype;
+	/*
+	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
+	 * bit 3-4 : (physical - logical) cluster bits of head 1:
+	 *       For example, if logical clustersize = 4096, 1 for 8192.
+	 * bit 5-7 : (physical - logical) cluster bits of head 2.
+	 */
+	__u8	h_clusterbits;
+};
+
+#define Z_EROFS_VLE_LEGACY_HEADER_PADDING       8
+
+/*
+ * Z_EROFS Variable-sized Logical Extent cluster type:
+ *    0 - literal (uncompressed) cluster
+ *    1 - compressed cluster (for the head logical cluster)
+ *    2 - compressed cluster (for the other logical clusters)
+ *
+ * In detail,
+ *    0 - literal (uncompressed) cluster,
+ *        di_advise = 0
+ *        di_clusterofs = the literal data offset of the cluster
+ *        di_blkaddr = the blkaddr of the literal cluster
+ *
+ *    1 - compressed cluster (for the head logical cluster)
+ *        di_advise = 1
+ *        di_clusterofs = the decompressed data offset of the cluster
+ *        di_blkaddr = the blkaddr of the compressed cluster
+ *
+ *    2 - compressed cluster (for the other logical clusters)
+ *        di_advise = 2
+ *        di_clusterofs =
+ *           the decompressed data offset in its own head cluster
+ *        di_u.delta[0] = distance to its corresponding head cluster
+ *        di_u.delta[1] = distance to its corresponding tail cluster
+ *                (di_advise could be 0, 1 or 2)
+ */
+enum {
+	Z_EROFS_VLE_CLUSTER_TYPE_PLAIN,
+	Z_EROFS_VLE_CLUSTER_TYPE_HEAD,
+	Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD,
+	Z_EROFS_VLE_CLUSTER_TYPE_RESERVED,
+	Z_EROFS_VLE_CLUSTER_TYPE_MAX
+};
+
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT         0
+
+struct z_erofs_vle_decompressed_index {
+	__le16 di_advise;
+	/* where to decompress in the head cluster */
+	__le16 di_clusterofs;
+
+	union {
+		/* for the head cluster */
+		__le32 blkaddr;
+		/*
+		 * for the rest clusters
+		 * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
+		 * [0] - pointing to the head cluster
+		 * [1] - pointing to the tail cluster
+		 */
+		__le16 delta[2];
+	} di_u __packed;		/* 8 bytes */
+} __packed;
+
+#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
+	(round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
+	 sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
+
+/* dirent sorts in alphabet order, thus we can do binary search */
+struct erofs_dirent {
+	__le64 nid;     /*  0, node number */
+	__le16 nameoff; /*  8, start offset of file name */
+	__u8 file_type; /* 10, file type */
+	__u8 reserved;  /* 11, reserved */
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+	EROFS_FT_UNKNOWN,
+	EROFS_FT_REG_FILE,
+	EROFS_FT_DIR,
+	EROFS_FT_CHRDEV,
+	EROFS_FT_BLKDEV,
+	EROFS_FT_FIFO,
+	EROFS_FT_SOCK,
+	EROFS_FT_SYMLINK,
+	EROFS_FT_MAX
+};
+
+#define EROFS_NAME_LEN      255
+
+/* check the EROFS on-disk layout strictly at compile time */
+static inline void erofs_check_ondisk_layout_definitions(void)
+{
+	BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64);
+	BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
+	BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+	BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
+	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
+	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+
+	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
+		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+}
+
+#endif
+
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 1274c692e59c..903cc2d2750b 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -19,6 +19,7 @@ 
 #define SQUASHFS_MAGIC		0x73717368
 #define ECRYPTFS_SUPER_MAGIC	0xf15f
 #define EFS_SUPER_MAGIC		0x414A53
+#define EROFS_SUPER_MAGIC_V1	0xE0F5E1E2
 #define EXT2_SUPER_MAGIC	0xEF53
 #define EXT3_SUPER_MAGIC	0xEF53
 #define XENFS_SUPER_MAGIC	0xabba1974