diff mbox series

[f2fs-dev] f2fs: fix unaligned field offset in 32-bits platform

Message ID 20230307151408.58490-1-chao@kernel.org (mailing list archive)
State Accepted
Commit 8cac643eaa93fcac4bd0dcf2da5085e5551adff3
Headers show
Series [f2fs-dev] f2fs: fix unaligned field offset in 32-bits platform | expand

Commit Message

Chao Yu March 7, 2023, 3:14 p.m. UTC
F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
------------[ cut here ]------------
kernel BUG at fs/f2fs/gc.c:602!
Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
PC is at get_victim_by_default+0x13c0/0x1498
LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
....
[<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
[<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
[<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
[<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)

The reason is there is __packed attribute in struct rb_entry, but there
is no __packed attribute in struct victim_entry, so wrong offset of key
field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
it describes memory layouts of struct rb_entry and struct victim_entry in
32-bits platform as below:

struct rb_entry {
   [0] struct rb_node rb_node;
       union {
           struct {...};
  [12]     unsigned long long key;
       } __packed;
}
size of struct rb_entry: 20

struct victim_entry {
   [0] struct rb_node rb_node;
       union {
           struct {...};
  [16]     struct victim_info vi;
       };
  [32] struct list_head list;
}
size of struct victim_entry: 40

This patch tries to add __packed attribute in below structure:
- discard_info, discard_cmd
- extent_info, extent_node
- victim_info, victim_entry
in order to fix this unaligned field offset issue in 32-bits platform.

Fixes: 004b68621897 ("f2fs: use rb-tree to track pending discard commands")
Fixes: 13054c548a1c ("f2fs: introduce infra macro and data structure of rb-tree extent cache")
Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection")
Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Chao Yu <chao@kernel.org>
---
 fs/f2fs/f2fs.h | 6 +++---
 fs/f2fs/gc.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

Comments

Jaegeuk Kim March 7, 2023, 5:26 p.m. UTC | #1
Cc'ed stable. Thanks.

On 03/07, Chao Yu wrote:
> F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> ------------[ cut here ]------------
> kernel BUG at fs/f2fs/gc.c:602!
> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> PC is at get_victim_by_default+0x13c0/0x1498
> LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> ....
> [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> 
> The reason is there is __packed attribute in struct rb_entry, but there
> is no __packed attribute in struct victim_entry, so wrong offset of key
> field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
> it describes memory layouts of struct rb_entry and struct victim_entry in
> 32-bits platform as below:
> 
> struct rb_entry {
>    [0] struct rb_node rb_node;
>        union {
>            struct {...};
>   [12]     unsigned long long key;
>        } __packed;
> }
> size of struct rb_entry: 20
> 
> struct victim_entry {
>    [0] struct rb_node rb_node;
>        union {
>            struct {...};
>   [16]     struct victim_info vi;
>        };
>   [32] struct list_head list;
> }
> size of struct victim_entry: 40
> 
> This patch tries to add __packed attribute in below structure:
> - discard_info, discard_cmd
> - extent_info, extent_node
> - victim_info, victim_entry
> in order to fix this unaligned field offset issue in 32-bits platform.
> 
> Fixes: 004b68621897 ("f2fs: use rb-tree to track pending discard commands")
> Fixes: 13054c548a1c ("f2fs: introduce infra macro and data structure of rb-tree extent cache")
> Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection")
> Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
>  fs/f2fs/f2fs.h | 6 +++---
>  fs/f2fs/gc.h   | 4 ++--
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b0ab2062038a..17fa7572ceed 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -349,7 +349,7 @@ struct discard_info {
>  	block_t lstart;			/* logical start address */
>  	block_t len;			/* length */
>  	block_t start;			/* actual start address in dev */
> -};
> +} __packed;
>  
>  struct discard_cmd {
>  	struct rb_node rb_node;		/* rb node located in rb-tree */
> @@ -361,7 +361,7 @@ struct discard_cmd {
>  		};
>  		struct discard_info di;	/* discard info */
>  
> -	};
> +	} __packed;
>  	struct list_head list;		/* command list */
>  	struct completion wait;		/* compleation */
>  	struct block_device *bdev;	/* bdev */
> @@ -660,7 +660,7 @@ struct extent_info {
>  			unsigned long long last_blocks;
>  		};
>  	};
> -};
> +} __packed;
>  
>  struct extent_node {
>  	struct rb_node rb_node;		/* rb node located in rb-tree */
> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> index 15bd1d680f67..304937d9a084 100644
> --- a/fs/f2fs/gc.h
> +++ b/fs/f2fs/gc.h
> @@ -58,7 +58,7 @@ struct gc_inode_list {
>  struct victim_info {
>  	unsigned long long mtime;	/* mtime of section */
>  	unsigned int segno;		/* section No. */
> -};
> +} __packed;
>  
>  struct victim_entry {
>  	struct rb_node rb_node;		/* rb node located in rb-tree */
> @@ -68,7 +68,7 @@ struct victim_entry {
>  			unsigned int segno;		/* segment No. */
>  		};
>  		struct victim_info vi;	/* victim info */
> -	};
> +	} __packed;
>  	struct list_head list;
>  };
>  
> -- 
> 2.36.1
patchwork-bot+f2fs@kernel.org March 7, 2023, 5:40 p.m. UTC | #2
Hello:

This patch was applied to jaegeuk/f2fs.git (dev)
by Jaegeuk Kim <jaegeuk@kernel.org>:

On Tue,  7 Mar 2023 23:14:08 +0800 you wrote:
> F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> ------------[ cut here ]------------
> kernel BUG at fs/f2fs/gc.c:602!
> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> PC is at get_victim_by_default+0x13c0/0x1498
> LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> ....
> [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> 
> [...]

Here is the summary with links:
  - [f2fs-dev] f2fs: fix unaligned field offset in 32-bits platform
    https://git.kernel.org/jaegeuk/f2fs/c/8cac643eaa93

You are awesome, thank you!
Chao Yu March 8, 2023, 1:31 a.m. UTC | #3
On 2023/3/8 1:26, Jaegeuk Kim wrote:
> Cc'ed stable. Thanks.

Oh, thanks for adding the missed tag! Jaegeuk.

Thanks,

> 
> On 03/07, Chao Yu wrote:
>> F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
>> ------------[ cut here ]------------
>> kernel BUG at fs/f2fs/gc.c:602!
>> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
>> PC is at get_victim_by_default+0x13c0/0x1498
>> LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
>> ....
>> [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
>> [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
>> [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
>> [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
>>
>> The reason is there is __packed attribute in struct rb_entry, but there
>> is no __packed attribute in struct victim_entry, so wrong offset of key
>> field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
>> it describes memory layouts of struct rb_entry and struct victim_entry in
>> 32-bits platform as below:
>>
>> struct rb_entry {
>>     [0] struct rb_node rb_node;
>>         union {
>>             struct {...};
>>    [12]     unsigned long long key;
>>         } __packed;
>> }
>> size of struct rb_entry: 20
>>
>> struct victim_entry {
>>     [0] struct rb_node rb_node;
>>         union {
>>             struct {...};
>>    [16]     struct victim_info vi;
>>         };
>>    [32] struct list_head list;
>> }
>> size of struct victim_entry: 40
>>
>> This patch tries to add __packed attribute in below structure:
>> - discard_info, discard_cmd
>> - extent_info, extent_node
>> - victim_info, victim_entry
>> in order to fix this unaligned field offset issue in 32-bits platform.
>>
>> Fixes: 004b68621897 ("f2fs: use rb-tree to track pending discard commands")
>> Fixes: 13054c548a1c ("f2fs: introduce infra macro and data structure of rb-tree extent cache")
>> Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection")
>> Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> ---
>>   fs/f2fs/f2fs.h | 6 +++---
>>   fs/f2fs/gc.h   | 4 ++--
>>   2 files changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index b0ab2062038a..17fa7572ceed 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -349,7 +349,7 @@ struct discard_info {
>>   	block_t lstart;			/* logical start address */
>>   	block_t len;			/* length */
>>   	block_t start;			/* actual start address in dev */
>> -};
>> +} __packed;
>>   
>>   struct discard_cmd {
>>   	struct rb_node rb_node;		/* rb node located in rb-tree */
>> @@ -361,7 +361,7 @@ struct discard_cmd {
>>   		};
>>   		struct discard_info di;	/* discard info */
>>   
>> -	};
>> +	} __packed;
>>   	struct list_head list;		/* command list */
>>   	struct completion wait;		/* compleation */
>>   	struct block_device *bdev;	/* bdev */
>> @@ -660,7 +660,7 @@ struct extent_info {
>>   			unsigned long long last_blocks;
>>   		};
>>   	};
>> -};
>> +} __packed;
>>   
>>   struct extent_node {
>>   	struct rb_node rb_node;		/* rb node located in rb-tree */
>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>> index 15bd1d680f67..304937d9a084 100644
>> --- a/fs/f2fs/gc.h
>> +++ b/fs/f2fs/gc.h
>> @@ -58,7 +58,7 @@ struct gc_inode_list {
>>   struct victim_info {
>>   	unsigned long long mtime;	/* mtime of section */
>>   	unsigned int segno;		/* section No. */
>> -};
>> +} __packed;
>>   
>>   struct victim_entry {
>>   	struct rb_node rb_node;		/* rb node located in rb-tree */
>> @@ -68,7 +68,7 @@ struct victim_entry {
>>   			unsigned int segno;		/* segment No. */
>>   		};
>>   		struct victim_info vi;	/* victim info */
>> -	};
>> +	} __packed;
>>   	struct list_head list;
>>   };
>>   
>> -- 
>> 2.36.1
David Laight March 8, 2023, 10:16 a.m. UTC | #4
From: Chao Yu <chao@kernel.org>
> Sent: 07 March 2023 15:14
> 
> F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> ------------[ cut here ]------------
> kernel BUG at fs/f2fs/gc.c:602!
> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> PC is at get_victim_by_default+0x13c0/0x1498
> LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> ....
> [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> 
> The reason is there is __packed attribute in struct rb_entry, but there
> is no __packed attribute in struct victim_entry, so wrong offset of key
> field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
> it describes memory layouts of struct rb_entry and struct victim_entry in
> 32-bits platform as below:
> 
> struct rb_entry {
>    [0] struct rb_node rb_node;
>        union {
>            struct {...};
>   [12]     unsigned long long key;
>        } __packed;

This __packed removes the 4-byte pad before the union.
I bet it should be removed...

> }
> size of struct rb_entry: 20
> 
> struct victim_entry {
>    [0] struct rb_node rb_node;
>        union {
>            struct {...};
>   [16]     struct victim_info vi;
>        };
>   [32] struct list_head list;
> }
> size of struct victim_entry: 40
> 
> This patch tries to add __packed attribute in below structure:
> - discard_info, discard_cmd
> - extent_info, extent_node
> - victim_info, victim_entry
> in order to fix this unaligned field offset issue in 32-bits platform.

Have you looked at the amount of extra code that gets generated
on systems that fault misaligned accesses?

Plausibly adding __packed __aligned(4) will restrict the compiler
to just aligning 64bit items on 32bit boundaries.
But even then is you pass the address of a misaligned structure
to another function it will fault later of.

You haven't actually said where the misalignment comes from.
If the code is doing (foo *)(ptr + 1) then that is broken
when the alignments of 'ptr' and 'foo' differ.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Jaegeuk Kim March 9, 2023, 11:25 p.m. UTC | #5
On 03/07, Chao Yu wrote:
> F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> ------------[ cut here ]------------
> kernel BUG at fs/f2fs/gc.c:602!
> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> PC is at get_victim_by_default+0x13c0/0x1498
> LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> ....
> [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> 
> The reason is there is __packed attribute in struct rb_entry, but there
> is no __packed attribute in struct victim_entry, so wrong offset of key
> field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
> it describes memory layouts of struct rb_entry and struct victim_entry in
> 32-bits platform as below:
> 
> struct rb_entry {
>    [0] struct rb_node rb_node;
>        union {
>            struct {...};
>   [12]     unsigned long long key;
>        } __packed;
> }
> size of struct rb_entry: 20
> 
> struct victim_entry {
>    [0] struct rb_node rb_node;
>        union {
>            struct {...};
>   [16]     struct victim_info vi;

Why 16?
Don't we get the key from "unsigned long long mtime" starting at [12]?

	struct rb_node rb_node;		/* rb node located in rb-tree */
	union {
		struct {
			unsigned long long mtime;	/* mtime of section */
			unsigned int segno;		/* segment No. */
		};
		struct victim_info vi;	/* victim info */
	}; 

>        };
>   [32] struct list_head list;
> }
> size of struct victim_entry: 40
> 
> This patch tries to add __packed attribute in below structure:
> - discard_info, discard_cmd
> - extent_info, extent_node
> - victim_info, victim_entry
> in order to fix this unaligned field offset issue in 32-bits platform.
> 
> Fixes: 004b68621897 ("f2fs: use rb-tree to track pending discard commands")
> Fixes: 13054c548a1c ("f2fs: introduce infra macro and data structure of rb-tree extent cache")
> Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection")
> Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
>  fs/f2fs/f2fs.h | 6 +++---
>  fs/f2fs/gc.h   | 4 ++--
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b0ab2062038a..17fa7572ceed 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -349,7 +349,7 @@ struct discard_info {
>  	block_t lstart;			/* logical start address */
>  	block_t len;			/* length */
>  	block_t start;			/* actual start address in dev */
> -};
> +} __packed;
>  
>  struct discard_cmd {
>  	struct rb_node rb_node;		/* rb node located in rb-tree */
> @@ -361,7 +361,7 @@ struct discard_cmd {
>  		};
>  		struct discard_info di;	/* discard info */
>  
> -	};
> +	} __packed;
>  	struct list_head list;		/* command list */
>  	struct completion wait;		/* compleation */
>  	struct block_device *bdev;	/* bdev */
> @@ -660,7 +660,7 @@ struct extent_info {
>  			unsigned long long last_blocks;
>  		};
>  	};
> -};
> +} __packed;
>  
>  struct extent_node {
>  	struct rb_node rb_node;		/* rb node located in rb-tree */
> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> index 15bd1d680f67..304937d9a084 100644
> --- a/fs/f2fs/gc.h
> +++ b/fs/f2fs/gc.h
> @@ -58,7 +58,7 @@ struct gc_inode_list {
>  struct victim_info {
>  	unsigned long long mtime;	/* mtime of section */
>  	unsigned int segno;		/* section No. */
> -};
> +} __packed;
>  
>  struct victim_entry {
>  	struct rb_node rb_node;		/* rb node located in rb-tree */
> @@ -68,7 +68,7 @@ struct victim_entry {
>  			unsigned int segno;		/* segment No. */
>  		};
>  		struct victim_info vi;	/* victim info */
> -	};
> +	} __packed;
>  	struct list_head list;
>  };
>  
> -- 
> 2.36.1
Jaegeuk Kim March 9, 2023, 11:54 p.m. UTC | #6
On 03/08, David Laight wrote:
> From: Chao Yu <chao@kernel.org>
> > Sent: 07 March 2023 15:14
> > 
> > F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> > ------------[ cut here ]------------
> > kernel BUG at fs/f2fs/gc.c:602!
> > Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> > PC is at get_victim_by_default+0x13c0/0x1498
> > LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> > ....
> > [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> > [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> > [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> > [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> > 
> > The reason is there is __packed attribute in struct rb_entry, but there
> > is no __packed attribute in struct victim_entry, so wrong offset of key
> > field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
> > it describes memory layouts of struct rb_entry and struct victim_entry in
> > 32-bits platform as below:
> > 
> > struct rb_entry {
> >    [0] struct rb_node rb_node;
> >        union {
> >            struct {...};
> >   [12]     unsigned long long key;
> >        } __packed;
> 
> This __packed removes the 4-byte pad before the union.
> I bet it should be removed...

struct rb_node {
        unsigned long  __rb_parent_color;
        struct rb_node *rb_right;
        struct rb_node *rb_left;
} __attribute__((aligned(sizeof(long))));

Hmm, isn't this aligned to 32bits originally? Why does 32bits pad 4-bytes
if we remove __packed?

> 
> > }
> > size of struct rb_entry: 20
> > 
> > struct victim_entry {
> >    [0] struct rb_node rb_node;
> >        union {
> >            struct {...};
> >   [16]     struct victim_info vi;
> >        };
> >   [32] struct list_head list;
> > }
> > size of struct victim_entry: 40
> > 
> > This patch tries to add __packed attribute in below structure:
> > - discard_info, discard_cmd
> > - extent_info, extent_node
> > - victim_info, victim_entry
> > in order to fix this unaligned field offset issue in 32-bits platform.
> 
> Have you looked at the amount of extra code that gets generated
> on systems that fault misaligned accesses?
> 
> Plausibly adding __packed __aligned(4) will restrict the compiler
> to just aligning 64bit items on 32bit boundaries.
> But even then is you pass the address of a misaligned structure
> to another function it will fault later of.
> 
> You haven't actually said where the misalignment comes from.
> If the code is doing (foo *)(ptr + 1) then that is broken
> when the alignments of 'ptr' and 'foo' differ.

IIUC, the problem comes since we access the same object with two structures
to handle rb_tree.

E.g.,

[struct extent_node]                   [struct rb_entry]
struct rb_node rb_node;                struct rb_node rb_node;
                                       union {
struct extent_info ei;                   struct {
  unsigned int fofs;                       unsigned int ofs;
  unsigned int len;                        unsigned int len;
                                         };
                                         unsigned long long key;
                                       } __packed;

So, I think if we get a different offset of fofs or ofs between in
extent_node and rb_entry, further work'll access a wrong memory since
we simply cast the object pointer between two.

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
David Laight March 10, 2023, 9:28 a.m. UTC | #7
From: Jaegeuk Kim
> Sent: 09 March 2023 23:55
> 
> On 03/08, David Laight wrote:
> > From: Chao Yu <chao@kernel.org>
> > > Sent: 07 March 2023 15:14
> > >
> > > F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> > > ------------[ cut here ]------------
> > > kernel BUG at fs/f2fs/gc.c:602!
> > > Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> > > PC is at get_victim_by_default+0x13c0/0x1498
> > > LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> > > ....
> > > [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> > > [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> > > [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> > > [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> > >
> > > The reason is there is __packed attribute in struct rb_entry, but there
> > > is no __packed attribute in struct victim_entry, so wrong offset of key
> > > field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
> > > it describes memory layouts of struct rb_entry and struct victim_entry in
> > > 32-bits platform as below:
> > >
> > > struct rb_entry {
> > >    [0] struct rb_node rb_node;
> > >        union {
> > >            struct {...};
> > >   [12]     unsigned long long key;
> > >        } __packed;
> >
> > This __packed removes the 4-byte pad before the union.
> > I bet it should be removed...
> 
> struct rb_node {
>         unsigned long  __rb_parent_color;
>         struct rb_node *rb_right;
>         struct rb_node *rb_left;
> } __attribute__((aligned(sizeof(long))));
> 
> Hmm, isn't this aligned to 32bits originally? Why does 32bits pad 4-bytes
> if we remove __packed?

That attribute is entirely pointless.
IIRC It is a request to increase the alignment to that of long.
It wouldn't have any effect even if 'packed' was also specified.
(Unless you are building for 64-bit windows.)

The 'issue' is that on arm32 (unlike x86) 'long long' has
64bit alignment.
So without the __packed on the union there is a 4 byte hole
before the union.

...
> IIUC, the problem comes since we access the same object with two structures
> to handle rb_tree.
> 
> E.g.,
> 
> [struct extent_node]                   [struct rb_entry]
> struct rb_node rb_node;                struct rb_node rb_node;
>                                        union {
> struct extent_info ei;                   struct {
>   unsigned int fofs;                       unsigned int ofs;
>   unsigned int len;                        unsigned int len;
>                                          };
>                                          unsigned long long key;
>                                        } __packed;
> 
> So, I think if we get a different offset of fofs or ofs between in
> extent_node and rb_entry, further work'll access a wrong memory since
> we simply cast the object pointer between two.

That example actually happens to work.
But replace 'unsigned int' with 'long long' and it all fails.

That is horribly broken (by design).
You can't do that and expect it to work at all.
This is another case where you don't want __packed, but to
request a compilation error if padding was added.

The safest 'fix' (it is still broken by design) is to change the
alignment of rb_node to be that of 'long long' and remove the
__packed from the union.
That adds a 4 byte pad to rb_node on some, but not all, 32bit
architectures.
Then all the code that used the above pattern is (probably) ok.

You can use (if I've spelt them right) aligned(Alignof(long long))
but not aligned(__alignof(long long)) because the latter returns
the preferred alignment not the actual alignment (8 not 4 on x86).
I think Alignof() is ok for current kernels, but not for anything
that might get backported to stable.
You can use __alignof(struct {long long x;}).

Actually this should also work:
struct rb_node {
    union {
        long long alignment;
        struct {
            unsigned long  __rb_parent_color;
            struct rb_node *rb_right;
            struct rb_node *rb_left;
        }
    }
};

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Jaegeuk Kim March 10, 2023, 9:08 p.m. UTC | #8
On 03/10, David Laight wrote:
> From: Jaegeuk Kim
> > Sent: 09 March 2023 23:55
> > 
> > On 03/08, David Laight wrote:
> > > From: Chao Yu <chao@kernel.org>
> > > > Sent: 07 March 2023 15:14
> > > >
> > > > F2FS-fs (dm-x): inconsistent rbtree, cur(3470333575168) next(3320009719808)
> > > > ------------[ cut here ]------------
> > > > kernel BUG at fs/f2fs/gc.c:602!
> > > > Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
> > > > PC is at get_victim_by_default+0x13c0/0x1498
> > > > LR is at f2fs_check_rb_tree_consistence+0xc4/0xd4
> > > > ....
> > > > [<c04d98b0>] (get_victim_by_default) from [<c04d4f44>] (f2fs_gc+0x220/0x6cc)
> > > > [<c04d4f44>] (f2fs_gc) from [<c04d4780>] (gc_thread_func+0x2ac/0x708)
> > > > [<c04d4780>] (gc_thread_func) from [<c015c774>] (kthread+0x1a8/0x1b4)
> > > > [<c015c774>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
> > > >
> > > > The reason is there is __packed attribute in struct rb_entry, but there
> > > > is no __packed attribute in struct victim_entry, so wrong offset of key
> > > > field will be parsed in struct rb_entry in f2fs_check_rb_tree_consistence,
> > > > it describes memory layouts of struct rb_entry and struct victim_entry in
> > > > 32-bits platform as below:
> > > >
> > > > struct rb_entry {
> > > >    [0] struct rb_node rb_node;
> > > >        union {
> > > >            struct {...};
> > > >   [12]     unsigned long long key;
> > > >        } __packed;
> > >
> > > This __packed removes the 4-byte pad before the union.
> > > I bet it should be removed...
> > 
> > struct rb_node {
> >         unsigned long  __rb_parent_color;
> >         struct rb_node *rb_right;
> >         struct rb_node *rb_left;
> > } __attribute__((aligned(sizeof(long))));
> > 
> > Hmm, isn't this aligned to 32bits originally? Why does 32bits pad 4-bytes
> > if we remove __packed?
> 
> That attribute is entirely pointless.
> IIRC It is a request to increase the alignment to that of long.
> It wouldn't have any effect even if 'packed' was also specified.
> (Unless you are building for 64-bit windows.)
> 
> The 'issue' is that on arm32 (unlike x86) 'long long' has
> 64bit alignment.
> So without the __packed on the union there is a 4 byte hole
> before the union.
> 
> ...
> > IIUC, the problem comes since we access the same object with two structures
> > to handle rb_tree.
> > 
> > E.g.,
> > 
> > [struct extent_node]                   [struct rb_entry]
> > struct rb_node rb_node;                struct rb_node rb_node;
> >                                        union {
> > struct extent_info ei;                   struct {
> >   unsigned int fofs;                       unsigned int ofs;
> >   unsigned int len;                        unsigned int len;
> >                                          };
> >                                          unsigned long long key;
> >                                        } __packed;
> > 
> > So, I think if we get a different offset of fofs or ofs between in
> > extent_node and rb_entry, further work'll access a wrong memory since
> > we simply cast the object pointer between two.
> 
> That example actually happens to work.
> But replace 'unsigned int' with 'long long' and it all fails.
> 
> That is horribly broken (by design).
> You can't do that and expect it to work at all.
> This is another case where you don't want __packed, but to
> request a compilation error if padding was added.
> 
> The safest 'fix' (it is still broken by design) is to change the
> alignment of rb_node to be that of 'long long' and remove the
> __packed from the union.
> That adds a 4 byte pad to rb_node on some, but not all, 32bit
> architectures.
> Then all the code that used the above pattern is (probably) ok.
> 
> You can use (if I've spelt them right) aligned(Alignof(long long))
> but not aligned(__alignof(long long)) because the latter returns
> the preferred alignment not the actual alignment (8 not 4 on x86).
> I think Alignof() is ok for current kernels, but not for anything
> that might get backported to stable.
> You can use __alignof(struct {long long x;}).
> 
> Actually this should also work:
> struct rb_node {
>     union {
>         long long alignment;
>         struct {
>             unsigned long  __rb_parent_color;
>             struct rb_node *rb_right;
>             struct rb_node *rb_left;
>         }
>     }
> };

Thank you for the explanation. IMHO, it'd be good to keep the existing rb_node
for all the other components, but a problem of wrong design in f2fs.

I posted three patches to remove this buggy rb_entry sharing.
https://lore.kernel.org/lkml/20230310210454.2350881-1-jaegeuk@kernel.org/T/#t

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b0ab2062038a..17fa7572ceed 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -349,7 +349,7 @@  struct discard_info {
 	block_t lstart;			/* logical start address */
 	block_t len;			/* length */
 	block_t start;			/* actual start address in dev */
-};
+} __packed;
 
 struct discard_cmd {
 	struct rb_node rb_node;		/* rb node located in rb-tree */
@@ -361,7 +361,7 @@  struct discard_cmd {
 		};
 		struct discard_info di;	/* discard info */
 
-	};
+	} __packed;
 	struct list_head list;		/* command list */
 	struct completion wait;		/* compleation */
 	struct block_device *bdev;	/* bdev */
@@ -660,7 +660,7 @@  struct extent_info {
 			unsigned long long last_blocks;
 		};
 	};
-};
+} __packed;
 
 struct extent_node {
 	struct rb_node rb_node;		/* rb node located in rb-tree */
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 15bd1d680f67..304937d9a084 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -58,7 +58,7 @@  struct gc_inode_list {
 struct victim_info {
 	unsigned long long mtime;	/* mtime of section */
 	unsigned int segno;		/* section No. */
-};
+} __packed;
 
 struct victim_entry {
 	struct rb_node rb_node;		/* rb node located in rb-tree */
@@ -68,7 +68,7 @@  struct victim_entry {
 			unsigned int segno;		/* segment No. */
 		};
 		struct victim_info vi;	/* victim info */
-	};
+	} __packed;
 	struct list_head list;
 };