Message ID | 20200331031450.12182-1-dongli.zhang@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/1] mm: slub: fix corrupted freechain in deactivate_slab() | expand |
Hi, May I get the feedback for this patch? This reduces the chance of page fault when freepointer is corrupted while "slub_debug=F" is set. Thank you very much! Dongli Zhang On 3/30/20 8:14 PM, Dongli Zhang wrote: > The slub_debug is able to fix the corrupted slab freelist/page. However, > alloc_debug_processing() only checks the validity of current and next > freepointer during allocation path. As a result, once some objects have > their freepointers corrupted, deactivate_slab() may lead to page fault. > > Below is from a test kernel module when > 'slub_debug=PUF,kmalloc-128 slub_nomerge'. The test kernel corrupts the > freepointer of one free object on purpose. Unfortunately, deactivate_slab() > does not detect it when iterating the freechain. > > [ 92.665260] BUG: unable to handle page fault for address: 00000000123456f8 > [ 92.671597] #PF: supervisor read access in kernel mode > [ 92.676159] #PF: error_code(0x0000) - not-present page > [ 92.681666] PGD 0 P4D 0 > [ 92.684923] Oops: 0000 [#1] SMP PTI > ... ... > [ 92.706684] RIP: 0010:deactivate_slab.isra.92+0xed/0x490 > ... ... > [ 92.819781] Call Trace: > [ 92.823129] ? ext4_htree_store_dirent+0x30/0xf0 > [ 92.829488] ? ext4_htree_store_dirent+0x30/0xf0 > [ 92.834852] ? stack_trace_save+0x46/0x70 > [ 92.839342] ? init_object+0x66/0x80 > [ 92.843729] ? ___slab_alloc+0x536/0x570 > [ 92.847664] ___slab_alloc+0x536/0x570 > [ 92.851696] ? __find_get_block+0x23d/0x2c0 > [ 92.856763] ? ext4_htree_store_dirent+0x30/0xf0 > [ 92.862258] ? _cond_resched+0x10/0x40 > [ 92.866925] ? __getblk_gfp+0x27/0x2a0 > [ 92.872136] ? ext4_htree_store_dirent+0x30/0xf0 > [ 92.878394] ? 
__slab_alloc+0x17/0x30 > [ 92.883222] __slab_alloc+0x17/0x30 > [ 92.887210] __kmalloc+0x1d9/0x200 > [ 92.891448] ext4_htree_store_dirent+0x30/0xf0 > [ 92.896748] htree_dirblock_to_tree+0xcb/0x1c0 > [ 92.902398] ext4_htree_fill_tree+0x1bc/0x2d0 > [ 92.907749] ext4_readdir+0x54f/0x920 > [ 92.912725] iterate_dir+0x88/0x190 > [ 92.917072] __x64_sys_getdents+0xa6/0x140 > [ 92.922760] ? fillonedir+0xb0/0xb0 > [ 92.927020] ? do_syscall_64+0x49/0x170 > [ 92.931603] ? __ia32_sys_getdents+0x130/0x130 > [ 92.937012] do_syscall_64+0x49/0x170 > [ 92.940754] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > > Therefore, this patch adds extra consistency check in deactivate_slab(). > Once an object's freepointer is corrupted, all following objects starting > at this object are isolated. > > Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com> > --- > mm/slub.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/mm/slub.c b/mm/slub.c > index 6589b41d5a60..c27e2d993535 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -2082,6 +2082,20 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, > void *prior; > unsigned long counters; > > + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && > + !check_valid_pointer(s, page, nextfree)) { > + /* > + * If 'nextfree' is invalid, it is possible that > + * the object at 'freelist' is already corrupted. > + * Therefore, all objects starting at 'freelist' > + * are isolated. > + */ > + object_err(s, page, freelist, "Freechain corrupt"); > + freelist = NULL; > + slab_fix(s, "Isolate corrupted freechain"); > + break; > + } > + > do { > prior = page->freelist; > counters = page->counters; >
On Mon, 30 Mar 2020 20:14:50 -0700 Dongli Zhang <dongli.zhang@oracle.com> wrote: > The slub_debug is able to fix the corrupted slab freelist/page. However, > alloc_debug_processing() only checks the validity of current and next > freepointer during allocation path. As a result, once some objects have > their freepointers corrupted, deactivate_slab() may lead to page fault. > > Below is from a test kernel module when > 'slub_debug=PUF,kmalloc-128 slub_nomerge'. The test kernel corrupts the > freepointer of one free object on purpose. Unfortunately, deactivate_slab() > does not detect it when iterating the freechain. > > ... > > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -2082,6 +2082,20 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, > void *prior; > unsigned long counters; > > + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && > + !check_valid_pointer(s, page, nextfree)) { > + /* > + * If 'nextfree' is invalid, it is possible that > + * the object at 'freelist' is already corrupted. > + * Therefore, all objects starting at 'freelist' > + * are isolated. > + */ > + object_err(s, page, freelist, "Freechain corrupt"); > + freelist = NULL; > + slab_fix(s, "Isolate corrupted freechain"); > + break; > + } > + > do { > prior = page->freelist; > counters = page->counters; We could do it this way: --- a/mm/slub.c~mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix +++ a/mm/slub.c @@ -2083,6 +2083,7 @@ static void deactivate_slab(struct kmem_ void *prior; unsigned long counters; +#ifdef CONFIG_SLAB_DEBUG if ((s->flags & SLAB_CONSISTENCY_CHECKS) && !check_valid_pointer(s, page, nextfree)) { /* @@ -2096,6 +2097,7 @@ static void deactivate_slab(struct kmem_ slab_fix(s, "Isolate corrupted freechain"); break; } +#endif do { prior = page->freelist; But it's a bit ugly. How about this? 
--- a/mm/slub.c~mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix +++ a/mm/slub.c @@ -650,6 +650,20 @@ static void slab_bug(struct kmem_cache * va_end(args); } +static bool freelist_corrupted(struct kmem_cache *s, struct page *page, + void *freelist, void *nextfree) +{ + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && + !check_valid_pointer(s, page, nextfree)) { + object_err(s, page, freelist, "Freechain corrupt"); + freelist = NULL; + slab_fix(s, "Isolate corrupted freechain"); + return true; + } + + return false; +} + static void slab_fix(struct kmem_cache *s, char *fmt, ...) { struct va_format vaf; @@ -1400,6 +1414,11 @@ static inline void inc_slabs_node(struct static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} +static bool freelist_corrupted(struct kmem_cache *s, struct page *page, + void *freelist, void *nextfree) +{ + return false; +} #endif /* CONFIG_SLUB_DEBUG */ /* @@ -2083,19 +2102,13 @@ static void deactivate_slab(struct kmem_ void *prior; unsigned long counters; - if ((s->flags & SLAB_CONSISTENCY_CHECKS) && - !check_valid_pointer(s, page, nextfree)) { - /* - * If 'nextfree' is invalid, it is possible that - * the object at 'freelist' is already corrupted. - * Therefore, all objects starting at 'freelist' - * are isolated. - */ - object_err(s, page, freelist, "Freechain corrupt"); - freelist = NULL; - slab_fix(s, "Isolate corrupted freechain"); + /* + * If 'nextfree' is invalid, it is possible that the object at + * 'freelist' is already corrupted. So isolate all objects + * starting at 'freelist'. + */ + if (freelist_corrupted(s, page, freelist, nextfree)) break; - } do { prior = page->freelist;
On 4/17/20 6:12 PM, Andrew Morton wrote: > On Mon, 30 Mar 2020 20:14:50 -0700 Dongli Zhang <dongli.zhang@oracle.com> wrote: > >> The slub_debug is able to fix the corrupted slab freelist/page. However, >> alloc_debug_processing() only checks the validity of current and next >> freepointer during allocation path. As a result, once some objects have >> their freepointers corrupted, deactivate_slab() may lead to page fault. >> >> Below is from a test kernel module when >> 'slub_debug=PUF,kmalloc-128 slub_nomerge'. The test kernel corrupts the >> freepointer of one free object on purpose. Unfortunately, deactivate_slab() >> does not detect it when iterating the freechain. >> >> ... >> >> --- a/mm/slub.c >> +++ b/mm/slub.c >> @@ -2082,6 +2082,20 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, >> void *prior; >> unsigned long counters; >> >> + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && >> + !check_valid_pointer(s, page, nextfree)) { >> + /* >> + * If 'nextfree' is invalid, it is possible that >> + * the object at 'freelist' is already corrupted. >> + * Therefore, all objects starting at 'freelist' >> + * are isolated. >> + */ >> + object_err(s, page, freelist, "Freechain corrupt"); >> + freelist = NULL; >> + slab_fix(s, "Isolate corrupted freechain"); >> + break; >> + } >> + >> do { >> prior = page->freelist; >> counters = page->counters; > > We could do it this way: > > --- a/mm/slub.c~mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix > +++ a/mm/slub.c > @@ -2083,6 +2083,7 @@ static void deactivate_slab(struct kmem_ > void *prior; > unsigned long counters; > > +#ifdef CONFIG_SLAB_DEBUG > if ((s->flags & SLAB_CONSISTENCY_CHECKS) && > !check_valid_pointer(s, page, nextfree)) { > /* > @@ -2096,6 +2097,7 @@ static void deactivate_slab(struct kmem_ > slab_fix(s, "Isolate corrupted freechain"); > break; > } > +#endif > > do { > prior = page->freelist; > > But it's a bit ugly. How about this? 
Sorry that I did not realize check_valid_pointer() requires CONFIG_SLUB_DEBUG. Yes, it is much better to encapsulate it into freelist_corrupted() and just return false when CONFIG_SLUB_DEBUG is not involved. The check_object() has similar implementation. Should I resend with your "Signed-off-by" or would you just fix it when applying? It is the first time I submit a patch to mm so that I am not familiar with the mm policy/process. Thank you very much for the feedback! Dongli Zhang > > --- a/mm/slub.c~mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix > +++ a/mm/slub.c > @@ -650,6 +650,20 @@ static void slab_bug(struct kmem_cache * > va_end(args); > } > > +static bool freelist_corrupted(struct kmem_cache *s, struct page *page, > + void *freelist, void *nextfree) > +{ > + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && > + !check_valid_pointer(s, page, nextfree)) { > + object_err(s, page, freelist, "Freechain corrupt"); > + freelist = NULL; > + slab_fix(s, "Isolate corrupted freechain"); > + return true; > + } > + > + return false; > +} > + > static void slab_fix(struct kmem_cache *s, char *fmt, ...) > { > struct va_format vaf; > @@ -1400,6 +1414,11 @@ static inline void inc_slabs_node(struct > static inline void dec_slabs_node(struct kmem_cache *s, int node, > int objects) {} > > +static bool freelist_corrupted(struct kmem_cache *s, struct page *page, > + void *freelist, void *nextfree) > +{ > + return false; > +} > #endif /* CONFIG_SLUB_DEBUG */ > > /* > @@ -2083,19 +2102,13 @@ static void deactivate_slab(struct kmem_ > void *prior; > unsigned long counters; > > - if ((s->flags & SLAB_CONSISTENCY_CHECKS) && > - !check_valid_pointer(s, page, nextfree)) { > - /* - * If 'nextfree' is invalid, it is possible that > - * the object at 'freelist' is already corrupted. > - * Therefore, all objects starting at 'freelist' > - * are isolated. 
> - */ > - object_err(s, page, freelist, "Freechain corrupt"); > - freelist = NULL; > - slab_fix(s, "Isolate corrupted freechain"); > + /* > + * If 'nextfree' is invalid, it is possible that the object at > + * 'freelist' is already corrupted. So isolate all objects > + * starting at 'freelist'. > + */ > + if (freelist_corrupted(s, page, freelist, nextfree)) > break; > - } > > do { > prior = page->freelist; > _ >
On Fri, 17 Apr 2020 18:56:51 -0700 Dongli Zhang <dongli.zhang@oracle.com> wrote: > > @@ -2096,6 +2097,7 @@ static void deactivate_slab(struct kmem_ > > slab_fix(s, "Isolate corrupted freechain"); > > break; > > } > > +#endif > > > > do { > > prior = page->freelist; > > > > But it's a bit ugly. How about this? > > Sorry that I did not realize check_valid_pointer() requires CONFIG_SLUB_DEBUG. > > Yes, it is much better to encapsulate it into freelist_corrupted() and just > return false when CONFIG_SLUB_DEBUG is not involved. The check_object() has > similar implementation. > > Should I resend with your "Signed-off-by" or would you just fix it when applying? That's OK. I'll fold the patches together and update the changelog before sending the patch in to Linus.
diff --git a/mm/slub.c b/mm/slub.c index 6589b41d5a60..c27e2d993535 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2082,6 +2082,20 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, void *prior; unsigned long counters; + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && + !check_valid_pointer(s, page, nextfree)) { + /* + * If 'nextfree' is invalid, it is possible that + * the object at 'freelist' is already corrupted. + * Therefore, all objects starting at 'freelist' + * are isolated. + */ + object_err(s, page, freelist, "Freechain corrupt"); + freelist = NULL; + slab_fix(s, "Isolate corrupted freechain"); + break; + } + do { prior = page->freelist; counters = page->counters;
The slub_debug is able to fix the corrupted slab freelist/page. However, alloc_debug_processing() only checks the validity of current and next freepointer during allocation path. As a result, once some objects have their freepointers corrupted, deactivate_slab() may lead to page fault. Below is from a test kernel module when 'slub_debug=PUF,kmalloc-128 slub_nomerge'. The test kernel corrupts the freepointer of one free object on purpose. Unfortunately, deactivate_slab() does not detect it when iterating the freechain. [ 92.665260] BUG: unable to handle page fault for address: 00000000123456f8 [ 92.671597] #PF: supervisor read access in kernel mode [ 92.676159] #PF: error_code(0x0000) - not-present page [ 92.681666] PGD 0 P4D 0 [ 92.684923] Oops: 0000 [#1] SMP PTI ... ... [ 92.706684] RIP: 0010:deactivate_slab.isra.92+0xed/0x490 ... ... [ 92.819781] Call Trace: [ 92.823129] ? ext4_htree_store_dirent+0x30/0xf0 [ 92.829488] ? ext4_htree_store_dirent+0x30/0xf0 [ 92.834852] ? stack_trace_save+0x46/0x70 [ 92.839342] ? init_object+0x66/0x80 [ 92.843729] ? ___slab_alloc+0x536/0x570 [ 92.847664] ___slab_alloc+0x536/0x570 [ 92.851696] ? __find_get_block+0x23d/0x2c0 [ 92.856763] ? ext4_htree_store_dirent+0x30/0xf0 [ 92.862258] ? _cond_resched+0x10/0x40 [ 92.866925] ? __getblk_gfp+0x27/0x2a0 [ 92.872136] ? ext4_htree_store_dirent+0x30/0xf0 [ 92.878394] ? __slab_alloc+0x17/0x30 [ 92.883222] __slab_alloc+0x17/0x30 [ 92.887210] __kmalloc+0x1d9/0x200 [ 92.891448] ext4_htree_store_dirent+0x30/0xf0 [ 92.896748] htree_dirblock_to_tree+0xcb/0x1c0 [ 92.902398] ext4_htree_fill_tree+0x1bc/0x2d0 [ 92.907749] ext4_readdir+0x54f/0x920 [ 92.912725] iterate_dir+0x88/0x190 [ 92.917072] __x64_sys_getdents+0xa6/0x140 [ 92.922760] ? fillonedir+0xb0/0xb0 [ 92.927020] ? do_syscall_64+0x49/0x170 [ 92.931603] ? 
__ia32_sys_getdents+0x130/0x130 [ 92.937012] do_syscall_64+0x49/0x170 [ 92.940754] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Therefore, this patch adds extra consistency check in deactivate_slab(). Once an object's freepointer is corrupted, all following objects starting at this object are isolated. Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com> --- mm/slub.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)