Message ID | 20230825211426.3798691-1-jannh@google.com (mailing list archive) |
---|---|
State | Handled Elsewhere |
Series | slub: Introduce CONFIG_SLUB_RCU_DEBUG |
On Fri, 25 Aug 2023 at 23:15, Jann Horn <jannh@google.com> wrote: > > Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU > slabs because use-after-free is allowed within the RCU grace period by > design. > > Add a SLUB debugging feature which RCU-delays every individual > kmem_cache_free() before either actually freeing the object or handing it > off to KASAN, and change KASAN to poison freed objects as normal when this > option is enabled. > > Note that this creates a 16-byte unpoisoned area in the middle of the > slab metadata area, which kinda sucks but seems to be necessary in order > to be able to store an rcu_head in there without triggering an ASAN > splat during RCU callback processing. Nice! Can't we unpoision this rcu_head right before call_rcu() and repoison after receiving the callback? What happens on cache destruction? Currently we purge quarantine on cache destruction to be able to safely destroy the cache. I suspect we may need to somehow purge rcu callbacks as well, or do something else. > For now I've configured Kconfig.kasan to always enable this feature in the > GENERIC and SW_TAGS modes; I'm not forcibly enabling it in HW_TAGS mode > because I'm not sure if it might have unwanted performance degradation > effects there. > > Signed-off-by: Jann Horn <jannh@google.com> > --- > can I get a review from the KASAN folks of this? > I have been running it on my laptop for a bit and it seems to be working > fine. > > Notes: > With this patch, a UAF on a TYPESAFE_BY_RCU will splat with an error > like this (tested by reverting a security bugfix). > Note that, in the ASAN memory state dump, we can see the little > unpoisoned 16-byte areas storing the rcu_head. > > BUG: KASAN: slab-use-after-free in folio_lock_anon_vma_read+0x129/0x4c0 > Read of size 8 at addr ffff888004e85b00 by task forkforkfork/592 > > CPU: 0 PID: 592 Comm: forkforkfork Not tainted 6.5.0-rc7-00105-gae70c1e1f6f5-dirty #334 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > Call Trace: > <TASK> > dump_stack_lvl+0x4a/0x80 > print_report+0xcf/0x660 > kasan_report+0xd4/0x110 > folio_lock_anon_vma_read+0x129/0x4c0 > rmap_walk_anon+0x1cc/0x290 > folio_referenced+0x277/0x2a0 > shrink_folio_list+0xb8c/0x1680 > reclaim_folio_list+0xdc/0x1f0 > reclaim_pages+0x211/0x280 > madvise_cold_or_pageout_pte_range+0x812/0xb70 > walk_pgd_range+0x70b/0xce0 > __walk_page_range+0x343/0x360 > walk_page_range+0x227/0x280 > madvise_pageout+0x1cd/0x2d0 > do_madvise+0x552/0x15a0 > __x64_sys_madvise+0x62/0x70 > do_syscall_64+0x3b/0x90 > entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [...] 
> </TASK> > > Allocated by task 574: > kasan_save_stack+0x33/0x60 > kasan_set_track+0x25/0x30 > __kasan_slab_alloc+0x6e/0x70 > kmem_cache_alloc+0xfd/0x2b0 > anon_vma_fork+0x88/0x270 > dup_mmap+0x87c/0xc10 > copy_process+0x3399/0x3590 > kernel_clone+0x10e/0x480 > __do_sys_clone+0xa1/0xe0 > do_syscall_64+0x3b/0x90 > entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > > Freed by task 0: > kasan_save_stack+0x33/0x60 > kasan_set_track+0x25/0x30 > kasan_save_free_info+0x2b/0x50 > __kasan_slab_free+0xfe/0x180 > slab_free_after_rcu_debug+0xad/0x200 > rcu_core+0x638/0x1620 > __do_softirq+0x14c/0x581 > > Last potentially related work creation: > kasan_save_stack+0x33/0x60 > __kasan_record_aux_stack+0x94/0xa0 > __call_rcu_common.constprop.0+0x47/0x730 > __put_anon_vma+0x6e/0x150 > unlink_anon_vmas+0x277/0x2e0 > vma_complete+0x341/0x580 > vma_merge+0x613/0xff0 > mprotect_fixup+0x1c0/0x510 > do_mprotect_pkey+0x5a7/0x710 > __x64_sys_mprotect+0x47/0x60 > do_syscall_64+0x3b/0x90 > entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > > Second to last potentially related work creation: > [...] > > The buggy address belongs to the object at ffff888004e85b00 > which belongs to the cache anon_vma of size 192 > The buggy address is located 0 bytes inside of > freed 192-byte region [ffff888004e85b00, ffff888004e85bc0) > > The buggy address belongs to the physical page: > [...] > > Memory state around the buggy address: > ffff888004e85a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > ffff888004e85a80: 00 00 00 00 00 00 00 00 fc 00 00 fc fc fc fc fc > >ffff888004e85b00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ^ > ffff888004e85b80: fb fb fb fb fb fb fb fb fc 00 00 fc fc fc fc fc > ffff888004e85c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > > include/linux/kasan.h | 6 ++++ > include/linux/slub_def.h | 3 ++ > lib/Kconfig.kasan | 2 ++ > mm/Kconfig.debug | 21 +++++++++++++ > mm/kasan/common.c | 15 ++++++++- > mm/slub.c | 66 +++++++++++++++++++++++++++++++++++++--- > 6 files changed, 107 insertions(+), 6 deletions(-) > > diff --git a/include/linux/kasan.h b/include/linux/kasan.h > index 819b6bc8ac08..45e07caf4704 100644 > --- a/include/linux/kasan.h > +++ b/include/linux/kasan.h > @@ -229,6 +229,8 @@ static __always_inline bool kasan_check_byte(const void *addr) > return true; > } > > +size_t kasan_align(size_t size); > + > #else /* CONFIG_KASAN */ > > static inline void kasan_unpoison_range(const void *address, size_t size) {} > @@ -278,6 +280,10 @@ static inline bool kasan_check_byte(const void *address) > { > return true; > } > +static inline size_t kasan_align(size_t size) > +{ > + return size; > +} > > #endif /* CONFIG_KASAN */ > > diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h > index deb90cf4bffb..b87be8fce64a 100644 > --- a/include/linux/slub_def.h > +++ b/include/linux/slub_def.h > @@ -120,6 +120,9 @@ struct kmem_cache { > int refcount; /* Refcount for slab cache destroy */ > void (*ctor)(void *); > unsigned int inuse; /* Offset to metadata */ > +#ifdef CONFIG_SLUB_RCU_DEBUG > + unsigned int debug_rcu_head_offset; > +#endif > unsigned int align; /* Alignment */ > unsigned int red_left_pad; /* Left redzone padding size */ > const char *name; /* Name (only for display!) 
*/ > diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan > index fdca89c05745..7ff7de96c6e4 100644 > --- a/lib/Kconfig.kasan > +++ b/lib/Kconfig.kasan > @@ -79,6 +79,7 @@ config KASAN_GENERIC > depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC > depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS > select SLUB_DEBUG if SLUB > + select SLUB_RCU_DEBUG if SLUB_DEBUG > select CONSTRUCTORS > help > Enables Generic KASAN. > @@ -96,6 +97,7 @@ config KASAN_SW_TAGS > depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS > depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS > select SLUB_DEBUG if SLUB > + select SLUB_RCU_DEBUG if SLUB_DEBUG > select CONSTRUCTORS > help > Enables Software Tag-Based KASAN. > diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug > index 018a5bd2f576..99cce7f0fbef 100644 > --- a/mm/Kconfig.debug > +++ b/mm/Kconfig.debug > @@ -78,6 +78,27 @@ config SLUB_DEBUG_ON > off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying > "slub_debug=-". > > +config SLUB_RCU_DEBUG > + bool "Make use-after-free detection possible in TYPESAFE_BY_RCU caches" > + depends on SLUB && SLUB_DEBUG > + default n > + help > + Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache > + was not marked as SLAB_TYPESAFE_BY_RCU and every caller used > + kfree_rcu() instead. > + > + This is intended for use in combination with KASAN, to enable KASAN to > + detect use-after-free accesses in such caches. > + (KFENCE is able to do that independent of this flag.) > + > + This might degrade performance. > + > + If you're using this for testing bugs / fuzzing and care about > + catching all the bugs WAY more than performance, you might want to > + also turn on CONFIG_RCU_STRICT_GRACE_PERIOD. > + > + If unsure, say N. > + > config PAGE_OWNER > bool "Track page owner" > depends on DEBUG_KERNEL && STACKTRACE_SUPPORT > diff --git a/mm/kasan/common.c b/mm/kasan/common.c > index 256930da578a..b4a3504f9f5e 100644 > --- a/mm/kasan/common.c > +++ b/mm/kasan/common.c > @@ -191,6 +191,13 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, > if (kasan_requires_meta()) > kasan_init_object_meta(cache, object); > > +#ifdef CONFIG_SLUB_RCU_DEBUG > + if (cache->flags & SLAB_TYPESAFE_BY_RCU) { > + kasan_unpoison(object + cache->debug_rcu_head_offset, > + sizeof(struct rcu_head), false); > + } > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */ > object = set_tag(object, assign_tag(cache, object, true)); > > @@ -218,7 +225,8 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object, > } > > /* RCU slabs could be legally used after free within the RCU period */ > - if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) > + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU) && > + !IS_ENABLED(CONFIG_SLUB_RCU_DEBUG)) > return false; > > if (!kasan_byte_accessible(tagged_object)) { > @@ -450,3 +458,8 @@ bool __kasan_check_byte(const void *address, unsigned long ip) > } > return true; > } > + > +size_t kasan_align(size_t size) > +{ > + return round_up(size, KASAN_GRANULE_SIZE); > +} > diff --git a/mm/slub.c b/mm/slub.c > index e3b5d5c0eb3a..bae6c2bc1e5f 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -1108,7 +1108,8 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, > * A. Free pointer (if we cannot overwrite object on free) > * B. Tracking data for SLAB_STORE_USER > * C. Original request size for kmalloc object (SLAB_STORE_USER enabled) > - * D. 
Padding to reach required alignment boundary or at minimum > + * D. RCU head for CONFIG_SLUB_RCU_DEBUG (with padding around it) > + * E. Padding to reach required alignment boundary or at minimum > * one word if debugging is on to be able to detect writes > * before the word boundary. > * > @@ -1134,6 +1135,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) > off += sizeof(unsigned int); > } > > +#ifdef CONFIG_SLUB_RCU_DEBUG > + if (s->flags & SLAB_TYPESAFE_BY_RCU) > + off = kasan_align(s->debug_rcu_head_offset + sizeof(struct rcu_head)); > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > off += kasan_metadata_size(s, false); > > if (size_from_object(s) == off) > @@ -1751,12 +1757,17 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, > #endif > #endif /* CONFIG_SLUB_DEBUG */ > > +#ifdef CONFIG_SLUB_RCU_DEBUG > +static void slab_free_after_rcu_debug(struct rcu_head *rcu_head); > +#endif > + > /* > * Hooks for other subsystems that check memory allocations. In a typical > * production configuration these hooks all should produce no code at all. > */ > static __always_inline bool slab_free_hook(struct kmem_cache *s, > - void *x, bool init) > + void *x, bool init, > + bool after_rcu_delay) > { > kmemleak_free_recursive(x, s->flags); > kmsan_slab_free(s, x); > @@ -1766,8 +1777,18 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, > if (!(s->flags & SLAB_DEBUG_OBJECTS)) > debug_check_no_obj_freed(x, s->object_size); > > +#ifdef CONFIG_SLUB_RCU_DEBUG > + /* kfence does its own RCU delay */ > + if ((s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay && > + !is_kfence_address(x)) { > + call_rcu(kasan_reset_tag(x) + s->debug_rcu_head_offset, > + slab_free_after_rcu_debug); > + return true; > + } > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > /* Use KCSAN to help debug racy use-after-free. */ > - if (!(s->flags & SLAB_TYPESAFE_BY_RCU)) > + if (!(s->flags & SLAB_TYPESAFE_BY_RCU) || after_rcu_delay) > __kcsan_check_access(x, s->object_size, > KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); > > @@ -1802,7 +1823,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > void *old_tail = *tail ? 
*tail : *head; > > if (is_kfence_address(next)) { > - slab_free_hook(s, next, false); > + slab_free_hook(s, next, false, false); > return true; > } > > @@ -1815,7 +1836,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > next = get_freepointer(s, object); > > /* If object's reuse doesn't have to be delayed */ > - if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { > + if (!slab_free_hook(s, object, slab_want_init_on_free(s), false)) { > /* Move object to the new freelist */ > set_freepointer(s, object, *head); > *head = object; > @@ -3802,6 +3823,31 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, > do_slab_free(s, slab, head, tail, cnt, addr); > } > > +#ifdef CONFIG_SLUB_RCU_DEBUG > +static void slab_free_after_rcu_debug(struct rcu_head *rcu_head) > +{ > + struct slab *slab = virt_to_slab(rcu_head); > + struct kmem_cache *s; > + void *object; > + > + if (WARN_ON(is_kfence_address(rcu_head))) > + return; > + > + /* find the object and the cache again */ > + if (WARN_ON(!slab)) > + return; > + s = slab->slab_cache; > + if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU))) > + return; > + object = (void *)rcu_head - s->debug_rcu_head_offset; > + > + /* resume freeing */ > + if (slab_free_hook(s, object, slab_want_init_on_free(s), true)) > + return; > + do_slab_free(s, slab, object, NULL, 1, _THIS_IP_); > +} > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > #ifdef CONFIG_KASAN_GENERIC > void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) > { > @@ -4443,6 +4489,16 @@ static int calculate_sizes(struct kmem_cache *s) > if (flags & SLAB_KMALLOC) > size += sizeof(unsigned int); > } > + > +#ifdef CONFIG_SLUB_RCU_DEBUG > + if (flags & SLAB_TYPESAFE_BY_RCU) { > + size = kasan_align(size); > + size = ALIGN(size, __alignof__(struct rcu_head)); > + s->debug_rcu_head_offset = size; > + size += sizeof(struct rcu_head); > + size = kasan_align(size); > + } > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > #endif > > kasan_cache_create(s, &size, &s->flags); > > base-commit: 4f9e7fabf8643003afefc172e62dd276686f016e > -- > 2.42.0.rc1.204.g551eb34607-goog >
On Sat, Aug 26, 2023 at 5:32 AM Dmitry Vyukov <dvyukov@google.com> wrote: > On Fri, 25 Aug 2023 at 23:15, Jann Horn <jannh@google.com> wrote: > > Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU > > slabs because use-after-free is allowed within the RCU grace period by > > design. > > > > Add a SLUB debugging feature which RCU-delays every individual > > kmem_cache_free() before either actually freeing the object or handing it > > off to KASAN, and change KASAN to poison freed objects as normal when this > > option is enabled. > > > > Note that this creates a 16-byte unpoisoned area in the middle of the > > slab metadata area, which kinda sucks but seems to be necessary in order > > to be able to store an rcu_head in there without triggering an ASAN > > splat during RCU callback processing. > > Nice! > > Can't we unpoision this rcu_head right before call_rcu() and repoison > after receiving the callback? Yeah, I think that should work. It looks like currently kasan_unpoison() is exposed in include/linux/kasan.h but kasan_poison() is not, and its inline definition probably means I can't just move it out of mm/kasan/kasan.h into include/linux/kasan.h; do you have a preference for how I should handle this? Hmm, and it also looks like code outside of mm/kasan/ anyway wouldn't know what are valid values for the "value" argument to kasan_poison(). I also have another feature idea that would also benefit from having something like kasan_poison() available in include/linux/kasan.h, so I would prefer that over adding another special-case function inside KASAN for poisoning this piece of slab metadata... I guess I could define a wrapper around kasan_poison() in mm/kasan/generic.c that uses a new poison value for "some other part of the kernel told us to poison this area", and then expose that wrapper with a declaration in include/mm/kasan.h? Something like: void kasan_poison_outline(const void *addr, size_t size, bool init) { kasan_poison(addr, size, KASAN_CUSTOM, init); } > What happens on cache destruction? > Currently we purge quarantine on cache destruction to be able to > safely destroy the cache. I suspect we may need to somehow purge rcu > callbacks as well, or do something else. Ooh, good point, I hadn't thought about that... currently shutdown_cache() assumes that all the objects have already been freed, then puts the kmem_cache on a list for slab_caches_to_rcu_destroy_workfn(), which then waits with an rcu_barrier() until the slab's pages are all gone. Luckily kmem_cache_destroy() is already a sleepable operation, so maybe I should just slap another rcu_barrier() in there for builds with this config option enabled... I think that should be fine for an option mostly intended for debugging.
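A rough sketch of how Dmitry's unpoison/repoison idea could be wired into the patch, under clearly labeled assumptions: slab_free_delay_rcu_debug() is a made-up helper name, kasan_poison_outline() is only the wrapper proposed in the mail above (not an existing KASAN API), and the exact call sites differ from what the posted patch does:

#ifdef CONFIG_SLUB_RCU_DEBUG
/* RCU callback: repoison the rcu_head, then resume the interrupted free. */
static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
{
	struct slab *slab = virt_to_slab(rcu_head);
	struct kmem_cache *s = slab->slab_cache;
	void *object = (void *)rcu_head - s->debug_rcu_head_offset;

	/* Hypothetical wrapper around kasan_poison(..., KASAN_CUSTOM, ...). */
	kasan_poison_outline(rcu_head, sizeof(struct rcu_head), false);

	if (!slab_free_hook(s, object, slab_want_init_on_free(s), true))
		do_slab_free(s, slab, object, NULL, 1, _THIS_IP_);
}

/* Queueing side, called from slab_free_hook() before the object can be reused. */
static bool slab_free_delay_rcu_debug(struct kmem_cache *s, void *x)
{
	struct rcu_head *head = kasan_reset_tag(x) + s->debug_rcu_head_offset;

	if (!(s->flags & SLAB_TYPESAFE_BY_RCU) || is_kfence_address(x))
		return false;

	/* The rcu_head is accessible only while RCU owns it. */
	kasan_unpoison(head, sizeof(*head), false);
	call_rcu(head, slab_free_after_rcu_debug);
	return true;
}
#endif /* CONFIG_SLUB_RCU_DEBUG */

With this shape, __kasan_init_slab_obj() would no longer need to leave a permanently unpoisoned 16-byte hole in the metadata area, since the rcu_head is only accessible between call_rcu() and the callback.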
Hello, kernel test robot noticed "BUG_bio-#(Tainted:G_S):Objects_remaining_in_bio-#on__kmem_cache_shutdown()" on: commit: d0dc03872a9d13afc16b9f12e69cb0dc60437198 ("[PATCH] slub: Introduce CONFIG_SLUB_RCU_DEBUG") url: https://github.com/intel-lab-lkp/linux/commits/Jann-Horn/slub-Introduce-CONFIG_SLUB_RCU_DEBUG/20230826-051804 patch link: https://lore.kernel.org/all/20230825211426.3798691-1-jannh@google.com/ patch subject: [PATCH] slub: Introduce CONFIG_SLUB_RCU_DEBUG in testcase: mdadm-selftests version: mdadm-selftests-x86_64-5f41845-1_20220826 with following parameters: disk: 1HDD test_prefix: 01replace compiler: gcc-12 test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-4790T CPU @ 2.70GHz (Haswell) with 16G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202309051537.999262cc-oliver.sang@intel.com kern :info : [ 120.278340] md: recovery of RAID array md0 kern :info : [ 124.955979] md: md0: recovery done. kern :info : [ 125.769059] md: recovery of RAID array md0 kern :info : [ 127.326184] md: md0: recovery done. kern :err : [ 133.408242] ============================================================================= kern :err : [ 133.417160] BUG bio-144 (Tainted: G S ): Objects remaining in bio-144 on __kmem_cache_shutdown() kern :err : [ 133.427951] ----------------------------------------------------------------------------- kern :err : [ 133.439000] Slab 0x00000000419abc96 objects=32 used=2 fp=0x000000008e0d14fb flags=0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) kern :warn : [ 133.452568] CPU: 3 PID: 1314 Comm: mdadm Tainted: G S 6.5.0-rc7-00105-gd0dc03872a9d #1 kern :warn : [ 133.462585] Hardware name: Gigabyte Technology Co., Ltd. Z97X-UD5H/Z97X-UD5H, BIOS F9 04/21/2015 kern :warn : [ 133.472076] Call Trace: kern :warn : [ 133.475236] <TASK> kern :warn : [ 133.478051] dump_stack_lvl (kbuild/src/consumer/lib/dump_stack.c:107 (discriminator 1)) kern :warn : [ 133.482427] slab_err (kbuild/src/consumer/mm/slub.c:1016) kern :warn : [ 133.486370] ? _raw_spin_lock_irqsave (kbuild/src/consumer/arch/x86/include/asm/atomic.h:115 kbuild/src/consumer/include/linux/atomic/atomic-arch-fallback.h:2155 kbuild/src/consumer/include/linux/atomic/atomic-instrumented.h:1296 kbuild/src/consumer/include/asm-generic/qspinlock.h:111 kbuild/src/consumer/include/linux/spinlock.h:187 kbuild/src/consumer/include/linux/spinlock_api_smp.h:111 kbuild/src/consumer/kernel/locking/spinlock.c:162) kern :warn : [ 133.491615] __kmem_cache_shutdown (kbuild/src/consumer/include/linux/spinlock.h:351 kbuild/src/consumer/mm/slub.c:4625 kbuild/src/consumer/mm/slub.c:4656 kbuild/src/consumer/mm/slub.c:4688) The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20230905/202309051537.999262cc-oliver.sang@intel.com
On Mon, 28 Aug 2023 at 16:40, Jann Horn <jannh@google.com> wrote: > > On Sat, Aug 26, 2023 at 5:32 AM Dmitry Vyukov <dvyukov@google.com> wrote: > > On Fri, 25 Aug 2023 at 23:15, Jann Horn <jannh@google.com> wrote: > > > Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU > > > slabs because use-after-free is allowed within the RCU grace period by > > > design. > > > > > > Add a SLUB debugging feature which RCU-delays every individual > > > kmem_cache_free() before either actually freeing the object or handing it > > > off to KASAN, and change KASAN to poison freed objects as normal when this > > > option is enabled. > > > > > > Note that this creates a 16-byte unpoisoned area in the middle of the > > > slab metadata area, which kinda sucks but seems to be necessary in order > > > to be able to store an rcu_head in there without triggering an ASAN > > > splat during RCU callback processing. > > > > Nice! > > > > Can't we unpoision this rcu_head right before call_rcu() and repoison > > after receiving the callback? > > Yeah, I think that should work. It looks like currently > kasan_unpoison() is exposed in include/linux/kasan.h but > kasan_poison() is not, and its inline definition probably means I > can't just move it out of mm/kasan/kasan.h into include/linux/kasan.h; > do you have a preference for how I should handle this? Hmm, and it > also looks like code outside of mm/kasan/ anyway wouldn't know what > are valid values for the "value" argument to kasan_poison(). > I also have another feature idea that would also benefit from having > something like kasan_poison() available in include/linux/kasan.h, so I > would prefer that over adding another special-case function inside > KASAN for poisoning this piece of slab metadata... > > I guess I could define a wrapper around kasan_poison() in > mm/kasan/generic.c that uses a new poison value for "some other part > of the kernel told us to poison this area", and then expose that > wrapper with a declaration in include/mm/kasan.h? Something like: > > void kasan_poison_outline(const void *addr, size_t size, bool init) > { > kasan_poison(addr, size, KASAN_CUSTOM, init); > } Looks reasonable. > > What happens on cache destruction? > > Currently we purge quarantine on cache destruction to be able to > > safely destroy the cache. I suspect we may need to somehow purge rcu > > callbacks as well, or do something else. > > Ooh, good point, I hadn't thought about that... currently > shutdown_cache() assumes that all the objects have already been freed, > then puts the kmem_cache on a list for > slab_caches_to_rcu_destroy_workfn(), which then waits with an > rcu_barrier() until the slab's pages are all gone. I guess this is what the test robot found as well. > Luckily kmem_cache_destroy() is already a sleepable operation, so > maybe I should just slap another rcu_barrier() in there for builds > with this config option enabled... I think that should be fine for an > option mostly intended for debugging. This is definitely the simplest option. I am a bit concerned about performance if massive cache destruction happens (e.g. maybe during destruction of a set of namespaces for a container). Net namespace is slow to destroy for this reason IIRC, there were some optimizations to batch rcu synchronization. And now we are adding more. But I don't see any reasonable faster option as well. So I guess let's do this now and optimize later (or not).
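For reference, a minimal sketch of the "just slap another rcu_barrier() in there" option being discussed, assuming a new helper called from kmem_cache_destroy() before shutdown_cache(); the helper name and call site are illustrative, not code from the posted patch:

static void flush_rcu_delayed_frees(struct kmem_cache *s)
{
	/*
	 * With CONFIG_SLUB_RCU_DEBUG, every kmem_cache_free() on a
	 * SLAB_TYPESAFE_BY_RCU cache queued an RCU callback that performs
	 * the real free; wait for all of them so __kmem_cache_shutdown()
	 * does not see objects still pending (the failure the test robot
	 * reports below).
	 */
	if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
	    (s->flags & SLAB_TYPESAFE_BY_RCU))
		rcu_barrier();
}

This trades one extra grace-period wait per destroyed SLAB_TYPESAFE_BY_RCU cache for correctness; batching the waits (as was done for network namespace teardown) would be a later optimization.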
On Fri, 25 Aug 2023 at 23:15, 'Jann Horn' via kasan-dev <kasan-dev@googlegroups.com> wrote: > > Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU > slabs because use-after-free is allowed within the RCU grace period by > design. > > Add a SLUB debugging feature which RCU-delays every individual > kmem_cache_free() before either actually freeing the object or handing it > off to KASAN, and change KASAN to poison freed objects as normal when this > option is enabled. > > Note that this creates a 16-byte unpoisoned area in the middle of the > slab metadata area, which kinda sucks but seems to be necessary in order > to be able to store an rcu_head in there without triggering an ASAN > splat during RCU callback processing. > > For now I've configured Kconfig.kasan to always enable this feature in the > GENERIC and SW_TAGS modes; I'm not forcibly enabling it in HW_TAGS mode > because I'm not sure if it might have unwanted performance degradation > effects there. > > Signed-off-by: Jann Horn <jannh@google.com> > --- > can I get a review from the KASAN folks of this? > I have been running it on my laptop for a bit and it seems to be working > fine. > > Notes: > With this patch, a UAF on a TYPESAFE_BY_RCU will splat with an error > like this (tested by reverting a security bugfix). > Note that, in the ASAN memory state dump, we can see the little > unpoisoned 16-byte areas storing the rcu_head. > > BUG: KASAN: slab-use-after-free in folio_lock_anon_vma_read+0x129/0x4c0 > Read of size 8 at addr ffff888004e85b00 by task forkforkfork/592 > > CPU: 0 PID: 592 Comm: forkforkfork Not tainted 6.5.0-rc7-00105-gae70c1e1f6f5-dirty #334 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > Call Trace: > <TASK> > dump_stack_lvl+0x4a/0x80 > print_report+0xcf/0x660 > kasan_report+0xd4/0x110 > folio_lock_anon_vma_read+0x129/0x4c0 > rmap_walk_anon+0x1cc/0x290 > folio_referenced+0x277/0x2a0 > shrink_folio_list+0xb8c/0x1680 > reclaim_folio_list+0xdc/0x1f0 > reclaim_pages+0x211/0x280 > madvise_cold_or_pageout_pte_range+0x812/0xb70 > walk_pgd_range+0x70b/0xce0 > __walk_page_range+0x343/0x360 > walk_page_range+0x227/0x280 > madvise_pageout+0x1cd/0x2d0 > do_madvise+0x552/0x15a0 > __x64_sys_madvise+0x62/0x70 > do_syscall_64+0x3b/0x90 > entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [...] > </TASK> > > Allocated by task 574: > kasan_save_stack+0x33/0x60 > kasan_set_track+0x25/0x30 > __kasan_slab_alloc+0x6e/0x70 > kmem_cache_alloc+0xfd/0x2b0 > anon_vma_fork+0x88/0x270 > dup_mmap+0x87c/0xc10 > copy_process+0x3399/0x3590 > kernel_clone+0x10e/0x480 > __do_sys_clone+0xa1/0xe0 > do_syscall_64+0x3b/0x90 > entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > > Freed by task 0: > kasan_save_stack+0x33/0x60 > kasan_set_track+0x25/0x30 > kasan_save_free_info+0x2b/0x50 > __kasan_slab_free+0xfe/0x180 > slab_free_after_rcu_debug+0xad/0x200 > rcu_core+0x638/0x1620 > __do_softirq+0x14c/0x581 > > Last potentially related work creation: > kasan_save_stack+0x33/0x60 > __kasan_record_aux_stack+0x94/0xa0 > __call_rcu_common.constprop.0+0x47/0x730 > __put_anon_vma+0x6e/0x150 > unlink_anon_vmas+0x277/0x2e0 > vma_complete+0x341/0x580 > vma_merge+0x613/0xff0 > mprotect_fixup+0x1c0/0x510 > do_mprotect_pkey+0x5a7/0x710 > __x64_sys_mprotect+0x47/0x60 > do_syscall_64+0x3b/0x90 > entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > > Second to last potentially related work creation: > [...] 
> > The buggy address belongs to the object at ffff888004e85b00 > which belongs to the cache anon_vma of size 192 > The buggy address is located 0 bytes inside of > freed 192-byte region [ffff888004e85b00, ffff888004e85bc0) > > The buggy address belongs to the physical page: > [...] > > Memory state around the buggy address: > ffff888004e85a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > ffff888004e85a80: 00 00 00 00 00 00 00 00 fc 00 00 fc fc fc fc fc > >ffff888004e85b00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ^ > ffff888004e85b80: fb fb fb fb fb fb fb fb fc 00 00 fc fc fc fc fc > ffff888004e85c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > > include/linux/kasan.h | 6 ++++ > include/linux/slub_def.h | 3 ++ > lib/Kconfig.kasan | 2 ++ > mm/Kconfig.debug | 21 +++++++++++++ > mm/kasan/common.c | 15 ++++++++- > mm/slub.c | 66 +++++++++++++++++++++++++++++++++++++--- Nice! It'd be good to add a test case to lib/test_kasan module. I think you could just copy/adjust the test case "test_memcache_typesafe_by_rcu" from the KFENCE KUnit test suite. > 6 files changed, 107 insertions(+), 6 deletions(-) > > diff --git a/include/linux/kasan.h b/include/linux/kasan.h > index 819b6bc8ac08..45e07caf4704 100644 > --- a/include/linux/kasan.h > +++ b/include/linux/kasan.h > @@ -229,6 +229,8 @@ static __always_inline bool kasan_check_byte(const void *addr) > return true; > } > > +size_t kasan_align(size_t size); > + > #else /* CONFIG_KASAN */ > > static inline void kasan_unpoison_range(const void *address, size_t size) {} > @@ -278,6 +280,10 @@ static inline bool kasan_check_byte(const void *address) > { > return true; > } > +static inline size_t kasan_align(size_t size) > +{ > + return size; > +} > > #endif /* CONFIG_KASAN */ > > diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h > index deb90cf4bffb..b87be8fce64a 100644 > --- a/include/linux/slub_def.h > +++ b/include/linux/slub_def.h > @@ -120,6 +120,9 @@ struct kmem_cache { > int refcount; /* Refcount for slab cache destroy */ > void (*ctor)(void *); > unsigned int inuse; /* Offset to metadata */ > +#ifdef CONFIG_SLUB_RCU_DEBUG > + unsigned int debug_rcu_head_offset; > +#endif > unsigned int align; /* Alignment */ > unsigned int red_left_pad; /* Left redzone padding size */ > const char *name; /* Name (only for display!) */ > diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan > index fdca89c05745..7ff7de96c6e4 100644 > --- a/lib/Kconfig.kasan > +++ b/lib/Kconfig.kasan > @@ -79,6 +79,7 @@ config KASAN_GENERIC > depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC > depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS > select SLUB_DEBUG if SLUB > + select SLUB_RCU_DEBUG if SLUB_DEBUG > select CONSTRUCTORS > help > Enables Generic KASAN. > @@ -96,6 +97,7 @@ config KASAN_SW_TAGS > depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS > depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS > select SLUB_DEBUG if SLUB > + select SLUB_RCU_DEBUG if SLUB_DEBUG > select CONSTRUCTORS > help > Enables Software Tag-Based KASAN. > diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug > index 018a5bd2f576..99cce7f0fbef 100644 > --- a/mm/Kconfig.debug > +++ b/mm/Kconfig.debug > @@ -78,6 +78,27 @@ config SLUB_DEBUG_ON > off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying > "slub_debug=-". 
> > +config SLUB_RCU_DEBUG > + bool "Make use-after-free detection possible in TYPESAFE_BY_RCU caches" > + depends on SLUB && SLUB_DEBUG > + default n > + help > + Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache > + was not marked as SLAB_TYPESAFE_BY_RCU and every caller used > + kfree_rcu() instead. > + > + This is intended for use in combination with KASAN, to enable KASAN to > + detect use-after-free accesses in such caches. > + (KFENCE is able to do that independent of this flag.) > + > + This might degrade performance. > + > + If you're using this for testing bugs / fuzzing and care about > + catching all the bugs WAY more than performance, you might want to > + also turn on CONFIG_RCU_STRICT_GRACE_PERIOD. > + > + If unsure, say N. > + > config PAGE_OWNER > bool "Track page owner" > depends on DEBUG_KERNEL && STACKTRACE_SUPPORT > diff --git a/mm/kasan/common.c b/mm/kasan/common.c > index 256930da578a..b4a3504f9f5e 100644 > --- a/mm/kasan/common.c > +++ b/mm/kasan/common.c > @@ -191,6 +191,13 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, > if (kasan_requires_meta()) > kasan_init_object_meta(cache, object); > > +#ifdef CONFIG_SLUB_RCU_DEBUG > + if (cache->flags & SLAB_TYPESAFE_BY_RCU) { > + kasan_unpoison(object + cache->debug_rcu_head_offset, > + sizeof(struct rcu_head), false); > + } > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */ > object = set_tag(object, assign_tag(cache, object, true)); > > @@ -218,7 +225,8 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object, > } > > /* RCU slabs could be legally used after free within the RCU period */ > - if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) > + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU) && > + !IS_ENABLED(CONFIG_SLUB_RCU_DEBUG)) > return false; > > if (!kasan_byte_accessible(tagged_object)) { > @@ -450,3 +458,8 @@ bool __kasan_check_byte(const void *address, unsigned long ip) > } > return true; > } > + > +size_t kasan_align(size_t size) > +{ > + return round_up(size, KASAN_GRANULE_SIZE); > +} > diff --git a/mm/slub.c b/mm/slub.c > index e3b5d5c0eb3a..bae6c2bc1e5f 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -1108,7 +1108,8 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, > * A. Free pointer (if we cannot overwrite object on free) > * B. Tracking data for SLAB_STORE_USER > * C. Original request size for kmalloc object (SLAB_STORE_USER enabled) > - * D. Padding to reach required alignment boundary or at minimum > + * D. RCU head for CONFIG_SLUB_RCU_DEBUG (with padding around it) > + * E. Padding to reach required alignment boundary or at minimum > * one word if debugging is on to be able to detect writes > * before the word boundary. > * > @@ -1134,6 +1135,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) > off += sizeof(unsigned int); > } > > +#ifdef CONFIG_SLUB_RCU_DEBUG > + if (s->flags & SLAB_TYPESAFE_BY_RCU) > + off = kasan_align(s->debug_rcu_head_offset + sizeof(struct rcu_head)); > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > off += kasan_metadata_size(s, false); > > if (size_from_object(s) == off) > @@ -1751,12 +1757,17 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, > #endif > #endif /* CONFIG_SLUB_DEBUG */ > > +#ifdef CONFIG_SLUB_RCU_DEBUG > +static void slab_free_after_rcu_debug(struct rcu_head *rcu_head); > +#endif > + > /* > * Hooks for other subsystems that check memory allocations. 
In a typical > * production configuration these hooks all should produce no code at all. > */ > static __always_inline bool slab_free_hook(struct kmem_cache *s, > - void *x, bool init) > + void *x, bool init, > + bool after_rcu_delay) > { > kmemleak_free_recursive(x, s->flags); > kmsan_slab_free(s, x); > @@ -1766,8 +1777,18 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, > if (!(s->flags & SLAB_DEBUG_OBJECTS)) > debug_check_no_obj_freed(x, s->object_size); > > +#ifdef CONFIG_SLUB_RCU_DEBUG > + /* kfence does its own RCU delay */ > + if ((s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay && > + !is_kfence_address(x)) { > + call_rcu(kasan_reset_tag(x) + s->debug_rcu_head_offset, > + slab_free_after_rcu_debug); > + return true; > + } > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > /* Use KCSAN to help debug racy use-after-free. */ > - if (!(s->flags & SLAB_TYPESAFE_BY_RCU)) > + if (!(s->flags & SLAB_TYPESAFE_BY_RCU) || after_rcu_delay) > __kcsan_check_access(x, s->object_size, > KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); > > @@ -1802,7 +1823,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > void *old_tail = *tail ? *tail : *head; > > if (is_kfence_address(next)) { > - slab_free_hook(s, next, false); > + slab_free_hook(s, next, false, false); > return true; > } > > @@ -1815,7 +1836,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, > next = get_freepointer(s, object); > > /* If object's reuse doesn't have to be delayed */ > - if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { > + if (!slab_free_hook(s, object, slab_want_init_on_free(s), false)) { > /* Move object to the new freelist */ > set_freepointer(s, object, *head); > *head = object; > @@ -3802,6 +3823,31 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, > do_slab_free(s, slab, head, tail, cnt, addr); > } > > +#ifdef CONFIG_SLUB_RCU_DEBUG > +static void slab_free_after_rcu_debug(struct rcu_head *rcu_head) > +{ > + struct slab *slab = virt_to_slab(rcu_head); > + struct kmem_cache *s; > + void *object; > + > + if (WARN_ON(is_kfence_address(rcu_head))) > + return; > + > + /* find the object and the cache again */ > + if (WARN_ON(!slab)) > + return; > + s = slab->slab_cache; > + if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU))) > + return; > + object = (void *)rcu_head - s->debug_rcu_head_offset; > + > + /* resume freeing */ > + if (slab_free_hook(s, object, slab_want_init_on_free(s), true)) > + return; > + do_slab_free(s, slab, object, NULL, 1, _THIS_IP_); > +} > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > + > #ifdef CONFIG_KASAN_GENERIC > void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) > { > @@ -4443,6 +4489,16 @@ static int calculate_sizes(struct kmem_cache *s) > if (flags & SLAB_KMALLOC) > size += sizeof(unsigned int); > } > + > +#ifdef CONFIG_SLUB_RCU_DEBUG > + if (flags & SLAB_TYPESAFE_BY_RCU) { > + size = kasan_align(size); > + size = ALIGN(size, __alignof__(struct rcu_head)); > + s->debug_rcu_head_offset = size; > + size += sizeof(struct rcu_head); > + size = kasan_align(size); > + } > +#endif /* CONFIG_SLUB_RCU_DEBUG */ > #endif > > kasan_cache_create(s, &size, &s->flags); > > base-commit: 4f9e7fabf8643003afefc172e62dd276686f016e > -- > 2.42.0.rc1.204.g551eb34607-goog > > -- > You received this message because you are subscribed to the Google Groups "kasan-dev" group. > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com. 
On Mon, Aug 28, 2023 at 4:40 PM Jann Horn <jannh@google.com> wrote: > > > Can't we unpoision this rcu_head right before call_rcu() and repoison > > after receiving the callback? > > Yeah, I think that should work. It looks like currently > kasan_unpoison() is exposed in include/linux/kasan.h but > kasan_poison() is not, and its inline definition probably means I > can't just move it out of mm/kasan/kasan.h into include/linux/kasan.h; > do you have a preference for how I should handle this? Hmm, and it > also looks like code outside of mm/kasan/ anyway wouldn't know what > are valid values for the "value" argument to kasan_poison(). > I also have another feature idea that would also benefit from having > something like kasan_poison() available in include/linux/kasan.h, so I > would prefer that over adding another special-case function inside > KASAN for poisoning this piece of slab metadata... This is a problem only for the Generic mode, right? You already call kasan_reset_tag on the rcu_head, which should suppress the reporting for the tag-based modes. If so, would it be possible to reuse metadata_access_enable/disable? They are used for accessing slub_debug metadata and seem to fit nicely with this case as well. I also second Marco's comment to add a test for the new functionality. Thanks for working on this!
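A hedged sketch of the kind of regression test Marco and Andrey are asking for, loosely modeled on KFENCE's test_memcache_typesafe_by_rcu and written for the KASAN KUnit suite; the test and cache names are made up, and KUNIT_EXPECT_KASAN_FAIL is the report-checking helper the existing KASAN tests use:

static void memcache_typesafe_by_rcu_uaf(struct kunit *test)
{
	struct kmem_cache *cache;
	char *p;

	cache = kmem_cache_create("test_cache", 32, 0, SLAB_TYPESAFE_BY_RCU, NULL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);

	p = kmem_cache_alloc(cache, GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p);
	kmem_cache_free(cache, p);

	/*
	 * With CONFIG_SLUB_RCU_DEBUG the freed object is only handed to
	 * KASAN after a grace period, so wait before touching it.
	 */
	rcu_barrier();

	/* Now the access must be reported as a use-after-free. */
	KUNIT_EXPECT_KASAN_FAIL(test, READ_ONCE(*p));

	kmem_cache_destroy(cache);
}

Such a test would need to be skipped (or its expectation relaxed) when CONFIG_SLUB_RCU_DEBUG is disabled, since without it a read after rcu_barrier() is still legal for a SLAB_TYPESAFE_BY_RCU cache and no report would fire.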
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..45e07caf4704 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -229,6 +229,8 @@ static __always_inline bool kasan_check_byte(const void *addr) return true; } +size_t kasan_align(size_t size); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_range(const void *address, size_t size) {} @@ -278,6 +280,10 @@ static inline bool kasan_check_byte(const void *address) { return true; } +static inline size_t kasan_align(size_t size) +{ + return size; +} #endif /* CONFIG_KASAN */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index deb90cf4bffb..b87be8fce64a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -120,6 +120,9 @@ struct kmem_cache { int refcount; /* Refcount for slab cache destroy */ void (*ctor)(void *); unsigned int inuse; /* Offset to metadata */ +#ifdef CONFIG_SLUB_RCU_DEBUG + unsigned int debug_rcu_head_offset; +#endif unsigned int align; /* Alignment */ unsigned int red_left_pad; /* Left redzone padding size */ const char *name; /* Name (only for display!) */ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index fdca89c05745..7ff7de96c6e4 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -79,6 +79,7 @@ config KASAN_GENERIC depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB + select SLUB_RCU_DEBUG if SLUB_DEBUG select CONSTRUCTORS help Enables Generic KASAN. @@ -96,6 +97,7 @@ config KASAN_SW_TAGS depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB + select SLUB_RCU_DEBUG if SLUB_DEBUG select CONSTRUCTORS help Enables Software Tag-Based KASAN. diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 018a5bd2f576..99cce7f0fbef 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -78,6 +78,27 @@ config SLUB_DEBUG_ON off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying "slub_debug=-". +config SLUB_RCU_DEBUG + bool "Make use-after-free detection possible in TYPESAFE_BY_RCU caches" + depends on SLUB && SLUB_DEBUG + default n + help + Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache + was not marked as SLAB_TYPESAFE_BY_RCU and every caller used + kfree_rcu() instead. + + This is intended for use in combination with KASAN, to enable KASAN to + detect use-after-free accesses in such caches. + (KFENCE is able to do that independent of this flag.) + + This might degrade performance. + + If you're using this for testing bugs / fuzzing and care about + catching all the bugs WAY more than performance, you might want to + also turn on CONFIG_RCU_STRICT_GRACE_PERIOD. + + If unsure, say N. 
+ config PAGE_OWNER bool "Track page owner" depends on DEBUG_KERNEL && STACKTRACE_SUPPORT diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 256930da578a..b4a3504f9f5e 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -191,6 +191,13 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, if (kasan_requires_meta()) kasan_init_object_meta(cache, object); +#ifdef CONFIG_SLUB_RCU_DEBUG + if (cache->flags & SLAB_TYPESAFE_BY_RCU) { + kasan_unpoison(object + cache->debug_rcu_head_offset, + sizeof(struct rcu_head), false); + } +#endif /* CONFIG_SLUB_RCU_DEBUG */ + /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */ object = set_tag(object, assign_tag(cache, object, true)); @@ -218,7 +225,8 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object, } /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU) && + !IS_ENABLED(CONFIG_SLUB_RCU_DEBUG)) return false; if (!kasan_byte_accessible(tagged_object)) { @@ -450,3 +458,8 @@ bool __kasan_check_byte(const void *address, unsigned long ip) } return true; } + +size_t kasan_align(size_t size) +{ + return round_up(size, KASAN_GRANULE_SIZE); +} diff --git a/mm/slub.c b/mm/slub.c index e3b5d5c0eb3a..bae6c2bc1e5f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1108,7 +1108,8 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, * A. Free pointer (if we cannot overwrite object on free) * B. Tracking data for SLAB_STORE_USER * C. Original request size for kmalloc object (SLAB_STORE_USER enabled) - * D. Padding to reach required alignment boundary or at minimum + * D. RCU head for CONFIG_SLUB_RCU_DEBUG (with padding around it) + * E. Padding to reach required alignment boundary or at minimum * one word if debugging is on to be able to detect writes * before the word boundary. * @@ -1134,6 +1135,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) off += sizeof(unsigned int); } +#ifdef CONFIG_SLUB_RCU_DEBUG + if (s->flags & SLAB_TYPESAFE_BY_RCU) + off = kasan_align(s->debug_rcu_head_offset + sizeof(struct rcu_head)); +#endif /* CONFIG_SLUB_RCU_DEBUG */ + off += kasan_metadata_size(s, false); if (size_from_object(s) == off) @@ -1751,12 +1757,17 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, #endif #endif /* CONFIG_SLUB_DEBUG */ +#ifdef CONFIG_SLUB_RCU_DEBUG +static void slab_free_after_rcu_debug(struct rcu_head *rcu_head); +#endif + /* * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. */ static __always_inline bool slab_free_hook(struct kmem_cache *s, - void *x, bool init) + void *x, bool init, + bool after_rcu_delay) { kmemleak_free_recursive(x, s->flags); kmsan_slab_free(s, x); @@ -1766,8 +1777,18 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, if (!(s->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(x, s->object_size); +#ifdef CONFIG_SLUB_RCU_DEBUG + /* kfence does its own RCU delay */ + if ((s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay && + !is_kfence_address(x)) { + call_rcu(kasan_reset_tag(x) + s->debug_rcu_head_offset, + slab_free_after_rcu_debug); + return true; + } +#endif /* CONFIG_SLUB_RCU_DEBUG */ + /* Use KCSAN to help debug racy use-after-free. 
*/ - if (!(s->flags & SLAB_TYPESAFE_BY_RCU)) + if (!(s->flags & SLAB_TYPESAFE_BY_RCU) || after_rcu_delay) __kcsan_check_access(x, s->object_size, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); @@ -1802,7 +1823,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, void *old_tail = *tail ? *tail : *head; if (is_kfence_address(next)) { - slab_free_hook(s, next, false); + slab_free_hook(s, next, false, false); return true; } @@ -1815,7 +1836,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, next = get_freepointer(s, object); /* If object's reuse doesn't have to be delayed */ - if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { + if (!slab_free_hook(s, object, slab_want_init_on_free(s), false)) { /* Move object to the new freelist */ set_freepointer(s, object, *head); *head = object; @@ -3802,6 +3823,31 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, do_slab_free(s, slab, head, tail, cnt, addr); } +#ifdef CONFIG_SLUB_RCU_DEBUG +static void slab_free_after_rcu_debug(struct rcu_head *rcu_head) +{ + struct slab *slab = virt_to_slab(rcu_head); + struct kmem_cache *s; + void *object; + + if (WARN_ON(is_kfence_address(rcu_head))) + return; + + /* find the object and the cache again */ + if (WARN_ON(!slab)) + return; + s = slab->slab_cache; + if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU))) + return; + object = (void *)rcu_head - s->debug_rcu_head_offset; + + /* resume freeing */ + if (slab_free_hook(s, object, slab_want_init_on_free(s), true)) + return; + do_slab_free(s, slab, object, NULL, 1, _THIS_IP_); +} +#endif /* CONFIG_SLUB_RCU_DEBUG */ + #ifdef CONFIG_KASAN_GENERIC void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) { @@ -4443,6 +4489,16 @@ static int calculate_sizes(struct kmem_cache *s) if (flags & SLAB_KMALLOC) size += sizeof(unsigned int); } + +#ifdef CONFIG_SLUB_RCU_DEBUG + if (flags & SLAB_TYPESAFE_BY_RCU) { + size = kasan_align(size); + size = ALIGN(size, __alignof__(struct rcu_head)); + s->debug_rcu_head_offset = size; + size += sizeof(struct rcu_head); + size = kasan_align(size); + } +#endif /* CONFIG_SLUB_RCU_DEBUG */ #endif kasan_cache_create(s, &size, &s->flags);