@@ -492,6 +492,16 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
+struct kmem_buckets {
+ struct kmem_cache *caches[ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL])];
+};
+
+struct kmem_buckets *
+kmem_buckets_create(const char *name, unsigned int align, slab_flags_t flags,
+ unsigned int useroffset, unsigned int usersize,
+ void (*ctor)(void *));
+
+
/*
* Bulk allocation and freeing operations. These are accelerated in an
* allocator specific way to avoid taking locks repeatedly or building
@@ -594,6 +604,22 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
return __kmalloc(size, flags);
}
+static __always_inline __alloc_size(2)
+void *kmem_buckets_alloc(struct kmem_buckets *b, size_t size, gfp_t flags)
+{
+ unsigned int index;
+
+ if (size > KMALLOC_MAX_CACHE_SIZE)
+ return kmalloc_large(size, flags);
+ if (WARN_ON_ONCE(!b))
+ return NULL;
+ index = kmalloc_index(size);
+ if (WARN_ONCE(!b->caches[index],
+ "missing cache for size %zu (index %d)\n", size, index))
+ return kmalloc(size, flags);
+ return kmalloc_trace(b->caches[index], flags, size);
+}
+
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size) {
@@ -392,6 +392,66 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
}
EXPORT_SYMBOL(kmem_cache_create);
+static struct kmem_cache *kmem_buckets_cache __ro_after_init;
+
+struct kmem_buckets *
+kmem_buckets_create(const char *name, unsigned int align,
+ slab_flags_t flags,
+ unsigned int useroffset, unsigned int usersize,
+ void (*ctor)(void *))
+{
+ struct kmem_buckets *b;
+ int idx;
+
+ if (WARN_ON(!kmem_buckets_cache))
+ return NULL;
+
+ b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
+ if (WARN_ON(!b))
+ return NULL;
+
+ for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+ char *short_size, *cache_name;
+ unsigned int size;
+
+ if (!kmalloc_caches[KMALLOC_NORMAL][idx])
+ continue;
+
+ size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
+ if (!size)
+ continue;
+
+ short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
+ if (WARN_ON(!short_size))
+ goto fail;
+
+ cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
+ if (WARN_ON(!cache_name))
+ goto fail;
+
+ b->caches[idx] = kmem_cache_create_usercopy(cache_name, size,
+ align, flags, useroffset,
+ min(size - useroffset, usersize), ctor);
+ kfree(cache_name);
+ if (WARN_ON(!b->caches[idx]))
+ goto fail;
+ }
+
+ return b;
+
+fail:
+ for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+ if (b->caches[idx]) {
+ kfree(b->caches[idx]->name);
+ kmem_cache_destroy(b->caches[idx]);
+ }
+ }
+ kfree(b);
+
+ return NULL;
+}
+EXPORT_SYMBOL(kmem_buckets_create);
+
#ifdef SLAB_SUPPORTS_SYSFS
/*
* For a given kmem_cache, kmem_cache_destroy() should only be called
@@ -934,6 +994,10 @@ void __init create_kmalloc_caches(slab_flags_t flags)
/* Kmalloc array is now usable */
slab_state = UP;
+
+ kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
+ sizeof(struct kmem_buckets) * ARRAY_SIZE(kmalloc_info),
+ 0, 0, NULL);
}
/**
Dedicated caches are available For fixed size allocations via kmem_cache_alloc(), but for dynamically sized allocations there is only the global kmalloc API's set of buckets available. This means it isn't possible to separate specific sets of dynamically sized allocations into a separate collection of caches. This leads to a use-after-free exploitation weakness in the Linux kernel since many heap memory spraying/grooming attacks depend on using userspace-controllable dynamically sized allocations to collide with fixed size allocations that end up in same cache. While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense against these kinds of "type confusion" attacks, including for fixed same-size heap objects, we can create a complementary deterministic defense for dynamically sized allocations. In order to isolate user-controllable sized allocations from system allocations, introduce kmem_buckets_create() and kmem_buckets_alloc(), which behave like kmem_cache_create() and like kmem_cache_alloc() for confining allocations to a dedicated set of sized caches (which have the same layout as the kmalloc caches). This can also be used in the future once codetag allocation annotations exist to implement per-caller allocation cache isolation[1] even for dynamic allocations. Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1] Signed-off-by: Kees Cook <keescook@chromium.org> --- Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: linux-mm@kvack.org --- include/linux/slab.h | 26 ++++++++++++++++++ mm/slab_common.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+)