[v3,1/2] kunit: add a KUnit test for SLUB debugging functionality

Message ID 20210331085156.5028-1-glittao@gmail.com (mailing list archive)
State New, archived
Series [v3,1/2] kunit: add a KUnit test for SLUB debugging functionality

Commit Message

Oliver Glitta March 31, 2021, 8:51 a.m. UTC
From: Oliver Glitta <glittao@gmail.com>

SLUB has a resiliency_test() function, which is hidden behind an
#ifdef SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody
runs it. KUnit should be a proper replacement for it.

Try changing a byte in the redzone after allocation, and changing the
pointer to the next free node, the first byte, the 50th byte and the
redzone byte after freeing. Check that validation finds the errors.

There are several differences from the original resiliency test:
the tests create their own caches with a known state instead of
corrupting the shared kmalloc caches.

The corruption of the freepointer uses the correct offset; the
original resiliency test was broken by the freepointer changes.

Scrap the random-byte-change test, because it is not meaningful in
this form, where we need deterministic results.

Add new option CONFIG_SLUB_KUNIT_TEST in Kconfig.

Add a counter field "errors" to struct kmem_cache to count the
number of errors detected in the cache.

Silence the bug reports in the SLUB test. Add a SLAB_SILENT_ERRORS
debug flag and add it to the SLAB_NEVER_MERGE, SLAB_DEBUG_FLAGS and
SLAB_FLAGS_PERMITTED macros.

Signed-off-by: Oliver Glitta <glittao@gmail.com>
---
Changes since v2

Use the bit operation & instead of the logical &&, as reported by the
kernel test robot and Dan Carpenter.

Changes since v1

Conversion from kselftest to a KUnit test, as suggested by Marco Elver.
Error silencing.
Error counting improvements. 

 include/linux/slab.h     |   2 +
 include/linux/slub_def.h |   2 +
 lib/Kconfig.debug        |   5 ++
 lib/Makefile             |   1 +
 lib/test_slub.c          | 124 +++++++++++++++++++++++++++++++++++++++
 mm/slab.h                |   7 ++-
 mm/slab_common.c         |   2 +-
 mm/slub.c                |  64 +++++++++++++-------
 8 files changed, 184 insertions(+), 23 deletions(-)
 create mode 100644 lib/test_slub.c

Comments

Vlastimil Babka April 1, 2021, 7:41 a.m. UTC | #1
On 3/31/21 10:51 AM, glittao@gmail.com wrote:
> From: Oliver Glitta <glittao@gmail.com>
> 
> [...]
> Signed-off-by: Oliver Glitta <glittao@gmail.com>

Acked-by: Vlastimil Babka <vbabka@suse.cz>
Marco Elver April 1, 2021, 9:16 a.m. UTC | #2
[Note, if you'd like me to see future versions, please Cc me, otherwise
it's unlikely I see it in time. Also add kunit-dev@googlegroups.com if
perhaps a KUnit dev should have another look, too.]

On Wed, Mar 31, 2021 at 10:51AM +0200, glittao@gmail.com wrote:
> From: Oliver Glitta <glittao@gmail.com>
> 
> [...]
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 7ae604076767..ed1a5a64d028 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -25,6 +25,8 @@
>   */
>  /* DEBUG: Perform (expensive) checks on alloc/free */
>  #define SLAB_CONSISTENCY_CHECKS	((slab_flags_t __force)0x00000100U)
> +/* DEBUG: Silent bug reports */
> +#define SLAB_SILENT_ERRORS	((slab_flags_t __force)0x00000200U)

This flag wouldn't be necessary if you do the design using
kunit_resource (see below).

(But perhaps I missed a conversation that said that this flag is
generally useful, but if so, it should probably be in a separate patch
justifying why it is required beyond the test.)

>  /* DEBUG: Red zone objs in a cache */
>  #define SLAB_RED_ZONE		((slab_flags_t __force)0x00000400U)
>  /* DEBUG: Poison objects */
> diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
> index dcde82a4434c..e4b51bb5bb83 100644
> --- a/include/linux/slub_def.h
> +++ b/include/linux/slub_def.h
> @@ -133,6 +133,8 @@ struct kmem_cache {
>  	unsigned int usersize;		/* Usercopy region size */
>  
>  	struct kmem_cache_node *node[MAX_NUMNODES];
> +
> +	int errors;			/* Number of errors in cache */

So, I think it's bad design to add a new field 'errors', just for the
test. This will increase kmem_cache size for all builds, which is
unnecessary.

Is there use to retrieve 'errors' elsewhere?

While you could guard this with #ifdef CONFIG_SLUB_DEBUG or so, there's
a better design option if this is just for the KUnit test's benefit: use
kunit_resource.

The way it'd work is that for each test (you can add a common init
function) you add a named resource, in this case just an 'int' I guess,
that slab would be able to retrieve if this test is being run.

In the test somewhere, you could add something like this:


	static struct kunit_resource resource;
	static int slab_errors;

	..........

	static int test_init(struct kunit *test)
	{
		slab_errors = 0;
		kunit_add_named_resource(test, NULL, NULL, &resource,
					 "slab_errors", &slab_errors);
		return 0;
	}

	...... tests now check slab_errors .....

and then in slub.c you'd have:

	#if IS_ENABLED(CONFIG_KUNIT)
	static bool slab_add_kunit_errors(void)
	{
		struct kunit_resource *resource;

		if (likely(!current->kunit_test))
			return false;
		resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
		if (!resource)
			return false;
		(*(int *)resource->data)++;
		kunit_put_resource(resource);
		return true;
	}
	#else
	static inline bool slab_add_kunit_errors(void) { return false; }
	#endif

And anywhere you want to increase the error count, you'd call
slab_add_kunit_errors().

Another benefit of this approach is that if KUnit is disabled, there is
zero overhead and no additional code generated (vs. the current
approach).
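
For illustration, the first test case might then look something like
this (just a sketch: it assumes the SLAB_SILENT_ERRORS flag is dropped
and that the expected count stays as in this patch):

	static void test_clobber_zone(struct kunit *test)
	{
		struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc",
					64, 0, SLAB_RED_ZONE, NULL);
		u8 *p = kmem_cache_alloc(s, GFP_KERNEL);

		p[64] = 0x12;

		validate_slab_cache(s);
		/* slab_errors was incremented via slab_add_kunit_errors() */
		KUNIT_EXPECT_EQ(test, 1, slab_errors);

		kmem_cache_free(s, p);
		kmem_cache_destroy(s);
	}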

>  };
>  
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 2779c29d9981..e0dec830c269 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2371,6 +2371,11 @@ config BITS_TEST
>  
>  	  If unsure, say N.
>  
> +config SLUB_KUNIT_TEST
> +	tristate "KUnit Test for SLUB cache error detection" if !KUNIT_ALL_TESTS
> +	depends on SLUB_DEBUG && KUNIT
> +	default KUNIT_ALL_TESTS

Help text please.
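
For instance (the wording is just a suggestion):

	config SLUB_KUNIT_TEST
		tristate "KUnit test for SLUB cache error detection" if !KUNIT_ALL_TESTS
		depends on SLUB_DEBUG && KUNIT
		default KUNIT_ALL_TESTS
		help
		  This builds SLUB allocator unit tests.
		  For more information on KUnit and unit tests in general
		  please refer to the KUnit documentation in
		  Documentation/dev-tools/kunit/.

		  If unsure, say N.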

> +
>  config TEST_UDELAY
>  	tristate "udelay test driver"
>  	help
> diff --git a/lib/Makefile b/lib/Makefile
> index b5307d3eec1a..e1eb986c0e87 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -352,5 +352,6 @@ obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
>  obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
>  obj-$(CONFIG_BITS_TEST) += test_bits.o
>  obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
> +obj-$(CONFIG_SLUB_KUNIT_TEST) += test_slub.o

Any reason to not name this slub_kunit.c? This is a new test, and it
could follow the naming convention of the other KUnit tests.

Some of it is also documented:
https://www.kernel.org/doc/html/latest/dev-tools/kunit/style.html
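
I.e. (with the rename applied):

	obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o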

>  obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
> diff --git a/lib/test_slub.c b/lib/test_slub.c
> new file mode 100644
> index 000000000000..4f8ea3c7d867
> --- /dev/null
> +++ b/lib/test_slub.c
> @@ -0,0 +1,124 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <kunit/test.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include "../mm/slab.h"
> +
> +
> +static void test_clobber_zone(struct kunit *test)
> +{
> +	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
> +				SLAB_RED_ZONE | SLAB_SILENT_ERRORS, NULL);
> +	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> +	p[64] = 0x12;
> +
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 1, s->errors);
> +
> +	kmem_cache_free(s, p);
> +	kmem_cache_destroy(s);
> +}
> +
> +static void test_next_pointer(struct kunit *test)
> +{
> +	struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
> +				SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
> +	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +	unsigned long tmp;
> +	unsigned long *ptr_addr;
> +
> +	kmem_cache_free(s, p);
> +
> +	ptr_addr = (unsigned long *)(p + s->offset);
> +	tmp = *ptr_addr;
> +	p[s->offset] = 0x12;
> +
> +	/*
> +	 * Expecting two errors.
> +	 * One for the corrupted freechain and the other one for the wrong
> +	 * count of objects in use.
> +	 */
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 2, s->errors);
> +
> +	/*
> +	 * Try to repair corrupted freepointer.
> +	 * Still expecting one error for the wrong count of objects in use.
> +	 */
> +	*ptr_addr = tmp;
> +
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 1, s->errors);
> +
> +	/*
> +	 * Previous validation repaired the count of objects in use.
> +	 * Now expecting no error.
> +	 */
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 0, s->errors);
> +
> +	kmem_cache_destroy(s);
> +}
> +
> +static void test_first_word(struct kunit *test)
> +{
> +	struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
> +				SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
> +	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> +	kmem_cache_free(s, p);
> +	*p = 0x78;
> +
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 1, s->errors);
> +
> +	kmem_cache_destroy(s);
> +}
> +
> +static void test_clobber_50th_byte(struct kunit *test)
> +{
> +	struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
> +				SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
> +	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> +	kmem_cache_free(s, p);
> +	p[50] = 0x9a;
> +
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 1, s->errors);
> +	kmem_cache_destroy(s);
> +}
> +
> +static void test_clobber_redzone_free(struct kunit *test)
> +{
> +	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
> +				SLAB_RED_ZONE | SLAB_SILENT_ERRORS, NULL);
> +	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> +
> +	kmem_cache_free(s, p);
> +	p[64] = 0xab;
> +
> +	validate_slab_cache(s);
> +	KUNIT_EXPECT_EQ(test, 1, s->errors);
> +	kmem_cache_destroy(s);
> +}
> +
> +static struct kunit_case test_cases[] = {
> +	KUNIT_CASE(test_clobber_zone),
> +	KUNIT_CASE(test_next_pointer),
> +	KUNIT_CASE(test_first_word),
> +	KUNIT_CASE(test_clobber_50th_byte),
> +	KUNIT_CASE(test_clobber_redzone_free),
> +	{}
> +};
> +
> +static struct kunit_suite test_suite = {
> +	.name = "slub_test",
> +	.test_cases = test_cases,

Here you could add a 

	.init = test_init,

or so, if you go with the kunit_resource design.

> +};
> +kunit_test_suite(test_suite);
> +
> +MODULE_LICENSE("GPL");
> diff --git a/mm/slab.h b/mm/slab.h
> index 076582f58f68..382507b6cab9 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -134,7 +134,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
>  #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
>  #elif defined(CONFIG_SLUB_DEBUG)
>  #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
> -			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
> +			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS | \
> +			  SLAB_SILENT_ERRORS)
>  #else
>  #define SLAB_DEBUG_FLAGS (0)
>  #endif
> @@ -164,7 +165,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
>  			      SLAB_NOLEAKTRACE | \
>  			      SLAB_RECLAIM_ACCOUNT | \
>  			      SLAB_TEMPORARY | \
> -			      SLAB_ACCOUNT)
> +			      SLAB_ACCOUNT | \
> +			      SLAB_SILENT_ERRORS)
>  
>  bool __kmem_cache_empty(struct kmem_cache *);
>  int __kmem_cache_shutdown(struct kmem_cache *);
> @@ -215,6 +217,7 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
>  DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
>  #endif
>  extern void print_tracking(struct kmem_cache *s, void *object);
> +long validate_slab_cache(struct kmem_cache *s);
>  #else
>  static inline void print_tracking(struct kmem_cache *s, void *object)
>  {
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 88e833986332..239c9095e7ea 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -55,7 +55,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
>   */
>  #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
>  		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
> -		SLAB_FAILSLAB | kasan_never_merge())
> +		SLAB_FAILSLAB | SLAB_SILENT_ERRORS | kasan_never_merge())
>  
>  #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
>  			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> diff --git a/mm/slub.c b/mm/slub.c
> index 3021ce9bf1b3..7083e89432d0 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -673,14 +673,16 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
>  
>  static void slab_fix(struct kmem_cache *s, char *fmt, ...)
>  {
> -	struct va_format vaf;
> -	va_list args;
> -
> -	va_start(args, fmt);
> -	vaf.fmt = fmt;
> -	vaf.va = &args;
> -	pr_err("FIX %s: %pV\n", s->name, &vaf);
> -	va_end(args);
> +	if (!(s->flags & SLAB_SILENT_ERRORS)) {

This style of guarding causes lots of line diffs.

Instead, if the whole function is to be skipped, please prefer:

	if (skip_things_if_true)
		return;

instead of

	if (!skip_things_if_true) {
		...
	}

here and everywhere else.

Specifically, for the kunit_resource design, this would simply become:

	if (slab_add_kunit_errors())
		return;

Meaning that whenever a KUnit test has a resource "slab_errors", no
messages are printed -- otherwise it'll always print a message.
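
Putting the two together, slab_fix() would then look something like
this (sketch):

	static void slab_fix(struct kmem_cache *s, char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		if (slab_add_kunit_errors())
			return;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		pr_err("FIX %s: %pV\n", s->name, &vaf);
		va_end(args);
	}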

> +		struct va_format vaf;
> +		va_list args;
> +
> +		va_start(args, fmt);
> +		vaf.fmt = fmt;
> +		vaf.va = &args;
> +		pr_err("FIX %s: %pV\n", s->name, &vaf);
> +		va_end(args);
> +	}
>  }
>  
>  static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
> @@ -739,8 +741,10 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
>  void object_err(struct kmem_cache *s, struct page *page,
>  			u8 *object, char *reason)
>  {
> -	slab_bug(s, "%s", reason);
> -	print_trailer(s, page, object);
> +	if (!(s->flags & SLAB_SILENT_ERRORS)) {
> +		slab_bug(s, "%s", reason);
> +		print_trailer(s, page, object);
> +	}
>  }
>  
>  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -752,9 +756,11 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
>  	va_start(args, fmt);
>  	vsnprintf(buf, sizeof(buf), fmt, args);
>  	va_end(args);
> -	slab_bug(s, "%s", buf);
> -	print_page_info(page);
> -	dump_stack();
> +	if (!(s->flags & SLAB_SILENT_ERRORS)) {
> +		slab_bug(s, "%s", buf);
> +		print_page_info(page);
> +		dump_stack();
> +	}
>  }
>  
>  static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -798,11 +804,13 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
>  	while (end > fault && end[-1] == value)
>  		end--;
>  
> -	slab_bug(s, "%s overwritten", what);
> -	pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> +	if (!(s->flags & SLAB_SILENT_ERRORS)) {
> +		slab_bug(s, "%s overwritten", what);
> +		pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
> -	print_trailer(s, page, object);
> +		print_trailer(s, page, object);
> +	}
>  
>  	restore_bytes(s, what, value, fault, end);
>  	return 0;
> @@ -964,6 +972,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
>  
>  	if (!PageSlab(page)) {
>  		slab_err(s, page, "Not a valid slab page");
> +		s->errors += 1;

So to me it looks like 'errors' is incremented whenever there's a
slab_err() or slab_fix() call. So why not move the error accounting
into those functions?
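
I.e., something like this (sketch), so the counting and the silencing
live in one place:

	static __printf(3, 4) void slab_err(struct kmem_cache *s,
					    struct page *page,
					    const char *fmt, ...)
	{
		va_list args;
		char buf[100];

		if (slab_add_kunit_errors())
			return;

		va_start(args, fmt);
		vsnprintf(buf, sizeof(buf), fmt, args);
		va_end(args);
		slab_bug(s, "%s", buf);
		print_page_info(page);
		dump_stack();
	}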

>  		return 0;
>  	}
>  
> @@ -971,11 +980,13 @@ static int check_slab(struct kmem_cache *s, struct page *page)
>  	if (page->objects > maxobj) {
>  		slab_err(s, page, "objects %u > max %u",
>  			page->objects, maxobj);
> +		s->errors += 1;
>  		return 0;
>  	}
>  	if (page->inuse > page->objects) {
>  		slab_err(s, page, "inuse %u > max %u",
>  			page->inuse, page->objects);
> +		s->errors += 1;
>  		return 0;
>  	}
>  	/* Slab_pad_check fixes things up after itself */
> @@ -1008,8 +1019,10 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
>  				page->freelist = NULL;
>  				page->inuse = page->objects;
>  				slab_fix(s, "Freelist cleared");
> +				s->errors += 1;
>  				return 0;
>  			}
> +			s->errors += 1;
>  			break;
>  		}
>  		object = fp;
> @@ -1026,12 +1039,14 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
>  			 page->objects, max_objects);
>  		page->objects = max_objects;
>  		slab_fix(s, "Number of objects adjusted.");
> +		s->errors += 1;
>  	}
>  	if (page->inuse != page->objects - nr) {
>  		slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
>  			 page->inuse, page->objects - nr);
>  		page->inuse = page->objects - nr;
>  		slab_fix(s, "Object count adjusted.");
> +		s->errors += 1;
>  	}
>  	return search == NULL;
>  }
> @@ -4629,8 +4644,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
>  		u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
>  			 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
>  
> -		if (!check_object(s, page, p, val))
> +		if (!check_object(s, page, p, val)) {
> +			s->errors += 1;
>  			break;
> +		}
>  	}
>  	put_map(map);
>  unlock:
> @@ -4650,9 +4667,11 @@ static int validate_slab_node(struct kmem_cache *s,
>  		validate_slab(s, page);
>  		count++;
>  	}
> -	if (count != n->nr_partial)
> +	if (count != n->nr_partial) {
>  		pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
>  		       s->name, count, n->nr_partial);
> +		s->errors += 1;
> +	}
>  
>  	if (!(s->flags & SLAB_STORE_USER))
>  		goto out;
> @@ -4661,20 +4680,23 @@ static int validate_slab_node(struct kmem_cache *s,
>  		validate_slab(s, page);
>  		count++;
>  	}
> -	if (count != atomic_long_read(&n->nr_slabs))
> +	if (count != atomic_long_read(&n->nr_slabs)) {
>  		pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
>  		       s->name, count, atomic_long_read(&n->nr_slabs));
> +		s->errors += 1;

I think these are a few cases where there's no slab_err() or
slab_fix() call, and open-coding the error accounting is still
required...
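
E.g. here (sketch):

	if (count != atomic_long_read(&n->nr_slabs)) {
		pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
		       s->name, count, atomic_long_read(&n->nr_slabs));
		slab_add_kunit_errors();
	}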

> +	}
>  
>  out:
>  	spin_unlock_irqrestore(&n->list_lock, flags);
>  	return count;
>  }
>  
> -static long validate_slab_cache(struct kmem_cache *s)
> +long validate_slab_cache(struct kmem_cache *s)
>  {
>  	int node;
>  	unsigned long count = 0;
>  	struct kmem_cache_node *n;
> +	s->errors = 0;

This would also go away if you use the kunit_resource design. I also
think it's better if the test fully controls 'slab_errors', and it isn't
reset by this function.

>  	flush_all(s);
>  	for_each_kmem_cache_node(s, node, n)
> @@ -4682,6 +4704,8 @@ static long validate_slab_cache(struct kmem_cache *s)
>  
>  	return count;
>  }
> +EXPORT_SYMBOL(validate_slab_cache);
> +
>  /*
>   * Generate lists of code addresses where slabcache objects are allocated
>   * and freed.
> -- 
> 2.17.1
Oliver Glitta April 1, 2021, 10:36 a.m. UTC | #3
Thank you for your feedback. I will take a look at that.

On Thu, 1 Apr 2021 at 11:16, Marco Elver <elver@google.com> wrote:

> [...]
Daniel Latypov April 1, 2021, 7:04 p.m. UTC | #4
On Thu, Apr 1, 2021 at 2:16 AM 'Marco Elver' via KUnit Development
<kunit-dev@googlegroups.com> wrote:
>
> [...]
> and then in slub.c you'd have:
>
>         #if IS_ENABLED(CONFIG_KUNIT)
>         static bool slab_add_kunit_errors(void)
>         {
>                 struct kunit_resource *resource;
>
>                 if (likely(!current->kunit_test))
>                         return false;
>                 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
>                 if (!resource)
>                         return false;
>                 (*(int *)resource->data)++;
>                 kunit_put_resource(resource);
>                 return true;
>         }
>         #else
>         static inline bool slab_add_kunit_errors(void) { return false; }
>         #endif
>
> And anywhere you want to increase the error count, you'd call
> slab_add_kunit_errors().
>
> Another benefit of this approach is that if KUnit is disabled, there is
> zero overhead and no additional code generated (vs. the current
> approach).

The resource approach looks really good, but you'd be picking up a
dependency on
https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
since current->kunit_test will always be NULL today unless
CONFIG_KASAN=y && CONFIG_KUNIT=y.
My patch drops the CONFIG_KASAN requirement and opens it up to all tests.

At the moment, it's just waiting on another look over from Brendan or David.
Any ETA on that, folks? :)

So if you don't want to get blocked on that for now, I think it's fine to add:
  #ifdef CONFIG_SLUB_KUNIT_TEST
  int errors;
  #endif

Side note: the resource example is really good, and maybe we should
steal it and add it near
https://www.kernel.org/doc/html/latest/dev-tools/kunit/tips.html#injecting-test-only-code
I had plans to update that section at some point after my patch landed
and added in `kunit_fail_current_test()`.
But this is an interesting inversion of that where we want the test to
only pass if that code gets executed.


>
> >  };
> >
> >  #ifdef CONFIG_SLUB_CPU_PARTIAL
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 2779c29d9981..e0dec830c269 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -2371,6 +2371,11 @@ config BITS_TEST
> >
> >         If unsure, say N.
> >
> > +config SLUB_KUNIT_TEST
> > +     tristate "KUnit Test for SLUB cache error detection" if !KUNIT_ALL_TESTS
> > +     depends on SLUB_DEBUG && KUNIT
> > +     default KUNIT_ALL_TESTS
>
> Help text please.
>
> > +
> >  config TEST_UDELAY
> >       tristate "udelay test driver"
> >       help
> > diff --git a/lib/Makefile b/lib/Makefile
> > index b5307d3eec1a..e1eb986c0e87 100644
> > --- a/lib/Makefile
> > +++ b/lib/Makefile
> > @@ -352,5 +352,6 @@ obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
> >  obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
> >  obj-$(CONFIG_BITS_TEST) += test_bits.o
> >  obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
> > +obj-$(CONFIG_SLUB_KUNIT_TEST) += test_slub.o
>
> Any reason to not name this slub_kunit.c? This is a new test, and it
> could follow the naming convention of the other KUnit tests.
>
> Some of it is also documented:
> https://www.kernel.org/doc/html/latest/dev-tools/kunit/style.html
>
> >  obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
> > diff --git a/lib/test_slub.c b/lib/test_slub.c
> > new file mode 100644
> > index 000000000000..4f8ea3c7d867
> > --- /dev/null
> > +++ b/lib/test_slub.c
> > @@ -0,0 +1,124 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <kunit/test.h>
> > +#include <linux/mm.h>
> > +#include <linux/slab.h>
> > +#include <linux/module.h>
> > +#include <linux/kernel.h>
> > +#include "../mm/slab.h"
> > +
> > +
> > +static void test_clobber_zone(struct kunit *test)
> > +{
> > +     struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
> > +                             SLAB_RED_ZONE | SLAB_SILENT_ERRORS, NULL);
> > +     u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> > +
> > +     p[64] = 0x12;
> > +
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 1, s->errors);
> > +
> > +     kmem_cache_free(s, p);
> > +     kmem_cache_destroy(s);
> > +}
> > +
> > +static void test_next_pointer(struct kunit *test)
> > +{
> > +     struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
> > +                             SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
> > +     u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> > +     unsigned long tmp;
> > +     unsigned long *ptr_addr;
> > +
> > +     kmem_cache_free(s, p);
> > +
> > +     ptr_addr = (unsigned long *)(p + s->offset);
> > +     tmp = *ptr_addr;
> > +     p[s->offset] = 0x12;
> > +
> > +     /*
> > +      * Expecting two errors.
> > +      * One for the corrupted freechain and the other one for the wrong
> > +      * count of objects in use.
> > +      */
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 2, s->errors);
> > +
> > +     /*
> > +      * Try to repair corrupted freepointer.
> > +      * Still expecting one error for the wrong count of objects in use.
> > +      */
> > +     *ptr_addr = tmp;
> > +
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 1, s->errors);
> > +
> > +     /*
> > +      * Previous validation repaired the count of objects in use.
> > +      * Now expecting no error.
> > +      */
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 0, s->errors);
> > +
> > +     kmem_cache_destroy(s);
> > +}
> > +
> > +static void test_first_word(struct kunit *test)
> > +{
> > +     struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
> > +                             SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
> > +     u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> > +
> > +     kmem_cache_free(s, p);
> > +     *p = 0x78;
> > +
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 1, s->errors);
> > +
> > +     kmem_cache_destroy(s);
> > +}
> > +
> > +static void test_clobber_50th_byte(struct kunit *test)
> > +{
> > +     struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
> > +                             SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
> > +     u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> > +
> > +     kmem_cache_free(s, p);
> > +     p[50] = 0x9a;
> > +
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 1, s->errors);
> > +     kmem_cache_destroy(s);
> > +}
> > +
> > +static void test_clobber_redzone_free(struct kunit *test)
> > +{
> > +     struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
> > +                             SLAB_RED_ZONE | SLAB_SILENT_ERRORS, NULL);
> > +     u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
> > +
> > +     kmem_cache_free(s, p);
> > +     p[64] = 0xab;
> > +
> > +     validate_slab_cache(s);
> > +     KUNIT_EXPECT_EQ(test, 1, s->errors);
> > +     kmem_cache_destroy(s);
> > +}
> > +
> > +static struct kunit_case test_cases[] = {
> > +     KUNIT_CASE(test_clobber_zone),
> > +     KUNIT_CASE(test_next_pointer),
> > +     KUNIT_CASE(test_first_word),
> > +     KUNIT_CASE(test_clobber_50th_byte),
> > +     KUNIT_CASE(test_clobber_redzone_free),
> > +     {}
> > +};
> > +
> > +static struct kunit_suite test_suite = {
> > +     .name = "slub_test",
> > +     .test_cases = test_cases,
>
> Here you could add a
>
>         .init = test_init,
>
> or so, if you go with the kunit_resource design.
>
> > +};
> > +kunit_test_suite(test_suite);
> > +
> > +MODULE_LICENSE("GPL");
> > diff --git a/mm/slab.h b/mm/slab.h
> > index 076582f58f68..382507b6cab9 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -134,7 +134,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
> >  #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
> >  #elif defined(CONFIG_SLUB_DEBUG)
> >  #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
> > -                       SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
> > +                       SLAB_TRACE | SLAB_CONSISTENCY_CHECKS | \
> > +                       SLAB_SILENT_ERRORS)
> >  #else
> >  #define SLAB_DEBUG_FLAGS (0)
> >  #endif
> > @@ -164,7 +165,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
> >                             SLAB_NOLEAKTRACE | \
> >                             SLAB_RECLAIM_ACCOUNT | \
> >                             SLAB_TEMPORARY | \
> > -                           SLAB_ACCOUNT)
> > +                           SLAB_ACCOUNT | \
> > +                           SLAB_SILENT_ERRORS)
> >
> >  bool __kmem_cache_empty(struct kmem_cache *);
> >  int __kmem_cache_shutdown(struct kmem_cache *);
> > @@ -215,6 +217,7 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
> >  DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
> >  #endif
> >  extern void print_tracking(struct kmem_cache *s, void *object);
> > +long validate_slab_cache(struct kmem_cache *s);
> >  #else
> >  static inline void print_tracking(struct kmem_cache *s, void *object)
> >  {
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 88e833986332..239c9095e7ea 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -55,7 +55,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
> >   */
> >  #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
> >               SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
> > -             SLAB_FAILSLAB | kasan_never_merge())
> > +             SLAB_FAILSLAB | SLAB_SILENT_ERRORS | kasan_never_merge())
> >
> >  #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
> >                        SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 3021ce9bf1b3..7083e89432d0 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -673,14 +673,16 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
> >
> >  static void slab_fix(struct kmem_cache *s, char *fmt, ...)
> >  {
> > -     struct va_format vaf;
> > -     va_list args;
> > -
> > -     va_start(args, fmt);
> > -     vaf.fmt = fmt;
> > -     vaf.va = &args;
> > -     pr_err("FIX %s: %pV\n", s->name, &vaf);
> > -     va_end(args);
> > +     if (!(s->flags & SLAB_SILENT_ERRORS)) {
>
> This style of guarding causes lots of line diffs.
>
> Instead, if the whole function is to be skipped, please prefer:
>
>         if (skip_things_if_true)
>                 return;
>
> instead of
>
>         if (!skip_things_if_true) {
>                 ...
>         }
>
> here and everywhere else.
>
> Specifically, for the kunit_resource design, this would simply become:
>
>         if (slab_add_kunit_errors())
>                 return;
>
> Meaning that whenever a KUnit test has a resource "slab_errors", no
> messages are printed -- otherwise it'll always print a message.
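> 
> For slab_fix() that would then look roughly like this (sketch):
> 
>         static void slab_fix(struct kmem_cache *s, char *fmt, ...)
>         {
>                 if (slab_add_kunit_errors())
>                         return;
>                 /* ... print the FIX message as before ... */
>         }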
>
> > +             struct va_format vaf;
> > +             va_list args;
> > +
> > +             va_start(args, fmt);
> > +             vaf.fmt = fmt;
> > +             vaf.va = &args;
> > +             pr_err("FIX %s: %pV\n", s->name, &vaf);
> > +             va_end(args);
> > +     }
> >  }
> >
> >  static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
> > @@ -739,8 +741,10 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
> >  void object_err(struct kmem_cache *s, struct page *page,
> >                       u8 *object, char *reason)
> >  {
> > -     slab_bug(s, "%s", reason);
> > -     print_trailer(s, page, object);
> > +     if (!(s->flags & SLAB_SILENT_ERRORS)) {
> > +             slab_bug(s, "%s", reason);
> > +             print_trailer(s, page, object);
> > +     }
> >  }
> >
> >  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> > @@ -752,9 +756,11 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> >       va_start(args, fmt);
> >       vsnprintf(buf, sizeof(buf), fmt, args);
> >       va_end(args);
> > -     slab_bug(s, "%s", buf);
> > -     print_page_info(page);
> > -     dump_stack();
> > +     if (!(s->flags & SLAB_SILENT_ERRORS)) {
> > +             slab_bug(s, "%s", buf);
> > +             print_page_info(page);
> > +             dump_stack();
> > +     }
> >  }
> >
> >  static void init_object(struct kmem_cache *s, void *object, u8 val)
> > @@ -798,11 +804,13 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
> >       while (end > fault && end[-1] == value)
> >               end--;
> >
> > -     slab_bug(s, "%s overwritten", what);
> > -     pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> > +     if (!(s->flags & SLAB_SILENT_ERRORS)) {
> > +             slab_bug(s, "%s overwritten", what);
> > +             pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> >                                       fault, end - 1, fault - addr,
> >                                       fault[0], value);
> > -     print_trailer(s, page, object);
> > +             print_trailer(s, page, object);
> > +     }
> >
> >       restore_bytes(s, what, value, fault, end);
> >       return 0;
> > @@ -964,6 +972,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
> >
> >       if (!PageSlab(page)) {
> >               slab_err(s, page, "Not a valid slab page");
> > +             s->errors += 1;
>
> So to me it looks like errors is set whenever there's a slab_err() or
> slab_fix() call. So why not move the error accounting into those
> functions?
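> 
> I.e. with the current s->errors design, something like this (sketch,
> assuming the signature stays as-is):
> 
>         static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
>                                             const char *fmt, ...)
>         {
>                 s->errors += 1;
>                 /* ... existing reporting ... */
>         }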
>
> >               return 0;
> >       }
> >
> > @@ -971,11 +980,13 @@ static int check_slab(struct kmem_cache *s, struct page *page)
> >       if (page->objects > maxobj) {
> >               slab_err(s, page, "objects %u > max %u",
> >                       page->objects, maxobj);
> > +             s->errors += 1;
> >               return 0;
> >       }
> >       if (page->inuse > page->objects) {
> >               slab_err(s, page, "inuse %u > max %u",
> >                       page->inuse, page->objects);
> > +             s->errors += 1;
> >               return 0;
> >       }
> >       /* Slab_pad_check fixes things up after itself */
> > @@ -1008,8 +1019,10 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
> >                               page->freelist = NULL;
> >                               page->inuse = page->objects;
> >                               slab_fix(s, "Freelist cleared");
> > +                             s->errors += 1;
> >                               return 0;
> >                       }
> > +                     s->errors += 1;
> >                       break;
> >               }
> >               object = fp;
> > @@ -1026,12 +1039,14 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
> >                        page->objects, max_objects);
> >               page->objects = max_objects;
> >               slab_fix(s, "Number of objects adjusted.");
> > +             s->errors += 1;
> >       }
> >       if (page->inuse != page->objects - nr) {
> >               slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
> >                        page->inuse, page->objects - nr);
> >               page->inuse = page->objects - nr;
> >               slab_fix(s, "Object count adjusted.");
> > +             s->errors += 1;
> >       }
> >       return search == NULL;
> >  }
> > @@ -4629,8 +4644,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page)
> >               u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
> >                        SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
> >
> > -             if (!check_object(s, page, p, val))
> > +             if (!check_object(s, page, p, val)) {
> > +                     s->errors += 1;
> >                       break;
> > +             }
> >       }
> >       put_map(map);
> >  unlock:
> > @@ -4650,9 +4667,11 @@ static int validate_slab_node(struct kmem_cache *s,
> >               validate_slab(s, page);
> >               count++;
> >       }
> > -     if (count != n->nr_partial)
> > +     if (count != n->nr_partial) {
> >               pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
> >                      s->name, count, n->nr_partial);
> > +             s->errors += 1;
> > +     }
> >
> >       if (!(s->flags & SLAB_STORE_USER))
> >               goto out;
> > @@ -4661,20 +4680,23 @@ static int validate_slab_node(struct kmem_cache *s,
> >               validate_slab(s, page);
> >               count++;
> >       }
> > -     if (count != atomic_long_read(&n->nr_slabs))
> > +     if (count != atomic_long_read(&n->nr_slabs)) {
> >               pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
> >                      s->name, count, atomic_long_read(&n->nr_slabs));
> > +             s->errors += 1;
>
> I think these are a few cases where there's no slab_err() or
> slab_fix() call, so open-coding the error count is still required...
>
> > +     }
> >
> >  out:
> >       spin_unlock_irqrestore(&n->list_lock, flags);
> >       return count;
> >  }
> >
> > -static long validate_slab_cache(struct kmem_cache *s)
> > +long validate_slab_cache(struct kmem_cache *s)
> >  {
> >       int node;
> >       unsigned long count = 0;
> >       struct kmem_cache_node *n;
> > +     s->errors = 0;
>
> This would also go away if you use the kunit_resource design. I also
> think it's better if the test fully controls 'slab_errors', and it isn't
> reset by this function.
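> 
> I.e. each test case would reset and check it itself (sketch):
> 
>         slab_errors = 0;
>         validate_slab_cache(s);
>         KUNIT_EXPECT_EQ(test, 2, slab_errors);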
>
> >       flush_all(s);
> >       for_each_kmem_cache_node(s, node, n)
> > @@ -4682,6 +4704,8 @@ static long validate_slab_cache(struct kmem_cache *s)
> >
> >       return count;
> >  }
> > +EXPORT_SYMBOL(validate_slab_cache);
> > +
> >  /*
> >   * Generate lists of code addresses where slabcache objects are allocated
> >   * and freed.
> > --
> > 2.17.1
Marco Elver April 1, 2021, 9:24 p.m. UTC | #5
On Thu, 1 Apr 2021 at 21:04, Daniel Latypov <dlatypov@google.com> wrote:
...
> > > --- a/include/linux/slub_def.h
> > > +++ b/include/linux/slub_def.h
> > > @@ -133,6 +133,8 @@ struct kmem_cache {
> > >       unsigned int usersize;          /* Usercopy region size */
> > >
> > >       struct kmem_cache_node *node[MAX_NUMNODES];
> > > +
> > > +     int errors;                     /* Number of errors in cache */
> >
> > So, I think it's bad design to add a new field 'errors', just for the
> > test. This will increase kmem_cache size for all builds, which is
> > unnecessary.
> >
> > Is there use to retrieve 'errors' elsewhere?
> >
> > While you could guard this with #ifdef CONFIG_SLUB_DEBUG or so, there's
> > a better design option if this is just for the KUnit test's benefit: use
> > kunit_resource.
> >
> > The way it'd work is that for each test (you can add a common init
> > function) you add a named resource, in this case just an 'int' I guess,
> > that slab would be able to retrieve if this test is being run.
> >
> > In the test somewhere, you could add something like this:
> >
> >
> >         static struct kunit_resource resource;
> >         static int slab_errors;
> >
> >         ..........
> >
> >         static int test_init(struct kunit *test)
> >         {
> >                 slab_errors = 0;
> >                 kunit_add_named_resource(test, NULL, NULL, &resource,
> >                                          "slab_errors", &slab_errors);
> >                 return 0;
> >         }
> >
> >         ...... tests now check slab_errors .....
> >
> > and then in slub.c you'd have:
> >
> >         #if IS_ENABLED(CONFIG_KUNIT)
> >         static bool slab_add_kunit_errors(void)
> >         {
> >                 struct kunit_resource *resource;
> >
> >                 if (likely(!current->kunit_test))
> >                         return false;
> >                 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
> >                 if (!resource)
> >                         return false;
> >                 (*(int *)resource->data)++;
> >                 kunit_put_resource(resource);

      return true;

was missing (right after the kunit_put_resource() call).

> >         }
> >         #else
> >         static inline bool slab_add_kunit_errors(void) { return false; }
> >         #endif
> >
> > And anywhere you want to increase the error count, you'd call
> > slab_add_kunit_errors().
> >
> > Another benefit of this approach is that if KUnit is disabled, there is
> > zero overhead and no additional code generated (vs. the current
> > approach).
>
> The resource approach looks really good, but...
> You'd be picking up a dependency on
> https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
> current->kunit_test will always be NULL unless CONFIG_KASAN=y &&
> CONFIG_KUNIT=y at the moment.
> My patch drops the CONFIG_KASAN requirement and opens it up to all tests.

Oh, that's a shame, but hopefully it'll be in -next soon.

> At the moment, it's just waiting on another look over from Brendan or David.
> Any ETA on that, folks? :)
>
> So if you don't want to get blocked on that for now, I think it's fine to add:
>   #ifdef CONFIG_SLUB_KUNIT_TEST
>   int errors;
>   #endif

Until kunit fixes setting current->kunit_test, a cleaner workaround
that would still allow doing the patch with kunit_resource is to just have
an .init/.exit function that sets it ("current->kunit_test = test;").
And then perhaps add a note ("FIXME: ...") to remove it once the above
patch has landed.

At least that way we get the least intrusive change for mm/slub.c, and
the test is the only thing that needs a 2-line patch to clean up
later.
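
Roughly (untested sketch, combined with the "slab_errors" resource from
before):

        static int test_init(struct kunit *test)
        {
                slab_errors = 0;
                /* FIXME: remove once current->kunit_test is set for all tests. */
                current->kunit_test = test;
                kunit_add_named_resource(test, NULL, NULL, &resource,
                                         "slab_errors", &slab_errors);
                return 0;
        }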

Thanks,
-- Marco
Brendan Higgins April 2, 2021, 9:38 a.m. UTC | #6
On Thu, Apr 1, 2021 at 12:04 PM 'Daniel Latypov' via KUnit Development
<kunit-dev@googlegroups.com> wrote:
>
> On Thu, Apr 1, 2021 at 2:16 AM 'Marco Elver' via KUnit Development
> <kunit-dev@googlegroups.com> wrote:
[...]
> >         #else
> >         static inline bool slab_add_kunit_errors(void) { return false; }
> >         #endif
> >
> > And anywhere you want to increase the error count, you'd call
> > slab_add_kunit_errors().
> >
> > Another benefit of this approach is that if KUnit is disabled, there is
> > zero overhead and no additional code generated (vs. the current
> > approach).
>
> The resource approach looks really good, but...
> You'd be picking up a dependency on
> https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
> current->kunit_test will always be NULL unless CONFIG_KASAN=y &&
> CONFIG_KUNIT=y at the moment.
> My patch drops the CONFIG_KASAN requirement and opens it up to all tests.
>
> At the moment, it's just waiting on another look over from Brendan or David.
> Any ETA on that, folks? :)

I just gave a "Reviewed-by" and sent it off to Shuah. Should be
available in 5.13.

Cheers
Vlastimil Babka April 6, 2021, 10:57 a.m. UTC | #7
On 4/1/21 11:24 PM, Marco Elver wrote:
> On Thu, 1 Apr 2021 at 21:04, Daniel Latypov <dlatypov@google.com> wrote:
>> >         }
>> >         #else
>> >         static inline bool slab_add_kunit_errors(void) { return false; }
>> >         #endif
>> >
>> > And anywhere you want to increase the error count, you'd call
>> > slab_add_kunit_errors().
>> >
>> > Another benefit of this approach is that if KUnit is disabled, there is
>> > zero overhead and no additional code generated (vs. the current
>> > approach).
>>
>> The resource approach looks really good, but...
>> You'd be picking up a dependency on
>> https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
>> current->kunit_test will always be NULL unless CONFIG_KASAN=y &&
>> CONFIG_KUNIT=y at the moment.
>> My patch drops the CONFIG_KASAN requirement and opens it up to all tests.
> 
> Oh, that's a shame, but hopefully it'll be in -next soon.
> 
>> At the moment, it's just waiting on another look over from Brendan or David.
>> Any ETA on that, folks? :)
>>
>> So if you don't want to get blocked on that for now, I think it's fine to add:
>>   #ifdef CONFIG_SLUB_KUNIT_TEST
>>   int errors;
>>   #endif
> 
> Until kunit fixes setting current->kunit_test, a cleaner workaround
> that would still allow doing the patch with kunit_resource is to just have
> an .init/.exit function that sets it ("current->kunit_test = test;").
> And then perhaps add a note ("FIXME: ...") to remove it once the above
> patch has landed.
> 
> At least that way we get the least intrusive change for mm/slub.c, and
> the test is the only thing that needs a 2-line patch to clean up
> later.

So when testing Oliver's new version internally with your suggestions (thanks
again for those), I got lockdep splats, because slab_add_kunit_errors() is also
called from irq-disabled contexts, and kunit_find_named_resource() will call
spin_lock(&test->lock), which is not irq safe. Can we make the lock irq safe? I
tried the change below and it made the problem go away. If you agree, the
question is how to proceed - make it part of Oliver's patch series and let
Andrew pick it all up with the kunit team's eventual acks on this patch, or whatnot.

----8<----

commit ab28505477892e9824c57ac338c88aec2ec0abce
Author: Vlastimil Babka <vbabka@suse.cz>
Date:   Tue Apr 6 12:28:07 2021 +0200

    kunit: make test->lock irq safe

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 49601c4b98b8..524d4789af22 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -515,8 +515,9 @@ kunit_find_resource(struct kunit *test,
 		    void *match_data)
 {
 	struct kunit_resource *res, *found = NULL;
+	unsigned long flags;
 
-	spin_lock(&test->lock);
+	spin_lock_irqsave(&test->lock, flags);
 
 	list_for_each_entry_reverse(res, &test->resources, node) {
 		if (match(test, res, (void *)match_data)) {
@@ -526,7 +527,7 @@ kunit_find_resource(struct kunit *test,
 		}
 	}
 
-	spin_unlock(&test->lock);
+	spin_unlock_irqrestore(&test->lock, flags);
 
 	return found;
 }
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index ec9494e914ef..2c62eeb45b82 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -442,6 +442,7 @@ int kunit_add_resource(struct kunit *test,
 		       void *data)
 {
 	int ret = 0;
+	unsigned long flags;
 
 	res->free = free;
 	kref_init(&res->refcount);
@@ -454,10 +455,10 @@ int kunit_add_resource(struct kunit *test,
 		res->data = data;
 	}
 
-	spin_lock(&test->lock);
+	spin_lock_irqsave(&test->lock, flags);
 	list_add_tail(&res->node, &test->resources);
 	/* refcount for list is established by kref_init() */
-	spin_unlock(&test->lock);
+	spin_unlock_irqrestore(&test->lock, flags);
 
 	return ret;
 }
@@ -515,9 +516,11 @@ EXPORT_SYMBOL_GPL(kunit_alloc_and_get_resource);
 
 void kunit_remove_resource(struct kunit *test, struct kunit_resource *res)
 {
-	spin_lock(&test->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&test->lock, flags);
 	list_del(&res->node);
-	spin_unlock(&test->lock);
+	spin_unlock_irqrestore(&test->lock, flags);
 	kunit_put_resource(res);
 }
 EXPORT_SYMBOL_GPL(kunit_remove_resource);
@@ -597,6 +600,7 @@ EXPORT_SYMBOL_GPL(kunit_kfree);
 void kunit_cleanup(struct kunit *test)
 {
 	struct kunit_resource *res;
+	unsigned long flags;
 
 	/*
 	 * test->resources is a stack - each allocation must be freed in the
@@ -608,9 +612,9 @@ void kunit_cleanup(struct kunit *test)
 	 * protect against the current node being deleted, not the next.
 	 */
 	while (true) {
-		spin_lock(&test->lock);
+		spin_lock_irqsave(&test->lock, flags);
 		if (list_empty(&test->resources)) {
-			spin_unlock(&test->lock);
+			spin_unlock_irqrestore(&test->lock, flags);
 			break;
 		}
 		res = list_last_entry(&test->resources,
@@ -621,7 +625,7 @@ void kunit_cleanup(struct kunit *test)
 		 * resource, and this can't happen if the test->lock
 		 * is held.
 		 */
-		spin_unlock(&test->lock);
+		spin_unlock_irqrestore(&test->lock, flags);
 		kunit_remove_resource(test, res);
 	}
 #if (IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT))
Marco Elver April 8, 2021, 10:29 a.m. UTC | #8
On Tue, 6 Apr 2021 at 12:57, Vlastimil Babka <vbabka@suse.cz> wrote:
>
>
> On 4/1/21 11:24 PM, Marco Elver wrote:
> > On Thu, 1 Apr 2021 at 21:04, Daniel Latypov <dlatypov@google.com> wrote:
> >> >         }
> >> >         #else
> >> >         static inline bool slab_add_kunit_errors(void) { return false; }
> >> >         #endif
> >> >
> >> > And anywhere you want to increase the error count, you'd call
> >> > slab_add_kunit_errors().
> >> >
> >> > Another benefit of this approach is that if KUnit is disabled, there is
> >> > zero overhead and no additional code generated (vs. the current
> >> > approach).
> >>
> >> The resource approach looks really good, but...
> >> You'd be picking up a dependency on
> >> https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
> >> current->kunit_test will always be NULL unless CONFIG_KASAN=y &&
> >> CONFIG_KUNIT=y at the moment.
> >> My patch drops the CONFIG_KASAN requirement and opens it up to all tests.
> >
> > Oh, that's a shame, but hopefully it'll be in -next soon.
> >
> >> At the moment, it's just waiting on another look over from Brendan or David.
> >> Any ETA on that, folks? :)
> >>
> >> So if you don't want to get blocked on that for now, I think it's fine to add:
> >>   #ifdef CONFIG_SLUB_KUNIT_TEST
> >>   int errors;
> >>   #endif
> >
> > Until kunit fixes setting current->kunit_test, a cleaner workaround
> > that would still allow doing the patch with kunit_resource is to just have
> > an .init/.exit function that sets it ("current->kunit_test = test;").
> > And then perhaps add a note ("FIXME: ...") to remove it once the above
> > patch has landed.
> >
> > At least that way we get the least intrusive change for mm/slub.c, and
> > the test is the only thing that needs a 2-line patch to clean up
> > later.
>
> So when testing Oliver's new version internally with your suggestions (thanks
> again for those), I got lockdep splats, because slab_add_kunit_errors() is also
> called from irq-disabled contexts, and kunit_find_named_resource() will call
> spin_lock(&test->lock), which is not irq safe. Can we make the lock irq safe? I
> tried the change below and it made the problem go away. If you agree, the
> question is how to proceed - make it part of Oliver's patch series and let
> Andrew pick it all up with the kunit team's eventual acks on this patch, or whatnot.

From what I can tell it should be fine to make it irq safe (ack for
your patch below). Regarding patch logistics, I'd probably add it to
the series. If that ends up not working, we'll find out sooner or
later.

(FYI, the prerequisite patch for current->kunit_test is in -next now.)

KUnit maintainers, do you have any preferences?

> ----8<----
> 
> [Vlastimil's "kunit: make test->lock irq safe" patch quoted in full --
> snipped; see #7 above]
Daniel Latypov April 8, 2021, 5:19 p.m. UTC | #9
On Thu, Apr 8, 2021 at 3:30 AM Marco Elver <elver@google.com> wrote:
>
> On Tue, 6 Apr 2021 at 12:57, Vlastimil Babka <vbabka@suse.cz> wrote:
> >
> >
> > On 4/1/21 11:24 PM, Marco Elver wrote:
> > > On Thu, 1 Apr 2021 at 21:04, Daniel Latypov <dlatypov@google.com> wrote:
> > >> >         }
> > >> >         #else
> > >> >         static inline bool slab_add_kunit_errors(void) { return false; }
> > >> >         #endif
> > >> >
> > >> > And anywhere you want to increase the error count, you'd call
> > >> > slab_add_kunit_errors().
> > >> >
> > >> > Another benefit of this approach is that if KUnit is disabled, there is
> > >> > zero overhead and no additional code generated (vs. the current
> > >> > approach).
> > >>
> > >> The resource approach looks really good, but...
> > >> You'd be picking up a dependency on
> > >> https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
> > >> current->kunit_test will always be NULL unless CONFIG_KASAN=y &&
> > >> CONFIG_KUNIT=y at the moment.
> > >> My patch drops the CONFIG_KASAN requirement and opens it up to all tests.
> > >
> > > Oh, that's a shame, but hopefully it'll be in -next soon.
> > >
> > >> At the moment, it's just waiting on another look over from Brendan or David.
> > >> Any ETA on that, folks? :)
> > >>
> > >> So if you don't want to get blocked on that for now, I think it's fine to add:
> > >>   #ifdef CONFIG_SLUB_KUNIT_TEST
> > >>   int errors;
> > >>   #endif
> > >
> > > Until kunit fixes setting current->kunit_test, a cleaner workaround
> > > that would still allow doing the patch with kunit_resource is to just have
> > > an .init/.exit function that sets it ("current->kunit_test = test;").
> > > And then perhaps add a note ("FIXME: ...") to remove it once the above
> > > patch has landed.
> > >
> > > At least that way we get the least intrusive change for mm/slub.c, and
> > > the test is the only thing that needs a 2-line patch to clean up
> > > later.
> >
> > So when testing Oliver's new version internally with your suggestions (thanks
> > again for those), I got lockdep splats, because slab_add_kunit_errors() is also
> > called from irq-disabled contexts, and kunit_find_named_resource() will call
> > spin_lock(&test->lock), which is not irq safe. Can we make the lock irq safe? I
> > tried the change below and it made the problem go away. If you agree, the
> > question is how to proceed - make it part of Oliver's patch series and let
> > Andrew pick it all up with the kunit team's eventual acks on this patch, or whatnot.
>
> From what I can tell it should be fine to make it irq safe (ack for
> your patch below). Regarding patch logistics, I'd probably add it to
> the series. If that ends up not working, we'll find out sooner or
> later.
>
> (FYI, the prerequisite patch for current->kunit_test is in -next now.)

Yep.
There are also two follow-up patches in
https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/log/?h=kunit

>
> KUnit maintainers, do you have any preferences?

I poked Brendan and David offline, and they seemed fine either way.
So probably just include it in this patch series for convenience.

Brendan also mentioned KUnit used to use spin_lock_irqsave/restore()
but had been told to not use it until necessary.
See https://lore.kernel.org/linux-kselftest/20181016235120.138227-3-brendanhiggins@google.com/
So I think there are no objections to the patch itself either.

But I'd wait for Brendan to chime in to confirm.



>
> > ----8<----
> > 
> > [Vlastimil's "kunit: make test->lock irq safe" patch quoted in full --
> > snipped; see #7 above]
Brendan Higgins April 9, 2021, 7:54 a.m. UTC | #10
On Thu, Apr 8, 2021 at 10:19 AM Daniel Latypov <dlatypov@google.com> wrote:
>
> On Thu, Apr 8, 2021 at 3:30 AM Marco Elver <elver@google.com> wrote:
> >
> > On Tue, 6 Apr 2021 at 12:57, Vlastimil Babka <vbabka@suse.cz> wrote:
> > >
> > >
> > > On 4/1/21 11:24 PM, Marco Elver wrote:
> > > > On Thu, 1 Apr 2021 at 21:04, Daniel Latypov <dlatypov@google.com> wrote:
> > > >> >         }
> > > >> >         #else
> > > >> >         static inline bool slab_add_kunit_errors(void) { return false; }
> > > >> >         #endif
> > > >> >
> > > >> > And anywhere you want to increase the error count, you'd call
> > > >> > slab_add_kunit_errors().
> > > >> >
> > > >> > Another benefit of this approach is that if KUnit is disabled, there is
> > > >> > zero overhead and no additional code generated (vs. the current
> > > >> > approach).
> > > >>
> > > >> The resource approach looks really good, but...
> > > >> You'd be picking up a dependency on
> > > >> https://lore.kernel.org/linux-kselftest/20210311152314.3814916-2-dlatypov@google.com/
> > > >> current->kunit_test will always be NULL unless CONFIG_KASAN=y &&
> > > >> CONFIG_KUNIT=y at the moment.
> > > >> My patch drops the CONFIG_KASAN requirement and opens it up to all tests.
> > > >
> > > > Oh, that's a shame, but hopefully it'll be in -next soon.
> > > >
> > > >> At the moment, it's just waiting on another look over from Brendan or David.
> > > >> Any ETA on that, folks? :)
> > > >>
> > > >> So if you don't want to get blocked on that for now, I think it's fine to add:
> > > >>   #ifdef CONFIG_SLUB_KUNIT_TEST
> > > >>   int errors;
> > > >>   #endif
> > > >
> > > > Until kunit fixes setting current->kunit_test, a cleaner workaround
> > > > that would still allow doing the patch with kunit_resource is to just have
> > > > an .init/.exit function that sets it ("current->kunit_test = test;").
> > > > And then perhaps add a note ("FIXME: ...") to remove it once the above
> > > > patch has landed.
> > > >
> > > > At least that way we get the least intrusive change for mm/slub.c, and
> > > > the test is the only thing that needs a 2-line patch to clean up
> > > > later.
> > >
> > > So when testing Oliver's new version internally with your suggestions (thanks
> > > again for those), I got lockdep splats, because slab_add_kunit_errors() is also
> > > called from irq-disabled contexts, and kunit_find_named_resource() will call
> > > spin_lock(&test->lock), which is not irq safe. Can we make the lock irq safe? I
> > > tried the change below and it made the problem go away. If you agree, the
> > > question is how to proceed - make it part of Oliver's patch series and let
> > > Andrew pick it all up with the kunit team's eventual acks on this patch, or whatnot.
> >
> > From what I can tell it should be fine to make it irq safe (ack for
> > your patch below). Regarding patch logistics, I'd probably add it to
> > the series. If that ends up not working, we'll find out sooner or
> > later.
> >
> > (FYI, the prerequisite patch for current->kunit_test is in -next now.)
>
> Yep.
> There are also two follow-up patches in
> https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/log/?h=kunit
>
> >
> > KUnit maintainers, do you have any preferences?
>
> I poked Brendan and David offline, and they seemed fine either way.
> So probably just include it in this patch series for convenience.
>
> Brendan also mentioned KUnit used to use spin_lock_irqsave/restore()
> but had been told to not use it until necessary.
> See https://lore.kernel.org/linux-kselftest/20181016235120.138227-3-brendanhiggins@google.com/
> So I think there are no objections to the patch itself either.
>
> But I'd wait for Brendan to chime in to confirm.

That's correct. Before KUnit was accepted upstream, early versions of
the patchset used the irqsave/restore versions. I was asked to remove
them until they were necessary, and it looks like that time is now :-)

So yes, I would be happy to see this patch go in. Looks good to me the
way you have it below. Send it out as its own patch in your series and
I will give it a Reviewed-by.

Thanks!

> > > ----8<----
> > > 
> > > [Vlastimil's "kunit: make test->lock irq safe" patch quoted in full --
> > > snipped; see #7 above]
Patch

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7ae604076767..ed1a5a64d028 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -25,6 +25,8 @@ 
  */
 /* DEBUG: Perform (expensive) checks on alloc/free */
 #define SLAB_CONSISTENCY_CHECKS	((slab_flags_t __force)0x00000100U)
+/* DEBUG: Silent bug reports */
+#define SLAB_SILENT_ERRORS	((slab_flags_t __force)0x00000200U)
 /* DEBUG: Red zone objs in a cache */
 #define SLAB_RED_ZONE		((slab_flags_t __force)0x00000400U)
 /* DEBUG: Poison objects */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index dcde82a4434c..e4b51bb5bb83 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -133,6 +133,8 @@  struct kmem_cache {
 	unsigned int usersize;		/* Usercopy region size */
 
 	struct kmem_cache_node *node[MAX_NUMNODES];
+
+	int errors;			/* Number of errors in cache */
 };
 
 #ifdef CONFIG_SLUB_CPU_PARTIAL
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2779c29d9981..e0dec830c269 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2371,6 +2371,11 @@  config BITS_TEST
 
 	  If unsure, say N.
 
+config SLUB_KUNIT_TEST
+	tristate "KUnit Test for SLUB cache error detection" if !KUNIT_ALL_TESTS
+	depends on SLUB_DEBUG && KUNIT
+	default KUNIT_ALL_TESTS
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index b5307d3eec1a..e1eb986c0e87 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -352,5 +352,6 @@  obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
 obj-$(CONFIG_BITS_TEST) += test_bits.o
 obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
+obj-$(CONFIG_SLUB_KUNIT_TEST) += test_slub.o
 
 obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
diff --git a/lib/test_slub.c b/lib/test_slub.c
new file mode 100644
index 000000000000..4f8ea3c7d867
--- /dev/null
+++ b/lib/test_slub.c
@@ -0,0 +1,124 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "../mm/slab.h"
+
+
+static void test_clobber_zone(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
+				SLAB_RED_ZONE | SLAB_SILENT_ERRORS, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	p[64] = 0x12;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 1, s->errors);
+
+	kmem_cache_free(s, p);
+	kmem_cache_destroy(s);
+}
+
+static void test_next_pointer(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
+				SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+	unsigned long tmp;
+	unsigned long *ptr_addr;
+
+	kmem_cache_free(s, p);
+
+	ptr_addr = (unsigned long *)(p + s->offset);
+	tmp = *ptr_addr;
+	p[s->offset] = 0x12;
+
+	/*
+	 * Expecting two errors.
+	 * One for the corrupted freechain and the other one for the wrong
+	 * count of objects in use.
+	 */
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 2, s->errors);
+
+	/*
+	 * Try to repair corrupted freepointer.
+	 * Still expecting one error for the wrong count of objects in use.
+	 */
+	*ptr_addr = tmp;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 1, s->errors);
+
+	/*
+	 * Previous validation repaired the count of objects in use.
+	 * Now expecting no error.
+	 */
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 0, s->errors);
+
+	kmem_cache_destroy(s);
+}
+
+static void test_first_word(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
+				SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kmem_cache_free(s, p);
+	*p = 0x78;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 1, s->errors);
+
+	kmem_cache_destroy(s);
+}
+
+static void test_clobber_50th_byte(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
+				SLAB_POISON | SLAB_SILENT_ERRORS, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kmem_cache_free(s, p);
+	p[50] = 0x9a;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 1, s->errors);
+	kmem_cache_destroy(s);
+}
+
+static void test_clobber_redzone_free(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
+				SLAB_RED_ZONE | SLAB_SILENT_ERRORS, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kmem_cache_free(s, p);
+	p[64] = 0xab;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 1, s->errors);
+	kmem_cache_destroy(s);
+}
+
+static struct kunit_case test_cases[] = {
+	KUNIT_CASE(test_clobber_zone),
+	KUNIT_CASE(test_next_pointer),
+	KUNIT_CASE(test_first_word),
+	KUNIT_CASE(test_clobber_50th_byte),
+	KUNIT_CASE(test_clobber_redzone_free),
+	{}
+};
+
+static struct kunit_suite test_suite = {
+	.name = "slub_test",
+	.test_cases = test_cases,
+};
+kunit_test_suite(test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/mm/slab.h b/mm/slab.h
index 076582f58f68..382507b6cab9 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -134,7 +134,8 @@  static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
 #elif defined(CONFIG_SLUB_DEBUG)
 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
+			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS | \
+			  SLAB_SILENT_ERRORS)
 #else
 #define SLAB_DEBUG_FLAGS (0)
 #endif
@@ -164,7 +165,8 @@  static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 			      SLAB_NOLEAKTRACE | \
 			      SLAB_RECLAIM_ACCOUNT | \
 			      SLAB_TEMPORARY | \
-			      SLAB_ACCOUNT)
+			      SLAB_ACCOUNT | \
+			      SLAB_SILENT_ERRORS)
 
 bool __kmem_cache_empty(struct kmem_cache *);
 int __kmem_cache_shutdown(struct kmem_cache *);
@@ -215,6 +217,7 @@  DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
 DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 extern void print_tracking(struct kmem_cache *s, void *object);
+long validate_slab_cache(struct kmem_cache *s);
 #else
 static inline void print_tracking(struct kmem_cache *s, void *object)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 88e833986332..239c9095e7ea 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -55,7 +55,7 @@  static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
 		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
-		SLAB_FAILSLAB | kasan_never_merge())
+		SLAB_FAILSLAB | SLAB_SILENT_ERRORS | kasan_never_merge())
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
 			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
diff --git a/mm/slub.c b/mm/slub.c
index 3021ce9bf1b3..7083e89432d0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -673,14 +673,16 @@  static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 
 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 {
-	struct va_format vaf;
-	va_list args;
-
-	va_start(args, fmt);
-	vaf.fmt = fmt;
-	vaf.va = &args;
-	pr_err("FIX %s: %pV\n", s->name, &vaf);
-	va_end(args);
+	if (!(s->flags & SLAB_SILENT_ERRORS)) {
+		struct va_format vaf;
+		va_list args;
+
+		va_start(args, fmt);
+		vaf.fmt = fmt;
+		vaf.va = &args;
+		pr_err("FIX %s: %pV\n", s->name, &vaf);
+		va_end(args);
+	}
 }
 
 static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
@@ -739,8 +741,10 @@  static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 void object_err(struct kmem_cache *s, struct page *page,
 			u8 *object, char *reason)
 {
-	slab_bug(s, "%s", reason);
-	print_trailer(s, page, object);
+	if (!(s->flags & SLAB_SILENT_ERRORS)) {
+		slab_bug(s, "%s", reason);
+		print_trailer(s, page, object);
+	}
 }
 
 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
@@ -752,9 +756,11 @@  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 	va_start(args, fmt);
 	vsnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
-	slab_bug(s, "%s", buf);
-	print_page_info(page);
-	dump_stack();
+	if (!(s->flags & SLAB_SILENT_ERRORS)) {
+		slab_bug(s, "%s", buf);
+		print_page_info(page);
+		dump_stack();
+	}
 }
 
 static void init_object(struct kmem_cache *s, void *object, u8 val)
@@ -798,11 +804,13 @@  static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 	while (end > fault && end[-1] == value)
 		end--;
 
-	slab_bug(s, "%s overwritten", what);
-	pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
+	if (!(s->flags & SLAB_SILENT_ERRORS)) {
+		slab_bug(s, "%s overwritten", what);
+		pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
 					fault, end - 1, fault - addr,
 					fault[0], value);
-	print_trailer(s, page, object);
+		print_trailer(s, page, object);
+	}
 
 	restore_bytes(s, what, value, fault, end);
 	return 0;
@@ -964,6 +972,7 @@  static int check_slab(struct kmem_cache *s, struct page *page)
 
 	if (!PageSlab(page)) {
 		slab_err(s, page, "Not a valid slab page");
+		s->errors += 1;
 		return 0;
 	}
 
@@ -971,11 +980,13 @@  static int check_slab(struct kmem_cache *s, struct page *page)
 	if (page->objects > maxobj) {
 		slab_err(s, page, "objects %u > max %u",
 			page->objects, maxobj);
+		s->errors += 1;
 		return 0;
 	}
 	if (page->inuse > page->objects) {
 		slab_err(s, page, "inuse %u > max %u",
 			page->inuse, page->objects);
+		s->errors += 1;
 		return 0;
 	}
 	/* Slab_pad_check fixes things up after itself */
@@ -1008,8 +1019,10 @@  static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 				page->freelist = NULL;
 				page->inuse = page->objects;
 				slab_fix(s, "Freelist cleared");
+				s->errors += 1;
 				return 0;
 			}
+			s->errors += 1;
 			break;
 		}
 		object = fp;
@@ -1026,12 +1039,14 @@  static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 			 page->objects, max_objects);
 		page->objects = max_objects;
 		slab_fix(s, "Number of objects adjusted.");
+		s->errors += 1;
 	}
 	if (page->inuse != page->objects - nr) {
 		slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
 			 page->inuse, page->objects - nr);
 		page->inuse = page->objects - nr;
 		slab_fix(s, "Object count adjusted.");
+		s->errors += 1;
 	}
 	return search == NULL;
 }
@@ -4629,8 +4644,10 @@  static void validate_slab(struct kmem_cache *s, struct page *page)
 		u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
 			 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
 
-		if (!check_object(s, page, p, val))
+		if (!check_object(s, page, p, val)) {
+			s->errors += 1;
 			break;
+		}
 	}
 	put_map(map);
 unlock:
@@ -4650,9 +4667,11 @@  static int validate_slab_node(struct kmem_cache *s,
 		validate_slab(s, page);
 		count++;
 	}
-	if (count != n->nr_partial)
+	if (count != n->nr_partial) {
 		pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
 		       s->name, count, n->nr_partial);
+		s->errors += 1;
+	}
 
 	if (!(s->flags & SLAB_STORE_USER))
 		goto out;
@@ -4661,20 +4680,23 @@  static int validate_slab_node(struct kmem_cache *s,
 		validate_slab(s, page);
 		count++;
 	}
-	if (count != atomic_long_read(&n->nr_slabs))
+	if (count != atomic_long_read(&n->nr_slabs)) {
 		pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
 		       s->name, count, atomic_long_read(&n->nr_slabs));
+		s->errors += 1;
+	}
 
 out:
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	return count;
 }
 
-static long validate_slab_cache(struct kmem_cache *s)
+long validate_slab_cache(struct kmem_cache *s)
 {
 	int node;
 	unsigned long count = 0;
 	struct kmem_cache_node *n;
+	s->errors = 0;
 
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n)
@@ -4682,6 +4704,8 @@  static long validate_slab_cache(struct kmem_cache *s)
 
 	return count;
 }
+EXPORT_SYMBOL(validate_slab_cache);
+
 /*
  * Generate lists of code addresses where slabcache objects are allocated
  * and freed.