
[v4,1/2] mm: slub: Print the broken data before restoring slub.

Message ID 20250226081206.680495-2-hyesoo.yu@samsung.com (mailing list archive)
State New
Series mm: slub: Enhanced debugging in slub error

Commit Message

Hyesoo Yu Feb. 26, 2025, 8:12 a.m. UTC
Previously, the restore occurred after printing the object in slub.
After commit 47d911b02cbe ("slab: make check_object() more consistent"),
the bytes are printed after the restore. The information about the bytes
before the restore is highly valuable for debugging purposes.
For instance, in the event of a cache issue, it displays the byte patterns
broken down into 64-byte units. Without this information,
we can only speculate on how the object was broken. Hence the corrupted
regions should be printed prior to the restoration process. However, if an
object is corrupted in multiple places, the same log may be output multiple
times. Therefore the slub log is reported only once to prevent redundant
printing, by passing a parameter that indicates whether an error has
already occurred.

Changes in v4:
- Change the print format to include specific error names.

Changes in v3:
- Change the parameter type of check_bytes_and_report.

Changes in v2:
- Instead of using print_section every time on check_bytes_and_report,
just print it once for the entire slub object before the restore.

Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
---
 mm/slub.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

Comments

Harry Yoo Feb. 27, 2025, 11:51 a.m. UTC | #1
On Wed, Feb 26, 2025 at 05:12:00PM +0900, Hyesoo Yu wrote:
> Previously, the restore occurred after printing the object in slub.
> After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> the bytes are printed after the restore. The information about the bytes
> before the restore is highly valuable for debugging purposes.
> For instance, in the event of a cache issue, it displays the byte patterns
> broken down into 64-byte units. Without this information,
> we can only speculate on how the object was broken. Hence the corrupted
> regions should be printed prior to the restoration process. However, if an
> object is corrupted in multiple places, the same log may be output multiple
> times. Therefore the slub log is reported only once to prevent redundant
> printing, by passing a parameter that indicates whether an error has
> already occurred.
> 
> Changes in v4:
> - Change the print format to include specific error names.
> 
> Changes in v3:
> - Change the parameter type of check_bytes_and_report.
> 
> Changes in v2:
> - Instead of using print_section every time on check_bytes_and_report,
> just print it once for the entire slub object before the restore.

IMHO it is not a good practice to include patch version changes
in the changelog, because the changelog should make sense on its own.

> Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
> ---

I think that's why people usually place the patch version log just below the '---' line.
(More details can be found in the "Submitting patches" documentation
 https://docs.kernel.org/process/submitting-patches.html#commentary)
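
As an editorial illustration of that convention (reusing the diffstat from
this patch), the version notes would sit between the '---' marker and the
diffstat, where they are dropped when the patch is applied:

    Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
    ---
    Changes in v4:
    - Change the print format to include specific error names.

     mm/slub.c | 32 ++++++++++++++------------------
     1 file changed, 14 insertions(+), 18 deletions(-)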

Anyway, the code itself looks good to me (with a nit below).
Please feel free to add:

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>

>  mm/slub.c | 32 ++++++++++++++------------------
>  1 file changed, 14 insertions(+), 18 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index b3969d63cc04..8c13cd43c0fd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1192,8 +1192,8 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
>  
>  static pad_check_attributes int
>  check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> -		       u8 *object, char *what,
> -		       u8 *start, unsigned int value, unsigned int bytes)
> +		       u8 *object, char *what, u8 *start, unsigned int value,
> +		       unsigned int bytes, bool slab_obj_print)
>  {
>  	u8 *fault;
>  	u8 *end;
> @@ -1212,10 +1212,11 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
>  	if (slab_add_kunit_errors())
>  		goto skip_bug_print;
>  
> -	slab_bug(s, "%s overwritten", what);
> -	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> -					fault, end - 1, fault - addr,
> -					fault[0], value);
> +	pr_err("[%s overwritten] 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> +	       what, fault, end - 1, fault - addr, fault[0], value);
> +
> +	if (slab_obj_print)
> +		object_err(s, slab, object, "Object corrupt");
>  
>  skip_bug_print:
>  	restore_bytes(s, what, value, fault, end);
> @@ -1279,7 +1280,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
>  		return 1;
>  
>  	return check_bytes_and_report(s, slab, p, "Object padding",
> -			p + off, POISON_INUSE, size_from_object(s) - off);
> +			p + off, POISON_INUSE, size_from_object(s) - off, true);
>  }
>  
>  /* Check the pad bytes at the end of a slab page */
> @@ -1329,11 +1330,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>  
>  	if (s->flags & SLAB_RED_ZONE) {
>  		if (!check_bytes_and_report(s, slab, object, "Left Redzone",
> -			object - s->red_left_pad, val, s->red_left_pad))
> +			object - s->red_left_pad, val, s->red_left_pad, ret))
>  			ret = 0;
>  
>  		if (!check_bytes_and_report(s, slab, object, "Right Redzone",
> -			endobject, val, s->inuse - s->object_size))
> +			endobject, val, s->inuse - s->object_size, ret))
>  			ret = 0;
>  
>  		if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
> @@ -1342,7 +1343,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>  			if (s->object_size > orig_size  &&
>  				!check_bytes_and_report(s, slab, object,
>  					"kmalloc Redzone", p + orig_size,
> -					val, s->object_size - orig_size)) {
> +					val, s->object_size - orig_size, ret)) {
>  				ret = 0;
>  			}
>  		}
> @@ -1350,7 +1351,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>  		if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
>  			if (!check_bytes_and_report(s, slab, p, "Alignment padding",
>  				endobject, POISON_INUSE,
> -				s->inuse - s->object_size))
> +				s->inuse - s->object_size, ret))
>  				ret = 0;
>  		}
>  	}
> @@ -1366,11 +1367,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>  			if (kasan_meta_size < s->object_size - 1 &&
>  			    !check_bytes_and_report(s, slab, p, "Poison",
>  					p + kasan_meta_size, POISON_FREE,
> -					s->object_size - kasan_meta_size - 1))
> +					s->object_size - kasan_meta_size - 1, ret))
>  				ret = 0;
>  			if (kasan_meta_size < s->object_size &&
>  			    !check_bytes_and_report(s, slab, p, "End Poison",
> -					p + s->object_size - 1, POISON_END, 1))
> +					p + s->object_size - 1, POISON_END, 1, ret))
>  				ret = 0;
>  		}
>  		/*
> @@ -1396,11 +1397,6 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>  		ret = 0;
>  	}
>  
> -	if (!ret && !slab_in_kunit_test()) {

nit: check_object() was the only user of slab_in_kunit_test().
Can we remove it altogether?
Harry Yoo Feb. 27, 2025, 12:36 p.m. UTC | #2
On Thu, Feb 27, 2025 at 08:51:19PM +0900, Harry Yoo wrote:
> On Wed, Feb 26, 2025 at 05:12:00PM +0900, Hyesoo Yu wrote:
> > @@ -1396,11 +1397,6 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> >  		ret = 0;
> >  	}
> >  
> > -	if (!ret && !slab_in_kunit_test()) {
> 
> nit: check_object() was the only user of slab_in_kunit_test().
> Can we remove it altogether?

Uh, there is another user in mm/slab_common. But that one is also removed
in patch 2, so slab_in_kunit_test() can be removed there.

Patch

diff --git a/mm/slub.c b/mm/slub.c
index b3969d63cc04..8c13cd43c0fd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1192,8 +1192,8 @@  static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 
 static pad_check_attributes int
 check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
-		       u8 *object, char *what,
-		       u8 *start, unsigned int value, unsigned int bytes)
+		       u8 *object, char *what, u8 *start, unsigned int value,
+		       unsigned int bytes, bool slab_obj_print)
 {
 	u8 *fault;
 	u8 *end;
@@ -1212,10 +1212,11 @@  check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
 	if (slab_add_kunit_errors())
 		goto skip_bug_print;
 
-	slab_bug(s, "%s overwritten", what);
-	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
-					fault, end - 1, fault - addr,
-					fault[0], value);
+	pr_err("[%s overwritten] 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
+	       what, fault, end - 1, fault - addr, fault[0], value);
+
+	if (slab_obj_print)
+		object_err(s, slab, object, "Object corrupt");
 
 skip_bug_print:
 	restore_bytes(s, what, value, fault, end);
@@ -1279,7 +1280,7 @@  static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 		return 1;
 
 	return check_bytes_and_report(s, slab, p, "Object padding",
-			p + off, POISON_INUSE, size_from_object(s) - off);
+			p + off, POISON_INUSE, size_from_object(s) - off, true);
 }
 
 /* Check the pad bytes at the end of a slab page */
@@ -1329,11 +1330,11 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 
 	if (s->flags & SLAB_RED_ZONE) {
 		if (!check_bytes_and_report(s, slab, object, "Left Redzone",
-			object - s->red_left_pad, val, s->red_left_pad))
+			object - s->red_left_pad, val, s->red_left_pad, ret))
 			ret = 0;
 
 		if (!check_bytes_and_report(s, slab, object, "Right Redzone",
-			endobject, val, s->inuse - s->object_size))
+			endobject, val, s->inuse - s->object_size, ret))
 			ret = 0;
 
 		if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
@@ -1342,7 +1343,7 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 			if (s->object_size > orig_size  &&
 				!check_bytes_and_report(s, slab, object,
 					"kmalloc Redzone", p + orig_size,
-					val, s->object_size - orig_size)) {
+					val, s->object_size - orig_size, ret)) {
 				ret = 0;
 			}
 		}
@@ -1350,7 +1351,7 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 		if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
 			if (!check_bytes_and_report(s, slab, p, "Alignment padding",
 				endobject, POISON_INUSE,
-				s->inuse - s->object_size))
+				s->inuse - s->object_size, ret))
 				ret = 0;
 		}
 	}
@@ -1366,11 +1367,11 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 			if (kasan_meta_size < s->object_size - 1 &&
 			    !check_bytes_and_report(s, slab, p, "Poison",
 					p + kasan_meta_size, POISON_FREE,
-					s->object_size - kasan_meta_size - 1))
+					s->object_size - kasan_meta_size - 1, ret))
 				ret = 0;
 			if (kasan_meta_size < s->object_size &&
 			    !check_bytes_and_report(s, slab, p, "End Poison",
-					p + s->object_size - 1, POISON_END, 1))
+					p + s->object_size - 1, POISON_END, 1, ret))
 				ret = 0;
 		}
 		/*
@@ -1396,11 +1397,6 @@  static int check_object(struct kmem_cache *s, struct slab *slab,
 		ret = 0;
 	}
 
-	if (!ret && !slab_in_kunit_test()) {
-		print_trailer(s, slab, object);
-		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
-	}
-
 	return ret;
 }