diff mbox series

mm/slub: Add taint after the errors are printed

Message ID 1623860738-146761-1-git-send-email-quic_c_gdjako@quicinc.com (mailing list archive)
State New, archived
Headers show
Series mm/slub: Add taint after the errors are printed | expand

Commit Message

Georgi Djakov June 16, 2021, 4:25 p.m. UTC
When running the kernel with panic_on_taint, the usual slub debug error
messages are not being printed when object corruption happens. That's
because we panic in add_taint(), which is called before printing the
additional information. This is a bit unfortunate as the error messages
are actually very useful, especially before a panic. Let's fix this by
moving add_taint() after the errors are printed on the console.

Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>
---
 mm/slub.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Comments

Vlastimil Babka June 16, 2021, 4:34 p.m. UTC | #1
On 6/16/21 6:25 PM, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
> 
> Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>

Makes sense.

While at it, I wonder if we should use LOCKDEP_STILL_OK instead of
LOCKDEP_NOW_UNRELIABLE. Isn't it too pessimistic to assume that some slab's
memory corruption hit some lock state?

> ---
>  mm/slub.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
>  	pr_err("=============================================================================\n");
>  	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
>  	pr_err("-----------------------------------------------------------------------------\n\n");
> -
> -	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  	va_end(args);
>  }
>  
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>  
>  	slab_bug(s, "%s", reason);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
>  	slab_bug(s, "%s", buf);
>  	print_page_info(page);
>  	dump_stack();
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  
>  skip_bug_print:
>  	restore_bytes(s, what, value, fault, end);
>
Rafael Aquini June 16, 2021, 10:11 p.m. UTC | #2
On Wed, Jun 16, 2021 at 09:25:38AM -0700, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
> 
> Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>
> ---
>  mm/slub.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
>  	pr_err("=============================================================================\n");
>  	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
>  	pr_err("-----------------------------------------------------------------------------\n\n");
> -
> -	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  	va_end(args);
>  }
>  
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>  
>  	slab_bug(s, "%s", reason);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
>  	slab_bug(s, "%s", buf);
>  	print_page_info(page);
>  	dump_stack();
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  
>  skip_bug_print:
>  	restore_bytes(s, what, value, fault, end);
> 
Acked-by: Rafael Aquini <aquini@redhat.com>
Rafael Aquini June 16, 2021, 10:13 p.m. UTC | #3
On Wed, Jun 16, 2021 at 06:34:41PM +0200, Vlastimil Babka wrote:
> On 6/16/21 6:25 PM, Georgi Djakov wrote:
> > When running the kernel with panic_on_taint, the usual slub debug error
> > messages are not being printed when object corruption happens. That's
> > because we panic in add_taint(), which is called before printing the
> > additional information. This is a bit unfortunate as the error messages
> > are actually very useful, especially before a panic. Let's fix this by
> > moving add_taint() after the errors are printed on the console.
> > 
> > Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>
> 
> Makes sense.
> 
> While at it, I wonder if we should use LOCKDEP_STILL_OK instead of
> LOCKDEP_NOW_UNRELIABLE. Isn't it too pessimistic to assume that some slab's
> memory corruption hit some lock state?
>

Given there is noted corruption, I don't think it's safe to assume otherwise.
Aaron Tomlin June 17, 2021, 9:22 a.m. UTC | #4
On Wed 2021-06-16 09:25 -0700, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
> 
> Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>
> ---
>  mm/slub.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
>  	pr_err("=============================================================================\n");
>  	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
>  	pr_err("-----------------------------------------------------------------------------\n\n");
> -
> -	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  	va_end(args);
>  }
>  
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>  
>  	slab_bug(s, "%s", reason);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
>  	slab_bug(s, "%s", buf);
>  	print_page_info(page);
>  	dump_stack();
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  
>  skip_bug_print:
>  	restore_bytes(s, what, value, fault, end);
> 

Good catch. Thanks!

Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Vlastimil Babka June 17, 2021, 2:07 p.m. UTC | #5
On 6/16/21 6:25 PM, Georgi Djakov wrote:
> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
> 
> Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>

Acked-by: Vlastimil Babka <vbabka@suse.cz>

> ---
>  mm/slub.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index a8b0daa1a307..ce7b8e4551b5 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -719,8 +719,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
>  	pr_err("=============================================================================\n");
>  	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
>  	pr_err("-----------------------------------------------------------------------------\n\n");
> -
> -	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  	va_end(args);
>  }
>  
> @@ -801,6 +799,7 @@ void object_err(struct kmem_cache *s, struct page *page,
>  
>  	slab_bug(s, "%s", reason);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
> @@ -818,6 +817,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
>  	slab_bug(s, "%s", buf);
>  	print_page_info(page);
>  	dump_stack();
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  }
>  
>  static void init_object(struct kmem_cache *s, void *object, u8 val)
> @@ -869,6 +869,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
>  					fault, end - 1, fault - addr,
>  					fault[0], value);
>  	print_trailer(s, page, object);
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>  
>  skip_bug_print:
>  	restore_bytes(s, what, value, fault, end);
>
David Rientjes June 18, 2021, 7:38 p.m. UTC | #6
On Wed, 16 Jun 2021, Georgi Djakov wrote:

> When running the kernel with panic_on_taint, the usual slub debug error
> messages are not being printed when object corruption happens. That's
> because we panic in add_taint(), which is called before printing the
> additional information. This is a bit unfortunate as the error messages
> are actually very useful, especially before a panic. Let's fix this by
> moving add_taint() after the errors are printed on the console.
> 
> Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>

Acked-by: David Rientjes <rientjes@google.com>
diff mbox series

Patch

diff --git a/mm/slub.c b/mm/slub.c
index a8b0daa1a307..ce7b8e4551b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -719,8 +719,6 @@  static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	pr_err("=============================================================================\n");
 	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
 	pr_err("-----------------------------------------------------------------------------\n\n");
-
-	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 	va_end(args);
 }
 
@@ -801,6 +799,7 @@  void object_err(struct kmem_cache *s, struct page *page,
 
 	slab_bug(s, "%s", reason);
 	print_trailer(s, page, object);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
@@ -818,6 +817,7 @@  static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 	slab_bug(s, "%s", buf);
 	print_page_info(page);
 	dump_stack();
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static void init_object(struct kmem_cache *s, void *object, u8 val)
@@ -869,6 +869,7 @@  static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 					fault, end - 1, fault - addr,
 					fault[0], value);
 	print_trailer(s, page, object);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 
 skip_bug_print:
 	restore_bytes(s, what, value, fault, end);