diff mbox series

[1/2] mm/kasan: avoid duplicate KASAN issues from reporting

Message ID 1619079317-1131-1-git-send-email-maninder1.s@samsung.com (mailing list archive)
State New
Headers show
Series [1/2] mm/kasan: avoid duplicate KASAN issues from reporting | expand

Commit Message

Maninder Singh April 22, 2021, 8:15 a.m. UTC
when KASAN multishot is ON and some buggy code hits same code path
of KASAN issue repetetively, it can flood logs on console.

Check for allocaton, free and backtrace path at time of KASAN error,
if these are same then it is duplicate error and avoid these prints
from KASAN.

Co-developed-by: Vaneet Narang <v.narang@samsung.com>
Signed-off-by: Vaneet Narang <v.narang@samsung.com>
Signed-off-by: Maninder Singh <maninder1.s@samsung.com>
---
 mm/kasan/kasan.h  |  6 +++++
 mm/kasan/report.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

Comments

Dmitry Vyukov April 22, 2021, 9:58 a.m. UTC | #1
On Thu, Apr 22, 2021 at 11:17 AM Maninder Singh <maninder1.s@samsung.com> wrote:
>
> when KASAN multishot is ON and some buggy code hits same code path
> of KASAN issue repetetively, it can flood logs on console.
>
> Check for allocaton, free and backtrace path at time of KASAN error,
> if these are same then it is duplicate error and avoid these prints
> from KASAN.
>
> Co-developed-by: Vaneet Narang <v.narang@samsung.com>
> Signed-off-by: Vaneet Narang <v.narang@samsung.com>
> Signed-off-by: Maninder Singh <maninder1.s@samsung.com>
> ---
>  mm/kasan/kasan.h  |  6 +++++
>  mm/kasan/report.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 73 insertions(+)
>
> diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> index 78cf99247139..d14ccce246ba 100644
> --- a/mm/kasan/kasan.h
> +++ b/mm/kasan/kasan.h
> @@ -102,6 +102,12 @@ struct kasan_access_info {
>         unsigned long ip;
>  };
>
> +struct kasan_record {
> +       depot_stack_handle_t    bt_handle;
> +       depot_stack_handle_t    alloc_handle;
> +       depot_stack_handle_t    free_handle;
> +};

Hi Maninder,

There is no need to declare this in the header, it can be declared
more locally in report.h.

> +
>  /* The layout of struct dictated by compiler */
>  struct kasan_source_location {
>         const char *filename;
> diff --git a/mm/kasan/report.c b/mm/kasan/report.c
> index 87b271206163..4576de76991b 100644
> --- a/mm/kasan/report.c
> +++ b/mm/kasan/report.c
> @@ -39,6 +39,10 @@ static unsigned long kasan_flags;
>  #define KASAN_BIT_REPORTED     0
>  #define KASAN_BIT_MULTI_SHOT   1
>
> +#define MAX_RECORDS            (200)

s/MAX_RECORDS/KASAN_MAX_RECORDS/

> +static struct kasan_record kasan_records[MAX_RECORDS];

Since all fields in kasan_record are stack handles, the code will be
simpler and more uniform, if we store just an array of handles w/o
distinguishing between alloc/free/access.

> +static int stored_kasan_records;
> +
>  bool kasan_save_enable_multi_shot(void)
>  {
>         return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
> @@ -360,6 +364,65 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
>         end_report(&flags, (unsigned long)object);
>  }
>
> +/*
> + * @save_report()
> + *
> + * returns false if same record is already saved.

s/same/the same/

> + * returns true if its new record and saved in database of KASAN.

s/its/it's/
s/database/the database/

> + */
> +static bool save_report(void *addr, struct kasan_access_info *info, u8 tag, unsigned long *flags)
> +{
> +       struct kasan_record record = {0};
> +       depot_stack_handle_t bt_handle;
> +       int i = 0;
> +       const char *bug_type;
> +       struct kasan_alloc_meta *alloc_meta;
> +       struct kasan_track *free_track;
> +       struct page *page;
> +       bool ret = true;
> +
> +       kasan_disable_current();
> +       spin_lock_irqsave(&report_lock, *flags);

Reusing the caller flags looks strange, do we need it?
But also the very next function start_report() also does the same
dance: kasan_disable_current/spin_lock_irqsave. It feels reasonable to
lock once, check for dups and return early if it's a dup.

> +       bug_type = kasan_get_bug_type(info);
> +       page = kasan_addr_to_page(addr);
> +       bt_handle = kasan_save_stack(GFP_KERNEL);

ASsign directly to record.bt_handle.

> +       if (page && PageSlab(page)) {
> +               struct kmem_cache *cache = page->slab_cache;
> +               void *object = nearest_obj(cache, page, addr);

Since you already declare new var in this block, move
alloc_meta/free_track here as well.

> +
> +               alloc_meta = kasan_get_alloc_meta(cache, object);
> +               free_track = kasan_get_free_track(cache, object, tag);
> +               record.alloc_handle = alloc_meta->alloc_track.stack;
> +               if (free_track)
> +                       record.free_handle = free_track->stack;
> +       }
> +
> +       record.bt_handle = bt_handle;
> +
> +       for (i = 0; i < stored_kasan_records; i++) {
> +               if (record.bt_handle != kasan_records[i].bt_handle)
> +                       continue;
> +               if (record.alloc_handle != kasan_records[i].alloc_handle)
> +                       continue;
> +               if (!strncmp("use-after-free", bug_type, 15) &&

Comparing strings is unreliable and will break in future. Compare
handle with 0 instead, you already assume that 0 handle is "no
handle".

> +                       (record.free_handle != kasan_records[i].free_handle))
> +                       continue;
> +
> +               ret = false;
> +               goto done;
> +       }
> +
> +       memcpy(&kasan_records[stored_kasan_records], &record, sizeof(struct kasan_record));
> +       stored_kasan_records++;

I think you just introduced an out-of-bounds write into KASAN, check
for MAX_RECORDS ;)


> +
> +done:
> +       spin_unlock_irqrestore(&report_lock, *flags);
> +       kasan_enable_current();
> +       return ret;
> +}
> +
>  static void __kasan_report(unsigned long addr, size_t size, bool is_write,
>                                 unsigned long ip)
>  {
> @@ -388,6 +451,10 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
>         info.is_write = is_write;
>         info.ip = ip;
>
> +       if (addr_has_metadata(untagged_addr) &&

Why addr_has_metadata check?
The kernel will probably crash later anyway, but from point of view of
this code, I don't see reasons to not dedup wild accesses.

> +               !save_report(untagged_addr, &info, get_tag(tagged_addr), &flags))
> +               return;
> +
>         start_report(&flags);
>
>         print_error_description(&info);
> --
> 2.17.1
>
Dmitry Vyukov April 22, 2021, 9:59 a.m. UTC | #2
On Thu, Apr 22, 2021 at 11:58 AM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Thu, Apr 22, 2021 at 11:17 AM Maninder Singh <maninder1.s@samsung.com> wrote:
> >
> > when KASAN multishot is ON and some buggy code hits same code path
> > of KASAN issue repetetively, it can flood logs on console.
> >
> > Check for allocaton, free and backtrace path at time of KASAN error,
> > if these are same then it is duplicate error and avoid these prints
> > from KASAN.

Can this be tested with the kunit kasan tests? If yes, please add a
test for this new code.


> > Co-developed-by: Vaneet Narang <v.narang@samsung.com>
> > Signed-off-by: Vaneet Narang <v.narang@samsung.com>
> > Signed-off-by: Maninder Singh <maninder1.s@samsung.com>
> > ---
> >  mm/kasan/kasan.h  |  6 +++++
> >  mm/kasan/report.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 73 insertions(+)
> >
> > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> > index 78cf99247139..d14ccce246ba 100644
> > --- a/mm/kasan/kasan.h
> > +++ b/mm/kasan/kasan.h
> > @@ -102,6 +102,12 @@ struct kasan_access_info {
> >         unsigned long ip;
> >  };
> >
> > +struct kasan_record {
> > +       depot_stack_handle_t    bt_handle;
> > +       depot_stack_handle_t    alloc_handle;
> > +       depot_stack_handle_t    free_handle;
> > +};
>
> Hi Maninder,
>
> There is no need to declare this in the header, it can be declared
> more locally in report.h.
>
> > +
> >  /* The layout of struct dictated by compiler */
> >  struct kasan_source_location {
> >         const char *filename;
> > diff --git a/mm/kasan/report.c b/mm/kasan/report.c
> > index 87b271206163..4576de76991b 100644
> > --- a/mm/kasan/report.c
> > +++ b/mm/kasan/report.c
> > @@ -39,6 +39,10 @@ static unsigned long kasan_flags;
> >  #define KASAN_BIT_REPORTED     0
> >  #define KASAN_BIT_MULTI_SHOT   1
> >
> > +#define MAX_RECORDS            (200)
>
> s/MAX_RECORDS/KASAN_MAX_RECORDS/
>
> > +static struct kasan_record kasan_records[MAX_RECORDS];
>
> Since all fields in kasan_record are stack handles, the code will be
> simpler and more uniform, if we store just an array of handles w/o
> distinguishing between alloc/free/access.
>
> > +static int stored_kasan_records;
> > +
> >  bool kasan_save_enable_multi_shot(void)
> >  {
> >         return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
> > @@ -360,6 +364,65 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
> >         end_report(&flags, (unsigned long)object);
> >  }
> >
> > +/*
> > + * @save_report()
> > + *
> > + * returns false if same record is already saved.
>
> s/same/the same/
>
> > + * returns true if its new record and saved in database of KASAN.
>
> s/its/it's/
> s/database/the database/
>
> > + */
> > +static bool save_report(void *addr, struct kasan_access_info *info, u8 tag, unsigned long *flags)
> > +{
> > +       struct kasan_record record = {0};
> > +       depot_stack_handle_t bt_handle;
> > +       int i = 0;
> > +       const char *bug_type;
> > +       struct kasan_alloc_meta *alloc_meta;
> > +       struct kasan_track *free_track;
> > +       struct page *page;
> > +       bool ret = true;
> > +
> > +       kasan_disable_current();
> > +       spin_lock_irqsave(&report_lock, *flags);
>
> Reusing the caller flags looks strange, do we need it?
> But also the very next function start_report() also does the same
> dance: kasan_disable_current/spin_lock_irqsave. It feels reasonable to
> lock once, check for dups and return early if it's a dup.
>
> > +       bug_type = kasan_get_bug_type(info);
> > +       page = kasan_addr_to_page(addr);
> > +       bt_handle = kasan_save_stack(GFP_KERNEL);
>
> ASsign directly to record.bt_handle.
>
> > +       if (page && PageSlab(page)) {
> > +               struct kmem_cache *cache = page->slab_cache;
> > +               void *object = nearest_obj(cache, page, addr);
>
> Since you already declare new var in this block, move
> alloc_meta/free_track here as well.
>
> > +
> > +               alloc_meta = kasan_get_alloc_meta(cache, object);
> > +               free_track = kasan_get_free_track(cache, object, tag);
> > +               record.alloc_handle = alloc_meta->alloc_track.stack;
> > +               if (free_track)
> > +                       record.free_handle = free_track->stack;
> > +       }
> > +
> > +       record.bt_handle = bt_handle;
> > +
> > +       for (i = 0; i < stored_kasan_records; i++) {
> > +               if (record.bt_handle != kasan_records[i].bt_handle)
> > +                       continue;
> > +               if (record.alloc_handle != kasan_records[i].alloc_handle)
> > +                       continue;
> > +               if (!strncmp("use-after-free", bug_type, 15) &&
>
> Comparing strings is unreliable and will break in future. Compare
> handle with 0 instead, you already assume that 0 handle is "no
> handle".
>
> > +                       (record.free_handle != kasan_records[i].free_handle))
> > +                       continue;
> > +
> > +               ret = false;
> > +               goto done;
> > +       }
> > +
> > +       memcpy(&kasan_records[stored_kasan_records], &record, sizeof(struct kasan_record));
> > +       stored_kasan_records++;
>
> I think you just introduced an out-of-bounds write into KASAN, check
> for MAX_RECORDS ;)
>
>
> > +
> > +done:
> > +       spin_unlock_irqrestore(&report_lock, *flags);
> > +       kasan_enable_current();
> > +       return ret;
> > +}
> > +
> >  static void __kasan_report(unsigned long addr, size_t size, bool is_write,
> >                                 unsigned long ip)
> >  {
> > @@ -388,6 +451,10 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
> >         info.is_write = is_write;
> >         info.ip = ip;
> >
> > +       if (addr_has_metadata(untagged_addr) &&
>
> Why addr_has_metadata check?
> The kernel will probably crash later anyway, but from point of view of
> this code, I don't see reasons to not dedup wild accesses.
>
> > +               !save_report(untagged_addr, &info, get_tag(tagged_addr), &flags))
> > +               return;
> > +
> >         start_report(&flags);
> >
> >         print_error_description(&info);
> > --
> > 2.17.1
> >
Marco Elver April 22, 2021, 2:10 p.m. UTC | #3
On Thu, 22 Apr 2021 at 11:17, Maninder Singh <maninder1.s@samsung.com> wrote:
>
> when KASAN multishot is ON and some buggy code hits same code path
> of KASAN issue repetetively, it can flood logs on console.
>
> Check for allocaton, free and backtrace path at time of KASAN error,
> if these are same then it is duplicate error and avoid these prints
> from KASAN.

On a more fundamental level, I think this sort of filtering is the
wrong solution to your problem. One reason why it's good that
multishot is off by default is, because _every_ KASAN report is
critical and can destabilize the system. Therefore, any report after
the first one might be completely bogus, because the system is in a
potentially bad state and its behaviour might be completely random.

The correct solution is to not leave the system running, fix the first
bug found, continue; rinse and repeat. Therefore, this patch adds a
lot of code for little benefit.

The much simpler solution that will likely yield a similar result is
to simply define an upper bound on the number of reports if multishot
is on. Because if I've seen 1000 reports, I already know the system is
completely trashed and whatever else it's reporting might just be
random.

Thanks,
-- Marco
Andrey Konovalov April 22, 2021, 3:06 p.m. UTC | #4
On Thu, Apr 22, 2021 at 4:10 PM Marco Elver <elver@google.com> wrote:
>
> On Thu, 22 Apr 2021 at 11:17, Maninder Singh <maninder1.s@samsung.com> wrote:
> >
> > when KASAN multishot is ON and some buggy code hits same code path
> > of KASAN issue repetetively, it can flood logs on console.
> >
> > Check for allocaton, free and backtrace path at time of KASAN error,
> > if these are same then it is duplicate error and avoid these prints
> > from KASAN.
>
> On a more fundamental level, I think this sort of filtering is the
> wrong solution to your problem. One reason why it's good that
> multishot is off by default is, because _every_ KASAN report is
> critical and can destabilize the system. Therefore, any report after
> the first one might be completely bogus, because the system is in a
> potentially bad state and its behaviour might be completely random.
>
> The correct solution is to not leave the system running, fix the first
> bug found, continue; rinse and repeat. Therefore, this patch adds a
> lot of code for little benefit.

I agree with Marco here.

It doesn't make sense to have this deduplication code in the kernel
anyway. If you want unique reports, write a userspace script that
parses dmesg and groups the reports.

Thanks!
diff mbox series

Patch

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 78cf99247139..d14ccce246ba 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -102,6 +102,12 @@  struct kasan_access_info {
 	unsigned long ip;
 };
 
+struct kasan_record {
+	depot_stack_handle_t	bt_handle;
+	depot_stack_handle_t	alloc_handle;
+	depot_stack_handle_t	free_handle;
+};
+
 /* The layout of struct dictated by compiler */
 struct kasan_source_location {
 	const char *filename;
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 87b271206163..4576de76991b 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -39,6 +39,10 @@  static unsigned long kasan_flags;
 #define KASAN_BIT_REPORTED	0
 #define KASAN_BIT_MULTI_SHOT	1
 
+#define MAX_RECORDS		(200)
+static struct kasan_record kasan_records[MAX_RECORDS];
+static int stored_kasan_records;
+
 bool kasan_save_enable_multi_shot(void)
 {
 	return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
@@ -360,6 +364,65 @@  void kasan_report_invalid_free(void *object, unsigned long ip)
 	end_report(&flags, (unsigned long)object);
 }
 
+/*
+ * @save_report()
+ *
+ * returns false if same record is already saved.
+ * returns true if its new record and saved in database of KASAN.
+ */
+static bool save_report(void *addr, struct kasan_access_info *info, u8 tag, unsigned long *flags)
+{
+	struct kasan_record record = {0};
+	depot_stack_handle_t bt_handle;
+	int i = 0;
+	const char *bug_type;
+	struct kasan_alloc_meta *alloc_meta;
+	struct kasan_track *free_track;
+	struct page *page;
+	bool ret = true;
+
+	kasan_disable_current();
+	spin_lock_irqsave(&report_lock, *flags);
+
+	bug_type = kasan_get_bug_type(info);
+	page = kasan_addr_to_page(addr);
+	bt_handle = kasan_save_stack(GFP_KERNEL);
+
+	if (page && PageSlab(page)) {
+		struct kmem_cache *cache = page->slab_cache;
+		void *object = nearest_obj(cache, page, addr);
+
+		alloc_meta = kasan_get_alloc_meta(cache, object);
+		free_track = kasan_get_free_track(cache, object, tag);
+		record.alloc_handle = alloc_meta->alloc_track.stack;
+		if (free_track)
+			record.free_handle = free_track->stack;
+	}
+
+	record.bt_handle = bt_handle;
+
+	for (i = 0; i < stored_kasan_records; i++) {
+		if (record.bt_handle != kasan_records[i].bt_handle)
+			continue;
+		if (record.alloc_handle != kasan_records[i].alloc_handle)
+			continue;
+		if (!strncmp("use-after-free", bug_type, 15) &&
+			(record.free_handle != kasan_records[i].free_handle))
+			continue;
+
+		ret = false;
+		goto done;
+	}
+
+	memcpy(&kasan_records[stored_kasan_records], &record, sizeof(struct kasan_record));
+	stored_kasan_records++;
+
+done:
+	spin_unlock_irqrestore(&report_lock, *flags);
+	kasan_enable_current();
+	return ret;
+}
+
 static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 				unsigned long ip)
 {
@@ -388,6 +451,10 @@  static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 	info.is_write = is_write;
 	info.ip = ip;
 
+	if (addr_has_metadata(untagged_addr) &&
+		!save_report(untagged_addr, &info, get_tag(tagged_addr), &flags))
+		return;
+
 	start_report(&flags);
 
 	print_error_description(&info);