Message ID | 1619079317-1131-1-git-send-email-maninder1.s@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] mm/kasan: avoid duplicate KASAN issues from reporting | expand |
On Thu, Apr 22, 2021 at 11:17 AM Maninder Singh <maninder1.s@samsung.com> wrote: > > when KASAN multishot is ON and some buggy code hits same code path > of KASAN issue repetetively, it can flood logs on console. > > Check for allocaton, free and backtrace path at time of KASAN error, > if these are same then it is duplicate error and avoid these prints > from KASAN. > > Co-developed-by: Vaneet Narang <v.narang@samsung.com> > Signed-off-by: Vaneet Narang <v.narang@samsung.com> > Signed-off-by: Maninder Singh <maninder1.s@samsung.com> > --- > mm/kasan/kasan.h | 6 +++++ > mm/kasan/report.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 73 insertions(+) > > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h > index 78cf99247139..d14ccce246ba 100644 > --- a/mm/kasan/kasan.h > +++ b/mm/kasan/kasan.h > @@ -102,6 +102,12 @@ struct kasan_access_info { > unsigned long ip; > }; > > +struct kasan_record { > + depot_stack_handle_t bt_handle; > + depot_stack_handle_t alloc_handle; > + depot_stack_handle_t free_handle; > +}; Hi Maninder, There is no need to declare this in the header, it can be declared more locally in report.h. > + > /* The layout of struct dictated by compiler */ > struct kasan_source_location { > const char *filename; > diff --git a/mm/kasan/report.c b/mm/kasan/report.c > index 87b271206163..4576de76991b 100644 > --- a/mm/kasan/report.c > +++ b/mm/kasan/report.c > @@ -39,6 +39,10 @@ static unsigned long kasan_flags; > #define KASAN_BIT_REPORTED 0 > #define KASAN_BIT_MULTI_SHOT 1 > > +#define MAX_RECORDS (200) s/MAX_RECORDS/KASAN_MAX_RECORDS/ > +static struct kasan_record kasan_records[MAX_RECORDS]; Since all fields in kasan_record are stack handles, the code will be simpler and more uniform, if we store just an array of handles w/o distinguishing between alloc/free/access. > +static int stored_kasan_records; > + > bool kasan_save_enable_multi_shot(void) > { > return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); > @@ -360,6 +364,65 @@ void kasan_report_invalid_free(void *object, unsigned long ip) > end_report(&flags, (unsigned long)object); > } > > +/* > + * @save_report() > + * > + * returns false if same record is already saved. s/same/the same/ > + * returns true if its new record and saved in database of KASAN. s/its/it's/ s/database/the database/ > + */ > +static bool save_report(void *addr, struct kasan_access_info *info, u8 tag, unsigned long *flags) > +{ > + struct kasan_record record = {0}; > + depot_stack_handle_t bt_handle; > + int i = 0; > + const char *bug_type; > + struct kasan_alloc_meta *alloc_meta; > + struct kasan_track *free_track; > + struct page *page; > + bool ret = true; > + > + kasan_disable_current(); > + spin_lock_irqsave(&report_lock, *flags); Reusing the caller flags looks strange, do we need it? But also the very next function start_report() also does the same dance: kasan_disable_current/spin_lock_irqsave. It feels reasonable to lock once, check for dups and return early if it's a dup. > + bug_type = kasan_get_bug_type(info); > + page = kasan_addr_to_page(addr); > + bt_handle = kasan_save_stack(GFP_KERNEL); ASsign directly to record.bt_handle. > + if (page && PageSlab(page)) { > + struct kmem_cache *cache = page->slab_cache; > + void *object = nearest_obj(cache, page, addr); Since you already declare new var in this block, move alloc_meta/free_track here as well. > + > + alloc_meta = kasan_get_alloc_meta(cache, object); > + free_track = kasan_get_free_track(cache, object, tag); > + record.alloc_handle = alloc_meta->alloc_track.stack; > + if (free_track) > + record.free_handle = free_track->stack; > + } > + > + record.bt_handle = bt_handle; > + > + for (i = 0; i < stored_kasan_records; i++) { > + if (record.bt_handle != kasan_records[i].bt_handle) > + continue; > + if (record.alloc_handle != kasan_records[i].alloc_handle) > + continue; > + if (!strncmp("use-after-free", bug_type, 15) && Comparing strings is unreliable and will break in future. Compare handle with 0 instead, you already assume that 0 handle is "no handle". > + (record.free_handle != kasan_records[i].free_handle)) > + continue; > + > + ret = false; > + goto done; > + } > + > + memcpy(&kasan_records[stored_kasan_records], &record, sizeof(struct kasan_record)); > + stored_kasan_records++; I think you just introduced an out-of-bounds write into KASAN, check for MAX_RECORDS ;) > + > +done: > + spin_unlock_irqrestore(&report_lock, *flags); > + kasan_enable_current(); > + return ret; > +} > + > static void __kasan_report(unsigned long addr, size_t size, bool is_write, > unsigned long ip) > { > @@ -388,6 +451,10 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, > info.is_write = is_write; > info.ip = ip; > > + if (addr_has_metadata(untagged_addr) && Why addr_has_metadata check? The kernel will probably crash later anyway, but from point of view of this code, I don't see reasons to not dedup wild accesses. > + !save_report(untagged_addr, &info, get_tag(tagged_addr), &flags)) > + return; > + > start_report(&flags); > > print_error_description(&info); > -- > 2.17.1 >
On Thu, Apr 22, 2021 at 11:58 AM Dmitry Vyukov <dvyukov@google.com> wrote: > > On Thu, Apr 22, 2021 at 11:17 AM Maninder Singh <maninder1.s@samsung.com> wrote: > > > > when KASAN multishot is ON and some buggy code hits same code path > > of KASAN issue repetetively, it can flood logs on console. > > > > Check for allocaton, free and backtrace path at time of KASAN error, > > if these are same then it is duplicate error and avoid these prints > > from KASAN. Can this be tested with the kunit kasan tests? If yes, please add a test for this new code. > > Co-developed-by: Vaneet Narang <v.narang@samsung.com> > > Signed-off-by: Vaneet Narang <v.narang@samsung.com> > > Signed-off-by: Maninder Singh <maninder1.s@samsung.com> > > --- > > mm/kasan/kasan.h | 6 +++++ > > mm/kasan/report.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 73 insertions(+) > > > > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h > > index 78cf99247139..d14ccce246ba 100644 > > --- a/mm/kasan/kasan.h > > +++ b/mm/kasan/kasan.h > > @@ -102,6 +102,12 @@ struct kasan_access_info { > > unsigned long ip; > > }; > > > > +struct kasan_record { > > + depot_stack_handle_t bt_handle; > > + depot_stack_handle_t alloc_handle; > > + depot_stack_handle_t free_handle; > > +}; > > Hi Maninder, > > There is no need to declare this in the header, it can be declared > more locally in report.h. > > > + > > /* The layout of struct dictated by compiler */ > > struct kasan_source_location { > > const char *filename; > > diff --git a/mm/kasan/report.c b/mm/kasan/report.c > > index 87b271206163..4576de76991b 100644 > > --- a/mm/kasan/report.c > > +++ b/mm/kasan/report.c > > @@ -39,6 +39,10 @@ static unsigned long kasan_flags; > > #define KASAN_BIT_REPORTED 0 > > #define KASAN_BIT_MULTI_SHOT 1 > > > > +#define MAX_RECORDS (200) > > s/MAX_RECORDS/KASAN_MAX_RECORDS/ > > > +static struct kasan_record kasan_records[MAX_RECORDS]; > > Since all fields in kasan_record are stack handles, the code will be > simpler and more uniform, if we store just an array of handles w/o > distinguishing between alloc/free/access. > > > +static int stored_kasan_records; > > + > > bool kasan_save_enable_multi_shot(void) > > { > > return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); > > @@ -360,6 +364,65 @@ void kasan_report_invalid_free(void *object, unsigned long ip) > > end_report(&flags, (unsigned long)object); > > } > > > > +/* > > + * @save_report() > > + * > > + * returns false if same record is already saved. > > s/same/the same/ > > > + * returns true if its new record and saved in database of KASAN. > > s/its/it's/ > s/database/the database/ > > > + */ > > +static bool save_report(void *addr, struct kasan_access_info *info, u8 tag, unsigned long *flags) > > +{ > > + struct kasan_record record = {0}; > > + depot_stack_handle_t bt_handle; > > + int i = 0; > > + const char *bug_type; > > + struct kasan_alloc_meta *alloc_meta; > > + struct kasan_track *free_track; > > + struct page *page; > > + bool ret = true; > > + > > + kasan_disable_current(); > > + spin_lock_irqsave(&report_lock, *flags); > > Reusing the caller flags looks strange, do we need it? > But also the very next function start_report() also does the same > dance: kasan_disable_current/spin_lock_irqsave. It feels reasonable to > lock once, check for dups and return early if it's a dup. > > > + bug_type = kasan_get_bug_type(info); > > + page = kasan_addr_to_page(addr); > > + bt_handle = kasan_save_stack(GFP_KERNEL); > > ASsign directly to record.bt_handle. > > > + if (page && PageSlab(page)) { > > + struct kmem_cache *cache = page->slab_cache; > > + void *object = nearest_obj(cache, page, addr); > > Since you already declare new var in this block, move > alloc_meta/free_track here as well. > > > + > > + alloc_meta = kasan_get_alloc_meta(cache, object); > > + free_track = kasan_get_free_track(cache, object, tag); > > + record.alloc_handle = alloc_meta->alloc_track.stack; > > + if (free_track) > > + record.free_handle = free_track->stack; > > + } > > + > > + record.bt_handle = bt_handle; > > + > > + for (i = 0; i < stored_kasan_records; i++) { > > + if (record.bt_handle != kasan_records[i].bt_handle) > > + continue; > > + if (record.alloc_handle != kasan_records[i].alloc_handle) > > + continue; > > + if (!strncmp("use-after-free", bug_type, 15) && > > Comparing strings is unreliable and will break in future. Compare > handle with 0 instead, you already assume that 0 handle is "no > handle". > > > + (record.free_handle != kasan_records[i].free_handle)) > > + continue; > > + > > + ret = false; > > + goto done; > > + } > > + > > + memcpy(&kasan_records[stored_kasan_records], &record, sizeof(struct kasan_record)); > > + stored_kasan_records++; > > I think you just introduced an out-of-bounds write into KASAN, check > for MAX_RECORDS ;) > > > > + > > +done: > > + spin_unlock_irqrestore(&report_lock, *flags); > > + kasan_enable_current(); > > + return ret; > > +} > > + > > static void __kasan_report(unsigned long addr, size_t size, bool is_write, > > unsigned long ip) > > { > > @@ -388,6 +451,10 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, > > info.is_write = is_write; > > info.ip = ip; > > > > + if (addr_has_metadata(untagged_addr) && > > Why addr_has_metadata check? > The kernel will probably crash later anyway, but from point of view of > this code, I don't see reasons to not dedup wild accesses. > > > + !save_report(untagged_addr, &info, get_tag(tagged_addr), &flags)) > > + return; > > + > > start_report(&flags); > > > > print_error_description(&info); > > -- > > 2.17.1 > >
On Thu, 22 Apr 2021 at 11:17, Maninder Singh <maninder1.s@samsung.com> wrote: > > when KASAN multishot is ON and some buggy code hits same code path > of KASAN issue repetetively, it can flood logs on console. > > Check for allocaton, free and backtrace path at time of KASAN error, > if these are same then it is duplicate error and avoid these prints > from KASAN. On a more fundamental level, I think this sort of filtering is the wrong solution to your problem. One reason why it's good that multishot is off by default is, because _every_ KASAN report is critical and can destabilize the system. Therefore, any report after the first one might be completely bogus, because the system is in a potentially bad state and its behaviour might be completely random. The correct solution is to not leave the system running, fix the first bug found, continue; rinse and repeat. Therefore, this patch adds a lot of code for little benefit. The much simpler solution that will likely yield a similar result is to simply define an upper bound on the number of reports if multishot is on. Because if I've seen 1000 reports, I already know the system is completely trashed and whatever else it's reporting might just be random. Thanks, -- Marco
On Thu, Apr 22, 2021 at 4:10 PM Marco Elver <elver@google.com> wrote: > > On Thu, 22 Apr 2021 at 11:17, Maninder Singh <maninder1.s@samsung.com> wrote: > > > > when KASAN multishot is ON and some buggy code hits same code path > > of KASAN issue repetetively, it can flood logs on console. > > > > Check for allocaton, free and backtrace path at time of KASAN error, > > if these are same then it is duplicate error and avoid these prints > > from KASAN. > > On a more fundamental level, I think this sort of filtering is the > wrong solution to your problem. One reason why it's good that > multishot is off by default is, because _every_ KASAN report is > critical and can destabilize the system. Therefore, any report after > the first one might be completely bogus, because the system is in a > potentially bad state and its behaviour might be completely random. > > The correct solution is to not leave the system running, fix the first > bug found, continue; rinse and repeat. Therefore, this patch adds a > lot of code for little benefit. I agree with Marco here. It doesn't make sense to have this deduplication code in the kernel anyway. If you want unique reports, write a userspace script that parses dmesg and groups the reports. Thanks!
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 78cf99247139..d14ccce246ba 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -102,6 +102,12 @@ struct kasan_access_info { unsigned long ip; }; +struct kasan_record { + depot_stack_handle_t bt_handle; + depot_stack_handle_t alloc_handle; + depot_stack_handle_t free_handle; +}; + /* The layout of struct dictated by compiler */ struct kasan_source_location { const char *filename; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 87b271206163..4576de76991b 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -39,6 +39,10 @@ static unsigned long kasan_flags; #define KASAN_BIT_REPORTED 0 #define KASAN_BIT_MULTI_SHOT 1 +#define MAX_RECORDS (200) +static struct kasan_record kasan_records[MAX_RECORDS]; +static int stored_kasan_records; + bool kasan_save_enable_multi_shot(void) { return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); @@ -360,6 +364,65 @@ void kasan_report_invalid_free(void *object, unsigned long ip) end_report(&flags, (unsigned long)object); } +/* + * @save_report() + * + * returns false if same record is already saved. + * returns true if its new record and saved in database of KASAN. + */ +static bool save_report(void *addr, struct kasan_access_info *info, u8 tag, unsigned long *flags) +{ + struct kasan_record record = {0}; + depot_stack_handle_t bt_handle; + int i = 0; + const char *bug_type; + struct kasan_alloc_meta *alloc_meta; + struct kasan_track *free_track; + struct page *page; + bool ret = true; + + kasan_disable_current(); + spin_lock_irqsave(&report_lock, *flags); + + bug_type = kasan_get_bug_type(info); + page = kasan_addr_to_page(addr); + bt_handle = kasan_save_stack(GFP_KERNEL); + + if (page && PageSlab(page)) { + struct kmem_cache *cache = page->slab_cache; + void *object = nearest_obj(cache, page, addr); + + alloc_meta = kasan_get_alloc_meta(cache, object); + free_track = kasan_get_free_track(cache, object, tag); + record.alloc_handle = alloc_meta->alloc_track.stack; + if (free_track) + record.free_handle = free_track->stack; + } + + record.bt_handle = bt_handle; + + for (i = 0; i < stored_kasan_records; i++) { + if (record.bt_handle != kasan_records[i].bt_handle) + continue; + if (record.alloc_handle != kasan_records[i].alloc_handle) + continue; + if (!strncmp("use-after-free", bug_type, 15) && + (record.free_handle != kasan_records[i].free_handle)) + continue; + + ret = false; + goto done; + } + + memcpy(&kasan_records[stored_kasan_records], &record, sizeof(struct kasan_record)); + stored_kasan_records++; + +done: + spin_unlock_irqrestore(&report_lock, *flags); + kasan_enable_current(); + return ret; +} + static void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { @@ -388,6 +451,10 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, info.is_write = is_write; info.ip = ip; + if (addr_has_metadata(untagged_addr) && + !save_report(untagged_addr, &info, get_tag(tagged_addr), &flags)) + return; + start_report(&flags); print_error_description(&info);