diff mbox series

[v6,31/39] kasan, vmalloc: only tag normal vmalloc allocations

Message ID fbfd9939a4dc375923c9a5c6b9e7ab05c26b8c6b.1643047180.git.andreyknvl@google.com (mailing list archive)
State New, archived
Headers show
Series kasan, vmalloc, arm64: add vmalloc tagging support for SW/HW_TAGS | expand

Commit Message

andrey.konovalov@linux.dev Jan. 24, 2022, 6:05 p.m. UTC
From: Andrey Konovalov <andreyknvl@google.com>

The kernel can use vmalloc to allocate executable memory. The only supported way
to do that is via __vmalloc_node_range() with the executable bit set in
the prot argument. (vmap() resets the bit via pgprot_nx()).

Once tag-based KASAN modes start tagging vmalloc allocations, executing
code from such allocations will lead to the PC register getting a tag,
which is not tolerated by the kernel.

Only tag the allocations for normal kernel pages.

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>

---

Changes v3->v4:
- Rename KASAN_VMALLOC_NOEXEC to KASAN_VMALLOC_PROT_NORMAL.
- Compare with PAGE_KERNEL instead of using pgprot_nx().
- Update patch description.

Changes v2->v3:
- Add this patch.
---
 include/linux/kasan.h |  7 ++++---
 mm/kasan/hw_tags.c    |  7 +++++++
 mm/kasan/shadow.c     |  7 +++++++
 mm/vmalloc.c          | 49 +++++++++++++++++++++++++------------------
 4 files changed, 47 insertions(+), 23 deletions(-)

Comments

Vasily Gorbik March 8, 2022, 3:17 p.m. UTC | #1
On Mon, Jan 24, 2022 at 07:05:05PM +0100, andrey.konovalov@linux.dev wrote:
> From: Andrey Konovalov <andreyknvl@google.com>
> 
> The kernel can use to allocate executable memory. The only supported way
> to do that is via __vmalloc_node_range() with the executable bit set in
> the prot argument. (vmap() resets the bit via pgprot_nx()).
> 
> Once tag-based KASAN modes start tagging vmalloc allocations, executing
> code from such allocations will lead to the PC register getting a tag,
> which is not tolerated by the kernel.
> 
> Only tag the allocations for normal kernel pages.
> 
> Signed-off-by: Andrey Konovalov <andreyknvl@google.com>

This breaks s390 and produces a huge number of false positives.
I haven't been testing linux-next with KASAN for a while; I have now tried it
with next-20220308 and bisected the false positives to this commit.

Any idea what is going wrong here?

I see 2 patterns:

[    1.123723] BUG: KASAN: vmalloc-out-of-bounds in ftrace_plt_init+0xb8/0xe0
[    1.123740] Write of size 8 at addr 001bffff80000000 by task swapper/0/1
[    1.123745]
[    1.123749] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc7-118520-ga20d77ce812a #142
[    1.123755] Hardware name: IBM 8561 T01 701 (KVM/Linux)
[    1.123758] Call Trace:
[    1.123761]  [<000000000218e5fe>] dump_stack_lvl+0xc6/0xf8
[    1.123782]  [<0000000002176cb4>] print_address_description.constprop.0+0x64/0x2f0
[    1.123793]  [<000000000086fd3e>] kasan_report+0x15e/0x1c8
[    1.123802]  [<0000000000870f5c>] kasan_check_range+0x174/0x1c0
[    1.123808]  [<0000000000871988>] memcpy+0x58/0x88
[    1.123813]  [<000000000342cea8>] ftrace_plt_init+0xb8/0xe0
[    1.123819]  [<0000000000101522>] do_one_initcall+0xc2/0x468
[    1.123825]  [<000000000341ffc6>] do_initcalls+0x1be/0x1e8
[    1.123830]  [<0000000003420504>] kernel_init_freeable+0x494/0x4e8
[    1.123834]  [<0000000002196556>] kernel_init+0x2e/0x180
[    1.123838]  [<000000000010625a>] __ret_from_fork+0x8a/0xe8
[    1.123843]  [<00000000021b557a>] ret_from_fork+0xa/0x40
[    1.123852]
[    1.123854]
[    1.123856] Memory state around the buggy address:
[    1.123861]  001bffff7fffff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    1.123865]  001bffff7fffff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    1.123868] >001bffff80000000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[    1.123872]                    ^
[    1.123874]  001bffff80000080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[    1.123878]  001bffff80000100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8

$ cat /sys/kernel/debug/kernel_page_tables
---[ Modules Area Start ]---
0x001bffff80000000-0x001bffff80001000         4K PTE RO X
0x001bffff80001000-0x001bffff80002000         4K PTE I
0x001bffff80002000-0x001bffff80003000         4K PTE RO X
0x001bffff80003000-0x001bffff80004000         4K PTE I

[    1.409146] BUG: KASAN: vmalloc-out-of-bounds in bpf_jit_binary_alloc+0x138/0x170
[    1.409154] Write of size 4 at addr 001bffff80002000 by task systemd/1
[    1.409158]
[    1.409160] CPU: 0 PID: 1 Comm: systemd Tainted: G    B   W         5.17.0-rc7-118520-ga20d77ce812a #141
[    1.409166] Hardware name: IBM 8561 T01 701 (KVM/Linux)
[    1.409169] Call Trace:
[    1.409171]  [<000000000218e5fe>] dump_stack_lvl+0xc6/0xf8
[    1.409176]  [<0000000002176cb4>] print_address_description.constprop.0+0x64/0x2f0
[    1.409183]  [<000000000086fd3e>] kasan_report+0x15e/0x1c8
[    1.409188]  [<0000000000588860>] bpf_jit_binary_alloc+0x138/0x170
[    1.409192]  [<000000000019fa84>] bpf_int_jit_compile+0x814/0xca8
[    1.409197]  [<000000000058b60e>] bpf_prog_select_runtime+0x286/0x3e8
[    1.409202]  [<000000000059ac2e>] bpf_prog_load+0xe66/0x1a10
[    1.409206]  [<000000000059ebd4>] __sys_bpf+0x8bc/0x1088
[    1.409211]  [<000000000059f9e8>] __s390x_sys_bpf+0x98/0xc8
[    1.409216]  [<000000000010ce74>] do_syscall+0x22c/0x328
[    1.409221]  [<000000000219599c>] __do_syscall+0x94/0xf0
[    1.409226]  [<00000000021b5542>] system_call+0x82/0xb0
[    1.409232]
[    1.409234]
[    1.409235] Memory state around the buggy address:
[    1.409238]  001bffff80001f00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[    1.409242]  001bffff80001f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[    1.409246] >001bffff80002000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[    1.409249]                    ^
[    1.409251]  001bffff80002080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[    1.409255]  001bffff80002100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8

$ git bisect log
git bisect start
# good: [ea4424be16887a37735d6550cfd0611528dbe5d9] Merge tag 'mtd/fixes-for-5.17-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux
git bisect good ea4424be16887a37735d6550cfd0611528dbe5d9
# bad: [cb153b68ff91cbc434f3de70ac549e110543e1bb] Add linux-next specific files for 20220308
git bisect bad cb153b68ff91cbc434f3de70ac549e110543e1bb
# good: [1ce7aac49a7b73abbd691c6e6a1577a449d90bad] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect good 1ce7aac49a7b73abbd691c6e6a1577a449d90bad
# good: [08688e100b1b07ce178c1d3c6b9983e00cd85413] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
git bisect good 08688e100b1b07ce178c1d3c6b9983e00cd85413
# good: [82a204646439657e5c2f94da5cad7ba96de10414] Merge branch 'togreg' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git
git bisect good 82a204646439657e5c2f94da5cad7ba96de10414
# good: [ac82bf337c937458bf4f75985857bf3a68cd7c16] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git
git bisect good ac82bf337c937458bf4f75985857bf3a68cd7c16
# good: [a36f330518af9bd205451bedb4eb22a5245cf010] ipc/mqueue: use get_tree_nodev() in mqueue_get_tree()
git bisect good a36f330518af9bd205451bedb4eb22a5245cf010
# good: [339c1d0fb400ab3acd2da2d9990242f654689f6e] Merge branch 'for-next' of git://git.infradead.org/users/willy/pagecache.git
git bisect good 339c1d0fb400ab3acd2da2d9990242f654689f6e
# good: [b8a58fecbd4982211f528d405a9ded00ddc7d646] kasan: only apply __GFP_ZEROTAGS when memory is zeroed
git bisect good b8a58fecbd4982211f528d405a9ded00ddc7d646
# bad: [141e05389762bee5fb0eb54af9c4d5266ce11d26] kasan: drop addr check from describe_object_addr
git bisect bad 141e05389762bee5fb0eb54af9c4d5266ce11d26
# good: [97fedbc9a6bccd508c392b0e177380313dd9fcd2] kasan, page_alloc: allow skipping unpoisoning for HW_TAGS
git bisect good 97fedbc9a6bccd508c392b0e177380313dd9fcd2
# bad: [606c2ee3fabbf66594f39998be9b5a21c2bf5dff] arm64: select KASAN_VMALLOC for SW/HW_TAGS modes
git bisect bad 606c2ee3fabbf66594f39998be9b5a21c2bf5dff
# bad: [bd2c296805cff9572080bf56807c16d1dd382260] kasan, scs: support tagged vmalloc mappings
git bisect bad bd2c296805cff9572080bf56807c16d1dd382260
# good: [7b80fa947b3a3ee746115395d1c5f7157119b7d2] kasan, vmalloc: add vmalloc tagging for HW_TAGS
git bisect good 7b80fa947b3a3ee746115395d1c5f7157119b7d2
# bad: [f51c09448ea124622f8ebcfb41d06c809ee01bca] fix for "kasan, vmalloc: only tag normal vmalloc allocations"
git bisect bad f51c09448ea124622f8ebcfb41d06c809ee01bca
# bad: [a20d77ce812a3e11b3cf2cb4f411904bb5c6edaa] kasan, vmalloc: only tag normal vmalloc allocations
git bisect bad a20d77ce812a3e11b3cf2cb4f411904bb5c6edaa
# first bad commit: [a20d77ce812a3e11b3cf2cb4f411904bb5c6edaa] kasan, vmalloc: only tag normal vmalloc allocations
Andrey Konovalov March 8, 2022, 3:30 p.m. UTC | #2
On Tue, Mar 8, 2022 at 4:17 PM Vasily Gorbik <gor@linux.ibm.com> wrote:
>
> On Mon, Jan 24, 2022 at 07:05:05PM +0100, andrey.konovalov@linux.dev wrote:
> > From: Andrey Konovalov <andreyknvl@google.com>
> >
> > The kernel can use to allocate executable memory. The only supported way
> > to do that is via __vmalloc_node_range() with the executable bit set in
> > the prot argument. (vmap() resets the bit via pgprot_nx()).
> >
> > Once tag-based KASAN modes start tagging vmalloc allocations, executing
> > code from such allocations will lead to the PC register getting a tag,
> > which is not tolerated by the kernel.
> >
> > Only tag the allocations for normal kernel pages.
> >
> > Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
>
> This breaks s390 and produce huge amount of false positives.
> I haven't been testing linux-next with KASAN for while, now tried it with
> next-20220308 and bisected false positives to this commit.
>
> Any idea what is going wrong here?

Hi Vasily,

Could you try the attached fix?

Thanks!
Vasily Gorbik March 8, 2022, 3:48 p.m. UTC | #3
On Tue, Mar 08, 2022 at 04:30:46PM +0100, Andrey Konovalov wrote:
> On Tue, Mar 8, 2022 at 4:17 PM Vasily Gorbik <gor@linux.ibm.com> wrote:
> >
> > On Mon, Jan 24, 2022 at 07:05:05PM +0100, andrey.konovalov@linux.dev wrote:
> > > From: Andrey Konovalov <andreyknvl@google.com>
> > >
> > > The kernel can use to allocate executable memory. The only supported way
> > > to do that is via __vmalloc_node_range() with the executable bit set in
> > > the prot argument. (vmap() resets the bit via pgprot_nx()).
> > >
> > > Once tag-based KASAN modes start tagging vmalloc allocations, executing
> > > code from such allocations will lead to the PC register getting a tag,
> > > which is not tolerated by the kernel.
> > >
> > > Only tag the allocations for normal kernel pages.
> > >
> > > Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
> >
> > This breaks s390 and produce huge amount of false positives.
> > I haven't been testing linux-next with KASAN for while, now tried it with
> > next-20220308 and bisected false positives to this commit.
> >
> > Any idea what is going wrong here?
> 
> Could you try the attached fix?

Wow, that was quick!
Yes, it fixes the issue for s390, kasan tests pass as well.
Thank you!
diff mbox series

Patch

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 499f1573dba4..3593c95d1fa5 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -27,9 +27,10 @@  struct kunit_kasan_expectation {
 
 typedef unsigned int __bitwise kasan_vmalloc_flags_t;
 
-#define KASAN_VMALLOC_NONE	0x00u
-#define KASAN_VMALLOC_INIT	0x01u
-#define KASAN_VMALLOC_VM_ALLOC	0x02u
+#define KASAN_VMALLOC_NONE		0x00u
+#define KASAN_VMALLOC_INIT		0x01u
+#define KASAN_VMALLOC_VM_ALLOC		0x02u
+#define KASAN_VMALLOC_PROT_NORMAL	0x04u
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 21104fd51872..2e9378a4f07f 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -247,6 +247,13 @@  void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
 	if (!(flags & KASAN_VMALLOC_VM_ALLOC))
 		return (void *)start;
 
+	/*
+	 * Don't tag executable memory.
+	 * The kernel doesn't tolerate having the PC register tagged.
+	 */
+	if (!(flags & KASAN_VMALLOC_PROT_NORMAL))
+		return (void *)start;
+
 	tag = kasan_random_tag();
 	start = set_tag(start, tag);
 
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index b958babc8fed..7272e248db87 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -488,6 +488,13 @@  void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
 	if (!is_vmalloc_or_module_addr(start))
 		return (void *)start;
 
+	/*
+	 * Don't tag executable memory.
+	 * The kernel doesn't tolerate having the PC register tagged.
+	 */
+	if (!(flags & KASAN_VMALLOC_PROT_NORMAL))
+		return (void *)start;
+
 	start = set_tag(start, kasan_random_tag());
 	kasan_unpoison(start, size, false);
 	return (void *)start;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6dcdf815576b..375b53fd939f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2221,7 +2221,7 @@  void *vm_map_ram(struct page **pages, unsigned int count, int node)
 	 * With hardware tag-based KASAN, marking is skipped for
 	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
 	 */
-	mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_NONE);
+	mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL);
 
 	return mem;
 }
@@ -2460,7 +2460,7 @@  static struct vm_struct *__get_vm_area_node(unsigned long size,
 	 */
 	if (!(flags & VM_ALLOC))
 		area->addr = kasan_unpoison_vmalloc(area->addr, requested_size,
-							KASAN_VMALLOC_NONE);
+						    KASAN_VMALLOC_PROT_NORMAL);
 
 	return area;
 }
@@ -3071,7 +3071,7 @@  void *__vmalloc_node_range(unsigned long size, unsigned long align,
 {
 	struct vm_struct *area;
 	void *ret;
-	kasan_vmalloc_flags_t kasan_flags;
+	kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE;
 	unsigned long real_size = size;
 	unsigned long real_align = align;
 	unsigned int shift = PAGE_SHIFT;
@@ -3124,21 +3124,28 @@  void *__vmalloc_node_range(unsigned long size, unsigned long align,
 		goto fail;
 	}
 
-	/* Prepare arguments for __vmalloc_area_node(). */
-	if (kasan_hw_tags_enabled() &&
-	    pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) {
-		/*
-		 * Modify protection bits to allow tagging.
-		 * This must be done before mapping in __vmalloc_area_node().
-		 */
-		prot = arch_vmap_pgprot_tagged(prot);
+	/*
+	 * Prepare arguments for __vmalloc_area_node() and
+	 * kasan_unpoison_vmalloc().
+	 */
+	if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) {
+		if (kasan_hw_tags_enabled()) {
+			/*
+			 * Modify protection bits to allow tagging.
+			 * This must be done before mapping.
+			 */
+			prot = arch_vmap_pgprot_tagged(prot);
 
-		/*
-		 * Skip page_alloc poisoning and zeroing for physical pages
-		 * backing VM_ALLOC mapping. Memory is instead poisoned and
-		 * zeroed by kasan_unpoison_vmalloc().
-		 */
-		gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO;
+			/*
+			 * Skip page_alloc poisoning and zeroing for physical
+			 * pages backing VM_ALLOC mapping. Memory is instead
+			 * poisoned and zeroed by kasan_unpoison_vmalloc().
+			 */
+			gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO;
+		}
+
+		/* Take note that the mapping is PAGE_KERNEL. */
+		kasan_flags |= KASAN_VMALLOC_PROT_NORMAL;
 	}
 
 	/* Allocate physical pages and map them into vmalloc space. */
@@ -3152,10 +3159,13 @@  void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	 * (except for the should_skip_init() check) to make sure that memory
 	 * is initialized under the same conditions regardless of the enabled
 	 * KASAN mode.
+	 * Tag-based KASAN modes only assign tags to normal non-executable
+	 * allocations, see __kasan_unpoison_vmalloc().
 	 */
-	kasan_flags = KASAN_VMALLOC_VM_ALLOC;
+	kasan_flags |= KASAN_VMALLOC_VM_ALLOC;
 	if (!want_init_on_free() && want_init_on_alloc(gfp_mask))
 		kasan_flags |= KASAN_VMALLOC_INIT;
+	/* KASAN_VMALLOC_PROT_NORMAL already set if required. */
 	area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags);
 
 	/*
@@ -3861,8 +3871,7 @@  struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	 */
 	for (area = 0; area < nr_vms; area++)
 		vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
-							 vms[area]->size,
-							 KASAN_VMALLOC_NONE);
+				vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
 
 	kfree(vas);
 	return vms;